Merge tag 'please-pull-pstore' of git://git.kernel.org/pub/scm/linux/kernel/git/aegl...
authorLinus Torvalds <torvalds@linux-foundation.org>
Wed, 18 Sep 2013 17:39:40 +0000 (12:39 -0500)
committerLinus Torvalds <torvalds@linux-foundation.org>
Wed, 18 Sep 2013 17:39:40 +0000 (12:39 -0500)
Pull pstore/compression fixes from Tony Luck:
 "Three pstore fixes related to compression:
   1) Better adjustment of size of compression buffer (was too big for
      EFIVARS backend resulting in compression failure)
   2) Use zlib_inflateInit2 instead of zlib_inflateInit
   3) Don't print messages about compression failure.  They will waste
      space that may better be used to log console output leading to the
      crash"

* tag 'please-pull-pstore' of git://git.kernel.org/pub/scm/linux/kernel/git/aegl/linux:
  pstore: Remove the messages related to compression failure
  pstore: Use zlib_inflateInit2 instead of zlib_inflateInit
  pstore: Adjust buffer size for compression for smaller registered buffers

1123 files changed:
Documentation/aoe/udev.txt
Documentation/block/cmdline-partition.txt [new file with mode: 0644]
Documentation/cgroups/memory.txt
Documentation/devicetree/bindings/leds/leds-lp55xx.txt
Documentation/devicetree/bindings/leds/pca963x.txt [new file with mode: 0644]
Documentation/devicetree/bindings/pci/ralink,rt3883-pci.txt [new file with mode: 0644]
Documentation/devicetree/bindings/rtc/moxa,moxart-rtc.txt [new file with mode: 0644]
Documentation/devicetree/bindings/rtc/rtc-omap.txt
Documentation/devicetree/bindings/rtc/rtc-palmas.txt [new file with mode: 0644]
Documentation/devicetree/bindings/sound/mvebu-audio.txt
Documentation/devicetree/bindings/thermal/exynos-thermal.txt [new file with mode: 0644]
Documentation/devicetree/bindings/thermal/imx-thermal.txt [new file with mode: 0644]
Documentation/devicetree/bindings/timer/marvell,armada-370-xp-timer.txt
Documentation/dma-buf-sharing.txt
Documentation/filesystems/cifs.txt [deleted file]
Documentation/filesystems/cifs/AUTHORS [new file with mode: 0644]
Documentation/filesystems/cifs/CHANGES [new file with mode: 0644]
Documentation/filesystems/cifs/README [new file with mode: 0644]
Documentation/filesystems/cifs/TODO [new file with mode: 0644]
Documentation/filesystems/cifs/cifs.txt [new file with mode: 0644]
Documentation/filesystems/cifs/winucase_convert.pl [new file with mode: 0755]
Documentation/filesystems/porting
Documentation/filesystems/proc.txt
Documentation/filesystems/ramfs-rootfs-initramfs.txt
Documentation/leds/leds-lp5521.txt
Documentation/leds/leds-lp5523.txt
Documentation/leds/leds-lp55xx.txt
Documentation/networking/00-INDEX
Documentation/networking/i40e.txt [new file with mode: 0644]
Documentation/scheduler/sched-design-CFS.txt
Documentation/scsi/ChangeLog.megaraid_sas
Documentation/sysctl/kernel.txt
Documentation/sysctl/vm.txt
Documentation/thermal/exynos_thermal
Documentation/thermal/sysfs-api.txt
Documentation/vm/hugetlbpage.txt
Documentation/vm/soft-dirty.txt
MAINTAINERS
Makefile
arch/alpha/Kconfig
arch/alpha/lib/csum_partial_copy.c
arch/alpha/mm/fault.c
arch/arc/Kconfig
arch/arc/include/asm/sections.h
arch/arc/kernel/head.S
arch/arc/kernel/irq.c
arch/arc/kernel/setup.c
arch/arc/mm/fault.c
arch/arm/Kconfig
arch/arm/boot/dts/sun7i-a20-cubieboard2.dts
arch/arm/boot/dts/sun7i-a20-olinuxino-micro.dts
arch/arm/boot/dts/sun7i-a20.dtsi
arch/arm/common/timer-sp.c
arch/arm/lib/Makefile
arch/arm/lib/xor-neon.c
arch/arm/mach-ep93xx/core.c
arch/arm/mach-mvebu/armada-370-xp.c
arch/arm/mach-shmobile/board-lager.c
arch/arm/mach-shmobile/setup-r8a7779.c
arch/arm/mach-versatile/include/mach/platform.h
arch/arm/mach-versatile/pci.c
arch/arm/mach-vexpress/Makefile
arch/arm/mm/fault.c
arch/arm/mm/hugetlbpage.c
arch/arm/plat-pxa/ssp.c
arch/arm64/Kconfig
arch/arm64/mm/fault.c
arch/arm64/mm/hugetlbpage.c
arch/avr32/Kconfig
arch/avr32/mm/fault.c
arch/blackfin/Kconfig
arch/blackfin/boot/.gitignore
arch/blackfin/include/asm/scb.h [new file with mode: 0644]
arch/blackfin/kernel/setup.c
arch/blackfin/mach-bf609/Kconfig
arch/blackfin/mach-bf609/Makefile
arch/blackfin/mach-bf609/boards/ezkit.c
arch/blackfin/mach-bf609/clock.c
arch/blackfin/mach-bf609/include/mach/defBF60x_base.h
arch/blackfin/mach-bf609/scb.c [new file with mode: 0644]
arch/blackfin/mach-common/Makefile
arch/blackfin/mach-common/scb-init.c [new file with mode: 0644]
arch/c6x/Kconfig
arch/cris/Kconfig
arch/cris/mm/fault.c
arch/frv/Kconfig
arch/frv/mm/fault.c
arch/h8300/Kconfig
arch/hexagon/Kconfig
arch/hexagon/mm/vm_fault.c
arch/ia64/Kconfig
arch/ia64/mm/fault.c
arch/ia64/mm/hugetlbpage.c
arch/m32r/Kconfig
arch/m32r/mm/fault.c
arch/m68k/Kconfig
arch/m68k/mm/fault.c
arch/metag/Kconfig
arch/metag/mm/fault.c
arch/metag/mm/hugetlbpage.c
arch/microblaze/Kconfig
arch/microblaze/mm/fault.c
arch/mips/Kconfig
arch/mips/Makefile
arch/mips/ath79/clock.c
arch/mips/ath79/common.h
arch/mips/ath79/dev-common.c
arch/mips/ath79/setup.c
arch/mips/boot/.gitignore
arch/mips/boot/Makefile
arch/mips/boot/compressed/Makefile
arch/mips/boot/dts/include/dt-bindings [new symlink]
arch/mips/cavium-octeon/octeon-irq.c
arch/mips/cavium-octeon/setup.c
arch/mips/cavium-octeon/smp.c
arch/mips/configs/xway_defconfig [new file with mode: 0644]
arch/mips/dec/ioasic-irq.c
arch/mips/dec/time.c
arch/mips/include/asm/Kbuild
arch/mips/include/asm/bmips.h
arch/mips/include/asm/cpu.h
arch/mips/include/asm/cputime.h [deleted file]
arch/mips/include/asm/current.h [deleted file]
arch/mips/include/asm/dec/ioasic.h
arch/mips/include/asm/emergency-restart.h [deleted file]
arch/mips/include/asm/local64.h [deleted file]
arch/mips/include/asm/mach-ath79/cpu-feature-overrides.h
arch/mips/include/asm/mach-cavium-octeon/gpio.h [new file with mode: 0644]
arch/mips/include/asm/mach-lantiq/falcon/cpu-feature-overrides.h [new file with mode: 0644]
arch/mips/include/asm/mach-ralink/mt7620.h
arch/mips/include/asm/mach-ralink/mt7620/cpu-feature-overrides.h [new file with mode: 0644]
arch/mips/include/asm/mutex.h [deleted file]
arch/mips/include/asm/netlogic/xlp-hal/bridge.h
arch/mips/include/asm/netlogic/xlp-hal/iomap.h
arch/mips/include/asm/netlogic/xlp-hal/pic.h
arch/mips/include/asm/netlogic/xlp-hal/sys.h
arch/mips/include/asm/netlogic/xlp-hal/xlp.h
arch/mips/include/asm/netlogic/xlr/pic.h
arch/mips/include/asm/octeon/octeon.h
arch/mips/include/asm/parport.h [deleted file]
arch/mips/include/asm/percpu.h [deleted file]
arch/mips/include/asm/scatterlist.h [deleted file]
arch/mips/include/asm/sections.h [deleted file]
arch/mips/include/asm/segment.h [deleted file]
arch/mips/include/asm/serial.h [deleted file]
arch/mips/include/asm/ucontext.h [deleted file]
arch/mips/include/asm/xor.h [deleted file]
arch/mips/include/uapi/asm/Kbuild
arch/mips/include/uapi/asm/auxvec.h [deleted file]
arch/mips/include/uapi/asm/ipcbuf.h [deleted file]
arch/mips/kernel/cpu-probe.c
arch/mips/kernel/csrc-ioasic.c
arch/mips/kernel/idle.c
arch/mips/kernel/mcount.S
arch/mips/kernel/relocate_kernel.S
arch/mips/kernel/setup.c
arch/mips/kernel/smp-cmp.c
arch/mips/kernel/time.c
arch/mips/kernel/vmlinux.lds.S
arch/mips/kernel/vpe.c
arch/mips/lantiq/falcon/sysctrl.c
arch/mips/lantiq/xway/Makefile
arch/mips/lantiq/xway/dcdc.c [new file with mode: 0644]
arch/mips/lasat/image/Makefile
arch/mips/loongson/common/Makefile
arch/mips/math-emu/cp1emu.c
arch/mips/mm/c-octeon.c
arch/mips/mm/dma-default.c
arch/mips/mm/fault.c
arch/mips/mm/gup.c
arch/mips/mm/hugetlbpage.c
arch/mips/mm/init.c
arch/mips/mm/tlb-funcs.S
arch/mips/mm/tlbex.c
arch/mips/netlogic/Kconfig
arch/mips/netlogic/common/smp.c
arch/mips/netlogic/common/time.c
arch/mips/netlogic/dts/Makefile
arch/mips/netlogic/dts/xlp_evp.dts
arch/mips/netlogic/dts/xlp_fvp.dts [new file with mode: 0644]
arch/mips/netlogic/dts/xlp_svp.dts
arch/mips/netlogic/xlp/Makefile
arch/mips/netlogic/xlp/dt.c
arch/mips/netlogic/xlp/nlm_hal.c
arch/mips/netlogic/xlp/setup.c
arch/mips/netlogic/xlp/usb-init-xlp2.c [new file with mode: 0644]
arch/mips/netlogic/xlp/usb-init.c
arch/mips/netlogic/xlp/wakeup.c
arch/mips/pci/Makefile
arch/mips/pci/pci-octeon.c
arch/mips/pci/pci-rt3883.c [new file with mode: 0644]
arch/mips/powertv/Kconfig
arch/mips/powertv/asic/asic_devices.c
arch/mips/powertv/init.c
arch/mips/powertv/reset.c
arch/mips/ralink/Kconfig
arch/mips/ralink/Makefile
arch/mips/ralink/Platform
arch/mips/ralink/cevt-rt3352.c [new file with mode: 0644]
arch/mips/ralink/clk.c
arch/mips/ralink/common.h
arch/mips/ralink/mt7620.c
arch/mips/ralink/of.c
arch/mips/ralink/reset.c
arch/mips/ralink/timer.c [new file with mode: 0644]
arch/mips/txx9/generic/setup.c
arch/mn10300/Kconfig
arch/mn10300/mm/fault.c
arch/openrisc/Kconfig
arch/openrisc/mm/fault.c
arch/parisc/Kconfig
arch/parisc/mm/fault.c
arch/powerpc/Kconfig
arch/powerpc/include/asm/device.h
arch/powerpc/include/asm/fsl_pamu_stash.h [new file with mode: 0644]
arch/powerpc/mm/fault.c
arch/powerpc/mm/hugetlbpage.c
arch/powerpc/sysdev/fsl_pci.h
arch/s390/Kconfig
arch/s390/include/asm/kprobes.h
arch/s390/include/asm/sclp.h
arch/s390/kernel/crash_dump.c
arch/s390/kernel/kprobes.c
arch/s390/mm/fault.c
arch/s390/mm/hugetlbpage.c
arch/score/Kconfig
arch/score/mm/fault.c
arch/sh/Kconfig
arch/sh/mm/fault.c
arch/sh/mm/hugetlbpage.c
arch/sparc/Kconfig
arch/sparc/kernel/sys_sparc32.c
arch/sparc/mm/fault_32.c
arch/sparc/mm/fault_64.c
arch/sparc/mm/hugetlbpage.c
arch/tile/Kconfig
arch/tile/gxio/iorpc_mpipe.c
arch/tile/gxio/iorpc_mpipe_info.c
arch/tile/gxio/iorpc_trio.c
arch/tile/gxio/iorpc_usb_host.c
arch/tile/gxio/usb_host.c
arch/tile/include/arch/mpipe.h
arch/tile/include/arch/mpipe_constants.h
arch/tile/include/arch/mpipe_shm.h
arch/tile/include/arch/trio_constants.h
arch/tile/include/asm/page.h
arch/tile/include/asm/pgtable_32.h
arch/tile/include/asm/pgtable_64.h
arch/tile/include/gxio/iorpc_mpipe.h
arch/tile/include/gxio/iorpc_mpipe_info.h
arch/tile/include/gxio/iorpc_trio.h
arch/tile/include/gxio/iorpc_usb_host.h
arch/tile/include/gxio/usb_host.h
arch/tile/kernel/compat.c
arch/tile/kernel/futex_64.S [deleted file]
arch/tile/kernel/setup.c
arch/tile/kernel/unaligned.c
arch/tile/mm/fault.c
arch/tile/mm/hugetlbpage.c
arch/tile/mm/init.c
arch/tile/mm/pgtable.c
arch/um/Kconfig.common
arch/um/defconfig
arch/um/kernel/trap.c
arch/unicore32/Kconfig
arch/unicore32/mm/fault.c
arch/x86/Kconfig
arch/x86/include/asm/pgtable.h
arch/x86/include/asm/pgtable_types.h
arch/x86/include/asm/tlbflush.h
arch/x86/kernel/cpu/mtrr/generic.c
arch/x86/kernel/cpu/perf_event_intel.c
arch/x86/kernel/cpu/perf_event_intel_ds.c
arch/x86/kernel/cpu/perf_event_intel_uncore.c
arch/x86/kernel/entry_64.S
arch/x86/kernel/smpboot.c
arch/x86/kvm/emulate.c
arch/x86/kvm/mmu.c
arch/x86/kvm/paging_tmpl.h
arch/x86/kvm/vmx.c
arch/x86/mm/fault.c
arch/x86/mm/hugetlbpage.c
arch/x86/mm/tlb.c
arch/xtensa/Kconfig
arch/xtensa/Makefile
arch/xtensa/boot/Makefile
arch/xtensa/configs/common_defconfig
arch/xtensa/configs/iss_defconfig
arch/xtensa/configs/s6105_defconfig
arch/xtensa/include/asm/regs.h
arch/xtensa/include/asm/timex.h
arch/xtensa/kernel/align.S
arch/xtensa/kernel/coprocessor.S
arch/xtensa/kernel/entry.S
arch/xtensa/kernel/setup.c
arch/xtensa/kernel/time.c
arch/xtensa/kernel/vectors.S
arch/xtensa/kernel/xtensa_ksyms.c
arch/xtensa/mm/fault.c
block/Kconfig
block/Makefile
block/blk-ioc.c
block/blk-sysfs.c
block/cmdline-parser.c [new file with mode: 0644]
block/compat_ioctl.c
block/partitions/Kconfig
block/partitions/Makefile
block/partitions/check.c
block/partitions/cmdline.c [new file with mode: 0644]
block/partitions/cmdline.h [new file with mode: 0644]
block/partitions/efi.c
block/partitions/efi.h
crypto/Makefile
crypto/api.c
crypto/crct10dif.c [deleted file]
crypto/crct10dif_common.c [new file with mode: 0644]
crypto/crct10dif_generic.c [new file with mode: 0644]
drivers/acpi/acpi_lpss.c
drivers/acpi/acpica/exstore.c
drivers/acpi/glue.c
drivers/acpi/scan.c
drivers/base/dma-buf.c
drivers/base/node.c
drivers/block/aoe/aoe.h
drivers/block/aoe/aoeblk.c
drivers/block/aoe/aoecmd.c
drivers/block/aoe/aoedev.c
drivers/block/cciss.c
drivers/block/mg_disk.c
drivers/block/mtip32xx/Kconfig
drivers/block/osdblk.c
drivers/block/pktcdvd.c
drivers/block/rbd.c
drivers/block/swim.c
drivers/block/xen-blkback/xenbus.c
drivers/char/random.c
drivers/char/tpm/tpm_tis.c
drivers/clocksource/em_sti.c
drivers/clocksource/nomadik-mtu.c
drivers/clocksource/sh_cmt.c
drivers/clocksource/time-armada-370-xp.c
drivers/cpufreq/cpufreq.c
drivers/cpufreq/cpufreq_stats.c
drivers/cpufreq/intel_pstate.c
drivers/cpuidle/driver.c
drivers/dma/dw/Kconfig
drivers/firmware/dmi_scan.c
drivers/firmware/google/gsmi.c
drivers/gpio/Kconfig
drivers/gpio/Makefile
drivers/gpio/gpio-octeon.c [new file with mode: 0644]
drivers/gpu/drm/i915/i915_dma.c
drivers/gpu/drm/i915/i915_gem.c
drivers/gpu/drm/ttm/ttm_page_alloc.c
drivers/gpu/drm/ttm/ttm_page_alloc_dma.c
drivers/hid/Kconfig
drivers/hid/hid-core.c
drivers/hid/hid-input.c
drivers/hid/hid-lenovo-tpkbd.c
drivers/hid/hid-lg2ff.c
drivers/hid/hid-lg3ff.c
drivers/hid/hid-lg4ff.c
drivers/hid/hid-lgff.c
drivers/hid/hid-logitech-dj.c
drivers/hid/hid-multitouch.c
drivers/hid/hid-sony.c
drivers/hid/hid-steelseries.c
drivers/hid/hid-zpff.c
drivers/hwmon/amc6821.c
drivers/hwmon/emc2103.c
drivers/hwmon/ibmaem.c
drivers/hwmon/k10temp.c
drivers/hwmon/tmp421.c
drivers/i2c/Kconfig
drivers/i2c/busses/Kconfig
drivers/i2c/busses/i2c-davinci.c
drivers/iio/Kconfig
drivers/infiniband/hw/qib/Kconfig
drivers/infiniband/ulp/isert/ib_isert.c
drivers/infiniband/ulp/isert/ib_isert.h
drivers/input/evdev.c
drivers/input/keyboard/Kconfig
drivers/input/serio/Kconfig
drivers/input/touchscreen/Kconfig
drivers/iommu/Kconfig
drivers/iommu/Makefile
drivers/iommu/amd_iommu.c
drivers/iommu/amd_iommu_init.c
drivers/iommu/arm-smmu.c
drivers/iommu/exynos-iommu.c
drivers/iommu/fsl_pamu.c [new file with mode: 0644]
drivers/iommu/fsl_pamu.h [new file with mode: 0644]
drivers/iommu/fsl_pamu_domain.c [new file with mode: 0644]
drivers/iommu/fsl_pamu_domain.h [new file with mode: 0644]
drivers/iommu/intel-iommu.c
drivers/iommu/msm_iommu_dev.c
drivers/iommu/omap-iommu.c
drivers/leds/Kconfig
drivers/leds/Makefile
drivers/leds/leds-88pm860x.c
drivers/leds/leds-adp5520.c
drivers/leds/leds-asic3.c
drivers/leds/leds-atmel-pwm.c
drivers/leds/leds-bd2802.c
drivers/leds/leds-clevo-mail.c
drivers/leds/leds-da903x.c
drivers/leds/leds-da9052.c
drivers/leds/leds-gpio.c
drivers/leds/leds-lm3530.c
drivers/leds/leds-lm3533.c
drivers/leds/leds-lm355x.c
drivers/leds/leds-lm3642.c
drivers/leds/leds-lp3944.c
drivers/leds/leds-lp5521.c
drivers/leds/leds-lp5523.c
drivers/leds/leds-lp5562.c
drivers/leds/leds-lp55xx-common.c
drivers/leds/leds-lp55xx-common.h
drivers/leds/leds-lp8501.c [new file with mode: 0644]
drivers/leds/leds-lt3593.c
drivers/leds/leds-netxbig.c
drivers/leds/leds-ns2.c
drivers/leds/leds-pca9532.c
drivers/leds/leds-pca955x.c
drivers/leds/leds-pca9633.c [deleted file]
drivers/leds/leds-pca963x.c [new file with mode: 0644]
drivers/leds/leds-pwm.c
drivers/leds/leds-regulator.c
drivers/leds/leds-s3c24xx.c
drivers/leds/leds-ss4200.c
drivers/leds/leds-tca6507.c
drivers/leds/leds-wm831x-status.c
drivers/leds/leds-wm8350.c
drivers/leds/trigger/ledtrig-backlight.c
drivers/md/bcache/btree.c
drivers/md/bcache/sysfs.c
drivers/md/dm-bufio.c
drivers/media/platform/Kconfig
drivers/media/radio/Kconfig
drivers/memstick/core/Kconfig
drivers/memstick/core/Makefile
drivers/memstick/core/ms_block.c [new file with mode: 0644]
drivers/memstick/core/ms_block.h [new file with mode: 0644]
drivers/memstick/host/rtsx_pci_ms.c
drivers/mfd/Kconfig
drivers/misc/cb710/Kconfig
drivers/mmc/host/Kconfig
drivers/mtd/ubi/fastmap.c
drivers/mtd/ubi/wl.c
drivers/net/bonding/bond_main.c
drivers/net/bonding/bond_sysfs.c
drivers/net/bonding/bonding.h
drivers/net/ethernet/amd/declance.c
drivers/net/ethernet/broadcom/bcm63xx_enet.c
drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c
drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c
drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
drivers/net/ethernet/cadence/Kconfig
drivers/net/ethernet/intel/Kconfig
drivers/net/ethernet/intel/Makefile
drivers/net/ethernet/intel/i40e/Makefile [new file with mode: 0644]
drivers/net/ethernet/intel/i40e/i40e.h [new file with mode: 0644]
drivers/net/ethernet/intel/i40e/i40e_adminq.c [new file with mode: 0644]
drivers/net/ethernet/intel/i40e/i40e_adminq.h [new file with mode: 0644]
drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h [new file with mode: 0644]
drivers/net/ethernet/intel/i40e/i40e_alloc.h [new file with mode: 0644]
drivers/net/ethernet/intel/i40e/i40e_common.c [new file with mode: 0644]
drivers/net/ethernet/intel/i40e/i40e_debugfs.c [new file with mode: 0644]
drivers/net/ethernet/intel/i40e/i40e_diag.c [new file with mode: 0644]
drivers/net/ethernet/intel/i40e/i40e_diag.h [new file with mode: 0644]
drivers/net/ethernet/intel/i40e/i40e_ethtool.c [new file with mode: 0644]
drivers/net/ethernet/intel/i40e/i40e_hmc.c [new file with mode: 0644]
drivers/net/ethernet/intel/i40e/i40e_hmc.h [new file with mode: 0644]
drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c [new file with mode: 0644]
drivers/net/ethernet/intel/i40e/i40e_lan_hmc.h [new file with mode: 0644]
drivers/net/ethernet/intel/i40e/i40e_main.c [new file with mode: 0644]
drivers/net/ethernet/intel/i40e/i40e_nvm.c [new file with mode: 0644]
drivers/net/ethernet/intel/i40e/i40e_osdep.h [new file with mode: 0644]
drivers/net/ethernet/intel/i40e/i40e_prototype.h [new file with mode: 0644]
drivers/net/ethernet/intel/i40e/i40e_register.h [new file with mode: 0644]
drivers/net/ethernet/intel/i40e/i40e_status.h [new file with mode: 0644]
drivers/net/ethernet/intel/i40e/i40e_txrx.c [new file with mode: 0644]
drivers/net/ethernet/intel/i40e/i40e_txrx.h [new file with mode: 0644]
drivers/net/ethernet/intel/i40e/i40e_type.h [new file with mode: 0644]
drivers/net/ethernet/intel/i40e/i40e_virtchnl.h [new file with mode: 0644]
drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c [new file with mode: 0644]
drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h [new file with mode: 0644]
drivers/net/ethernet/korina.c
drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
drivers/net/ethernet/tile/tilegx.c
drivers/net/irda/donauboe.c
drivers/net/irda/vlsi_ir.c
drivers/net/macvlan.c
drivers/net/usb/qmi_wwan.c
drivers/net/wireless/p54/Kconfig
drivers/net/wireless/ti/wl1251/Kconfig
drivers/net/wireless/ti/wlcore/Kconfig
drivers/of/of_reserved_mem.c
drivers/pci/hotplug/acpiphp_glue.c
drivers/pci/msi.c
drivers/platform/x86/Kconfig
drivers/platform/x86/amilo-rfkill.c
drivers/platform/x86/apple-gmux.c
drivers/platform/x86/classmate-laptop.c
drivers/platform/x86/compal-laptop.c
drivers/platform/x86/hp-wmi.c
drivers/platform/x86/intel-rst.c
drivers/platform/x86/intel-smartconnect.c
drivers/platform/x86/intel_mid_powerbtn.c
drivers/platform/x86/intel_mid_thermal.c
drivers/platform/x86/panasonic-laptop.c
drivers/platform/x86/samsung-q10.c
drivers/platform/x86/thinkpad_acpi.c
drivers/platform/x86/wmi.c
drivers/pnp/driver.c
drivers/power/Kconfig
drivers/pps/clients/Kconfig
drivers/pps/clients/pps-gpio.c
drivers/rtc/Kconfig
drivers/rtc/Makefile
drivers/rtc/rtc-cmos.c
drivers/rtc/rtc-ds1511.c
drivers/rtc/rtc-ds1553.c
drivers/rtc/rtc-ds1742.c
drivers/rtc/rtc-ep93xx.c
drivers/rtc/rtc-hid-sensor-time.c
drivers/rtc/rtc-imxdi.c
drivers/rtc/rtc-lpc32xx.c
drivers/rtc/rtc-max77686.c
drivers/rtc/rtc-moxart.c [new file with mode: 0644]
drivers/rtc/rtc-mv.c
drivers/rtc/rtc-mxc.c
drivers/rtc/rtc-nuc900.c
drivers/rtc/rtc-omap.c
drivers/rtc/rtc-palmas.c
drivers/rtc/rtc-pcf2127.c
drivers/rtc/rtc-sirfsoc.c
drivers/rtc/rtc-stk17ta8.c
drivers/rtc/rtc-tx4939.c
drivers/s390/char/zcore.c
drivers/scsi/aic7xxx/aic7xxx_pci.c
drivers/scsi/esas2r/esas2r_flash.c
drivers/scsi/esas2r/esas2r_init.c
drivers/scsi/esas2r/esas2r_ioctl.c
drivers/scsi/esas2r/esas2r_vda.c
drivers/scsi/fnic/fnic.h
drivers/scsi/fnic/fnic_main.c
drivers/scsi/fnic/fnic_scsi.c
drivers/scsi/fnic/vnic_scsi.h
drivers/scsi/hpsa.c
drivers/scsi/ibmvscsi/ibmvfc.c
drivers/scsi/ibmvscsi/ibmvscsi.c
drivers/scsi/ibmvscsi/viosrp.h
drivers/scsi/lpfc/lpfc.h
drivers/scsi/lpfc/lpfc_attr.c
drivers/scsi/lpfc/lpfc_bsg.c
drivers/scsi/lpfc/lpfc_hbadisc.c
drivers/scsi/lpfc/lpfc_init.c
drivers/scsi/lpfc/lpfc_scsi.c
drivers/scsi/lpfc/lpfc_sli.c
drivers/scsi/lpfc/lpfc_sli.h
drivers/scsi/lpfc/lpfc_sli4.h
drivers/scsi/lpfc/lpfc_version.h
drivers/scsi/megaraid/megaraid_sas.h
drivers/scsi/megaraid/megaraid_sas_base.c
drivers/scsi/megaraid/megaraid_sas_fp.c
drivers/scsi/megaraid/megaraid_sas_fusion.c
drivers/scsi/megaraid/megaraid_sas_fusion.h
drivers/scsi/mpt3sas/Makefile
drivers/scsi/qla2xxx/qla_target.c
drivers/scsi/qla2xxx/tcm_qla2xxx.c
drivers/scsi/qla2xxx/tcm_qla2xxx.h
drivers/scsi/sd.c
drivers/scsi/ufs/ufs.h
drivers/scsi/ufs/ufshcd.c
drivers/scsi/ufs/ufshcd.h
drivers/scsi/ufs/ufshci.h
drivers/scsi/ufs/unipro.h [new file with mode: 0644]
drivers/spi/Kconfig
drivers/staging/android/ashmem.c
drivers/staging/android/logger.c
drivers/staging/android/lowmemorykiller.c
drivers/staging/lustre/include/linux/libcfs/linux/linux-mem.h
drivers/staging/lustre/lustre/ldlm/ldlm_pool.c
drivers/staging/lustre/lustre/llite/file.c
drivers/staging/lustre/lustre/obdclass/lu_object.c
drivers/staging/lustre/lustre/ptlrpc/sec_bulk.c
drivers/target/Makefile
drivers/target/iscsi/iscsi_target.c
drivers/target/iscsi/iscsi_target.h
drivers/target/iscsi/iscsi_target_auth.c
drivers/target/iscsi/iscsi_target_configfs.c
drivers/target/iscsi/iscsi_target_core.h
drivers/target/iscsi/iscsi_target_datain_values.c
drivers/target/iscsi/iscsi_target_device.c
drivers/target/iscsi/iscsi_target_erl0.c
drivers/target/iscsi/iscsi_target_erl1.c
drivers/target/iscsi/iscsi_target_erl2.c
drivers/target/iscsi/iscsi_target_login.c
drivers/target/iscsi/iscsi_target_login.h
drivers/target/iscsi/iscsi_target_nego.c
drivers/target/iscsi/iscsi_target_nodeattrib.c
drivers/target/iscsi/iscsi_target_parameters.c
drivers/target/iscsi/iscsi_target_seq_pdu_list.c
drivers/target/iscsi/iscsi_target_stat.c
drivers/target/iscsi/iscsi_target_tmr.c
drivers/target/iscsi/iscsi_target_tpg.c
drivers/target/iscsi/iscsi_target_tpg.h
drivers/target/iscsi/iscsi_target_tq.c
drivers/target/iscsi/iscsi_target_tq.h
drivers/target/iscsi/iscsi_target_util.c
drivers/target/loopback/tcm_loop.c
drivers/target/target_core_alua.c
drivers/target/target_core_configfs.c
drivers/target/target_core_device.c
drivers/target/target_core_fabric_configfs.c
drivers/target/target_core_fabric_lib.c
drivers/target/target_core_file.c
drivers/target/target_core_hba.c
drivers/target/target_core_iblock.c
drivers/target/target_core_internal.h
drivers/target/target_core_pr.c
drivers/target/target_core_pscsi.c
drivers/target/target_core_rd.c
drivers/target/target_core_sbc.c
drivers/target/target_core_spc.c
drivers/target/target_core_stat.c
drivers/target/target_core_tmr.c
drivers/target/target_core_tpg.c
drivers/target/target_core_transport.c
drivers/target/target_core_ua.c
drivers/target/target_core_xcopy.c [new file with mode: 0644]
drivers/target/target_core_xcopy.h [new file with mode: 0644]
drivers/target/tcm_fc/tfc_conf.c
drivers/thermal/Kconfig
drivers/thermal/Makefile
drivers/thermal/cpu_cooling.c
drivers/thermal/exynos_thermal.c [deleted file]
drivers/thermal/imx_thermal.c [new file with mode: 0644]
drivers/thermal/samsung/Kconfig [new file with mode: 0644]
drivers/thermal/samsung/Makefile [new file with mode: 0644]
drivers/thermal/samsung/exynos_thermal_common.c [new file with mode: 0644]
drivers/thermal/samsung/exynos_thermal_common.h [new file with mode: 0644]
drivers/thermal/samsung/exynos_tmu.c [new file with mode: 0644]
drivers/thermal/samsung/exynos_tmu.h [new file with mode: 0644]
drivers/thermal/samsung/exynos_tmu_data.c [new file with mode: 0644]
drivers/thermal/samsung/exynos_tmu_data.h [new file with mode: 0644]
drivers/thermal/step_wise.c
drivers/thermal/thermal_core.c
drivers/thermal/thermal_hwmon.c [new file with mode: 0644]
drivers/thermal/thermal_hwmon.h [new file with mode: 0644]
drivers/thermal/ti-soc-thermal/dra752-thermal-data.c
drivers/thermal/ti-soc-thermal/ti-bandgap.c
drivers/thermal/ti-soc-thermal/ti-thermal-common.c
drivers/tty/serial/Kconfig
drivers/tty/tty_io.c
drivers/usb/dwc3/Kconfig
drivers/usb/gadget/Kconfig
drivers/usb/gadget/inode.c
drivers/usb/host/Kconfig
drivers/usb/musb/Kconfig
drivers/usb/renesas_usbhs/Kconfig
drivers/vhost/scsi.c
drivers/video/acornfb.c
drivers/video/acornfb.h
drivers/video/logo/logo_linux_clut224.ppm
drivers/w1/masters/Kconfig
drivers/w1/masters/mxc_w1.c
drivers/w1/w1.c
drivers/watchdog/Kconfig
drivers/watchdog/Makefile
drivers/watchdog/ar7_wdt.c
drivers/watchdog/hpwdt.c
drivers/watchdog/nuc900_wdt.c
drivers/watchdog/s3c2410_wdt.c
drivers/watchdog/sunxi_wdt.c [new file with mode: 0644]
drivers/watchdog/ts72xx_wdt.c
drivers/xen/balloon.c
fs/9p/vfs_file.c
fs/9p/vfs_inode.c
fs/adfs/inode.c
fs/affs/file.c
fs/aio.c
fs/anon_inodes.c
fs/bfs/file.c
fs/bio-integrity.c
fs/block_dev.c
fs/btrfs/Kconfig
fs/btrfs/Makefile
fs/btrfs/backref.c
fs/btrfs/backref.h
fs/btrfs/btrfs_inode.h
fs/btrfs/check-integrity.c
fs/btrfs/compression.c
fs/btrfs/ctree.c
fs/btrfs/ctree.h
fs/btrfs/delayed-inode.c
fs/btrfs/delayed-ref.c
fs/btrfs/dev-replace.c
fs/btrfs/disk-io.c
fs/btrfs/extent-tree.c
fs/btrfs/extent_io.c
fs/btrfs/extent_io.h
fs/btrfs/file-item.c
fs/btrfs/file.c
fs/btrfs/free-space-cache.c
fs/btrfs/free-space-cache.h
fs/btrfs/inode.c
fs/btrfs/ioctl.c
fs/btrfs/lzo.c
fs/btrfs/ordered-data.c
fs/btrfs/ordered-data.h
fs/btrfs/print-tree.c
fs/btrfs/qgroup.c
fs/btrfs/raid56.c
fs/btrfs/relocation.c
fs/btrfs/root-tree.c
fs/btrfs/scrub.c
fs/btrfs/send.c
fs/btrfs/super.c
fs/btrfs/tests/btrfs-tests.h [new file with mode: 0644]
fs/btrfs/tests/free-space-tests.c [new file with mode: 0644]
fs/btrfs/transaction.c
fs/btrfs/transaction.h
fs/btrfs/tree-log.c
fs/btrfs/uuid-tree.c [new file with mode: 0644]
fs/btrfs/volumes.c
fs/btrfs/volumes.h
fs/cifs/AUTHORS [deleted file]
fs/cifs/CHANGES [deleted file]
fs/cifs/Makefile
fs/cifs/README [deleted file]
fs/cifs/TODO [deleted file]
fs/cifs/cifs_unicode.h
fs/cifs/cifsfs.c
fs/cifs/cifsglob.h
fs/cifs/cifspdu.h
fs/cifs/cifsproto.h
fs/cifs/cifssmb.c
fs/cifs/connect.c
fs/cifs/dir.c
fs/cifs/file.c
fs/cifs/inode.c
fs/cifs/link.c
fs/cifs/misc.c
fs/cifs/readdir.c
fs/cifs/sess.c
fs/cifs/smb1ops.c
fs/cifs/smb2file.c
fs/cifs/smb2inode.c
fs/cifs/smb2misc.c
fs/cifs/smb2ops.c
fs/cifs/smb2pdu.c
fs/cifs/smb2pdu.h
fs/cifs/smb2proto.h
fs/cifs/smb2transport.c
fs/cifs/winucase.c [new file with mode: 0644]
fs/coredump.c
fs/dcache.c
fs/drop_caches.c
fs/ecryptfs/crypto.c
fs/eventpoll.c
fs/exec.c
fs/exofs/inode.c
fs/ext2/inode.c
fs/ext4/extents_status.c
fs/ext4/inode.c
fs/fat/inode.c
fs/file_table.c
fs/fs-writeback.c
fs/fscache/page.c
fs/fuse/dir.c
fs/fuse/inode.c
fs/gfs2/bmap.c
fs/gfs2/glock.c
fs/gfs2/main.c
fs/gfs2/quota.c
fs/gfs2/quota.h
fs/hfs/inode.c
fs/hfsplus/Kconfig
fs/hfsplus/Makefile
fs/hfsplus/acl.h [new file with mode: 0644]
fs/hfsplus/dir.c
fs/hfsplus/hfsplus_fs.h
fs/hfsplus/inode.c
fs/hfsplus/posix_acl.c [new file with mode: 0644]
fs/hfsplus/xattr.c
fs/hfsplus/xattr.h
fs/hfsplus/xattr_security.c
fs/hpfs/file.c
fs/inode.c
fs/internal.h
fs/jfs/inode.c
fs/mbcache.c
fs/minix/inode.c
fs/namei.c
fs/namespace.c
fs/nfs/dir.c
fs/nfs/direct.c
fs/nfs/inode.c
fs/nfs/internal.h
fs/nfs/nfs4_fs.h
fs/nfs/nfs4proc.c
fs/nfs/nfs4xdr.c
fs/nfs/super.c
fs/nfsd/nfscache.c
fs/nilfs2/inode.c
fs/ntfs/file.c
fs/ocfs2/acl.c
fs/ocfs2/aops.c
fs/ocfs2/cluster/heartbeat.c
fs/ocfs2/cluster/tcp.c
fs/ocfs2/dlm/dlmast.c
fs/ocfs2/dlm/dlmcommon.h
fs/ocfs2/dlm/dlmconvert.c
fs/ocfs2/dlm/dlmdebug.c
fs/ocfs2/dlm/dlmdomain.c
fs/ocfs2/dlm/dlmlock.c
fs/ocfs2/dlm/dlmmaster.c
fs/ocfs2/dlm/dlmrecovery.c
fs/ocfs2/dlm/dlmthread.c
fs/ocfs2/dlm/dlmunlock.c
fs/ocfs2/dlmfs/dlmfs.c
fs/ocfs2/extent_map.c
fs/ocfs2/file.c
fs/ocfs2/ioctl.c
fs/ocfs2/journal.c
fs/ocfs2/journal.h
fs/ocfs2/localalloc.c
fs/ocfs2/move_extents.c
fs/ocfs2/ocfs2_trace.h
fs/ocfs2/quota_global.c
fs/ocfs2/quota_local.c
fs/ocfs2/refcounttree.c
fs/ocfs2/xattr.c
fs/omfs/file.c
fs/proc/fd.c
fs/proc/meminfo.c
fs/proc/task_mmu.c
fs/proc/vmcore.c
fs/quota/dquot.c
fs/ramfs/inode.c
fs/read_write.c
fs/squashfs/block.c
fs/squashfs/dir.c
fs/squashfs/namei.c
fs/squashfs/squashfs_fs.h
fs/super.c
fs/sysv/itree.c
fs/ubifs/debug.c
fs/ubifs/shrinker.c
fs/ubifs/super.c
fs/ubifs/ubifs.h
fs/udf/file.c
fs/udf/inode.c
fs/ufs/inode.c
fs/xfs/kmem.c
fs/xfs/kmem.h
fs/xfs/xfs_acl.c
fs/xfs/xfs_aops.c
fs/xfs/xfs_bmap.c
fs/xfs/xfs_bmap_btree.c
fs/xfs/xfs_bmap_btree.h
fs/xfs/xfs_bmap_util.c
fs/xfs/xfs_btree.c
fs/xfs/xfs_btree.h
fs/xfs/xfs_buf.c
fs/xfs/xfs_buf.h
fs/xfs/xfs_buf_item.c
fs/xfs/xfs_da_btree.c
fs/xfs/xfs_dir2_leaf.c
fs/xfs/xfs_dquot.c
fs/xfs/xfs_dquot_item.c
fs/xfs/xfs_extent_busy.c
fs/xfs/xfs_icache.c
fs/xfs/xfs_icache.h
fs/xfs/xfs_inode_buf.c
fs/xfs/xfs_inode_buf.h
fs/xfs/xfs_ioctl.c
fs/xfs/xfs_ioctl32.c
fs/xfs/xfs_itable.c
fs/xfs/xfs_log.c
fs/xfs/xfs_log_format.h
fs/xfs/xfs_log_recover.c
fs/xfs/xfs_qm.c
fs/xfs/xfs_qm.h
fs/xfs/xfs_super.c
fs/xfs/xfs_symlink.c
include/linux/aio.h
include/linux/anon_inodes.h
include/linux/backing-dev.h
include/linux/binfmts.h
include/linux/cmdline-parser.h [new file with mode: 0644]
include/linux/compat.h
include/linux/cpu_rmap.h
include/linux/cpufreq.h
include/linux/crash_dump.h
include/linux/dcache.h
include/linux/fs.h
include/linux/fs_struct.h
include/linux/genalloc.h
include/linux/hardirq.h
include/linux/hid.h
include/linux/huge_mm.h
include/linux/hugetlb.h
include/linux/init.h
include/linux/interrupt.h
include/linux/iommu.h
include/linux/ipc_namespace.h
include/linux/irq.h
include/linux/irqdesc.h
include/linux/irqnr.h
include/linux/kernel_stat.h
include/linux/kprobes.h
include/linux/kvm_host.h
include/linux/list_lru.h [new file with mode: 0644]
include/linux/lz4.h
include/linux/memblock.h
include/linux/memcontrol.h
include/linux/mempolicy.h
include/linux/migrate.h
include/linux/mm.h
include/linux/mm_inline.h
include/linux/mm_types.h
include/linux/mmzone.h
include/linux/pci_ids.h
include/linux/percpu_ida.h [new file with mode: 0644]
include/linux/platform_data/exynos_thermal.h [deleted file]
include/linux/platform_data/leds-lp55xx.h
include/linux/platform_data/leds-pca9633.h [deleted file]
include/linux/platform_data/leds-pca963x.h [new file with mode: 0644]
include/linux/radix-tree.h
include/linux/ramfs.h
include/linux/rbtree.h
include/linux/res_counter.h
include/linux/sched.h
include/linux/seqlock.h
include/linux/shrinker.h
include/linux/slab.h
include/linux/slab_def.h
include/linux/slob_def.h [deleted file]
include/linux/slub_def.h
include/linux/smp.h
include/linux/swap.h
include/linux/syscalls.h
include/linux/thermal.h
include/linux/time-armada-370-xp.h [deleted file]
include/linux/timex.h
include/linux/vm_event_item.h
include/linux/vmstat.h
include/linux/writeback.h
include/net/9p/client.h
include/net/ndisc.h
include/scsi/scsi.h
include/sound/rcar_snd.h
include/target/iscsi/iscsi_transport.h
include/target/target_core_backend.h
include/target/target_core_base.h
include/target/target_core_fabric.h
include/trace/events/btrfs.h
include/trace/events/kmem.h
include/trace/events/vmscan.h
include/uapi/linux/btrfs.h
include/uapi/linux/cifs/cifs_mount.h [new file with mode: 0644]
include/uapi/linux/fs.h
include/uapi/linux/input.h
include/uapi/linux/perf_event.h
init/Kconfig
init/do_mounts.c
ipc/msg.c
ipc/namespace.c
ipc/sem.c
ipc/shm.c
ipc/util.c
ipc/util.h
kernel/Makefile
kernel/events/core.c
kernel/events/uprobes.c
kernel/extable.c
kernel/fork.c
kernel/gcov/fs.c
kernel/irq/Kconfig
kernel/kexec.c
kernel/kprobes.c
kernel/ksysfs.c
kernel/modsign_pubkey.c
kernel/panic.c
kernel/params.c
kernel/power/hibernate.c
kernel/power/snapshot.c
kernel/power/user.c
kernel/ptrace.c
kernel/res_counter.c
kernel/sched/debug.c
kernel/sched/fair.c
kernel/sched/stats.h
kernel/signal.c
kernel/smp.c
kernel/softirq.c
kernel/spinlock.c
kernel/sysctl.c
kernel/task_work.c
kernel/time/ntp.c
kernel/time/timekeeping.c
kernel/up.c
lib/Kconfig.debug
lib/Makefile
lib/cpu_rmap.c
lib/crc-t10dif.c
lib/crc32.c
lib/decompress_inflate.c
lib/genalloc.c
lib/lz4/lz4_decompress.c
lib/percpu_ida.c [new file with mode: 0644]
lib/radix-tree.c
lib/rbtree.c
lib/rbtree_test.c
mm/Kconfig
mm/Makefile
mm/backing-dev.c
mm/compaction.c
mm/filemap.c
mm/huge_memory.c
mm/hugetlb.c
mm/hwpoison-inject.c
mm/internal.h
mm/kmemleak.c
mm/ksm.c
mm/list_lru.c [new file with mode: 0644]
mm/madvise.c
mm/memblock.c
mm/memcontrol.c
mm/memory-failure.c
mm/memory.c
mm/memory_hotplug.c
mm/mempolicy.c
mm/mempool.c
mm/migrate.c
mm/mlock.c
mm/mmap.c
mm/mremap.c
mm/oom_kill.c
mm/page-writeback.c
mm/page_alloc.c
mm/page_io.c
mm/page_isolation.c
mm/pgtable-generic.c
mm/readahead.c
mm/rmap.c
mm/shmem.c
mm/slab_common.c
mm/slob.c
mm/slub.c
mm/sparse.c
mm/swap.c
mm/swap_state.c
mm/swapfile.c
mm/truncate.c
mm/util.c
mm/vmalloc.c
mm/vmscan.c
mm/vmstat.c
mm/zbud.c
mm/zswap.c
net/9p/client.c
net/9p/trans_virtio.c
net/Kconfig
net/core/flow_dissector.c
net/ipv4/tcp_memcontrol.c
net/ipv6/af_inet6.c
net/ipv6/exthdrs.c
net/ipv6/fib6_rules.c
net/ipv6/ip6_fib.c
net/ipv6/ndisc.c
net/openvswitch/flow.c
net/sched/sch_htb.c
net/sctp/socket.c
net/socket.c
net/sunrpc/auth.c
net/sunrpc/auth_generic.c
net/sunrpc/auth_gss/auth_gss.c
scripts/checkpatch.pl
scripts/config
scripts/package/buildtar
sound/pci/hda/hda_intel.c
sound/pci/hda/patch_cirrus.c
sound/pci/hda/patch_hdmi.c
sound/pci/hda/patch_realtek.c
sound/soc/atmel/atmel_ssc_dai.c
sound/soc/codecs/Kconfig
sound/soc/codecs/mc13783.c
sound/soc/fsl/Kconfig
sound/soc/fsl/imx-audmux.c
sound/soc/kirkwood/kirkwood-i2s.c
sound/soc/samsung/Kconfig
sound/soc/sh/rcar/scu.c
tools/perf/Makefile
tools/perf/builtin-annotate.c
tools/perf/builtin-inject.c
tools/perf/builtin-kvm.c
tools/perf/builtin-mem.c
tools/perf/builtin-report.c
tools/perf/builtin-script.c
tools/perf/builtin-trace.c
tools/perf/tests/builtin-test.c
tools/perf/tests/parse-no-sample-id-all.c [new file with mode: 0644]
tools/perf/tests/perf-record.c
tools/perf/tests/tests.h
tools/perf/ui/stdio/hist.c
tools/perf/util/build-id.c
tools/perf/util/event.c
tools/perf/util/event.h
tools/perf/util/evlist.c
tools/perf/util/evsel.c
tools/perf/util/header.c
tools/perf/util/machine.c
tools/perf/util/machine.h
tools/perf/util/map.c
tools/perf/util/map.h
tools/perf/util/session.c
tools/perf/util/tool.h
virt/kvm/async_pf.c
virt/kvm/kvm_main.c

index 8686e78..1f06daf 100644 (file)
@@ -23,4 +23,4 @@ SUBSYSTEM=="aoe", KERNEL=="revalidate",       NAME="etherd/%k", GROUP="disk", MODE="02
 SUBSYSTEM=="aoe", KERNEL=="flush",     NAME="etherd/%k", GROUP="disk", MODE="0220"
 
 # aoe block devices     
-KERNEL=="etherd*",       NAME="%k", GROUP="disk"
+KERNEL=="etherd*",       GROUP="disk"
diff --git a/Documentation/block/cmdline-partition.txt b/Documentation/block/cmdline-partition.txt
new file mode 100644 (file)
index 0000000..2bbf4cc
--- /dev/null
@@ -0,0 +1,39 @@
+Embedded device command line partition
+=====================================================================
+
+Read the block device partition table from the command line.
+This is intended for fixed block devices (eMMC) on embedded devices.
+No MBR is needed, which saves storage space. The bootloader can easily
+access data on the block device by absolute address.
+Users can easily change the partitions.
+
+The format for the command line is just like mtdparts:
+
+blkdevparts=<blkdev-def>[;<blkdev-def>]
+  <blkdev-def> := <blkdev-id>:<partdef>[,<partdef>]
+    <partdef> := <size>[@<offset>](part-name)
+
+<blkdev-id>
+    block device disk name. Embedded devices use fixed block devices,
+    so their disk names are also fixed, such as: mmcblk0, mmcblk1, mmcblk0boot0.
+
+<size>
+    partition size, in bytes, such as: 512, 1m, 1G.
+
+<offset>
+    partition start address, in bytes.
+
+(part-name)
+    partition name. The kernel sends a uevent with "PARTNAME". Applications can
+    create a link to the block device partition with the name "PARTNAME".
+    User space applications can access a partition by its partition name.
+
+Example:
+    eMMC disk name is "mmcblk0" and "mmcblk0boot0"
+
+  bootargs:
+    'blkdevparts=mmcblk0:1G(data0),1G(data1),-;mmcblk0boot0:1m(boot),-(kernel)'
+
+  dmesg:
+    mmcblk0: p1(data0) p2(data1) p3()
+    mmcblk0boot0: p1(boot) p2(kernel)
index 2a33306..8af4ad1 100644 (file)
@@ -490,6 +490,8 @@ pgpgin              - # of charging events to the memory cgroup. The charging
 pgpgout                - # of uncharging events to the memory cgroup. The uncharging
                event happens each time a page is unaccounted from the cgroup.
 swap           - # of bytes of swap usage
+writeback      - # of bytes of file/anon cache that are queued for syncing to
+               disk.
 inactive_anon  - # of bytes of anonymous and swap cache memory on inactive
                LRU list.
 active_anon    - # of bytes of anonymous and swap cache memory on active
index d517688..a61727f 100644 (file)
@@ -1,7 +1,7 @@
 Binding for TI/National Semiconductor LP55xx Led Drivers
 
 Required properties:
-- compatible: "national,lp5521" or "national,lp5523" or "ti,lp5562"
+- compatible: "national,lp5521" or "national,lp5523" or "ti,lp5562" or "ti,lp8501"
 - reg: I2C slave address
 - clock-mode: Input clock mode, (0: automode, 1: internal, 2: external)
 
@@ -11,6 +11,11 @@ Each child has own specific current settings
 
 Optional properties:
 - label: Used for naming LEDs
+- pwr-sel: LP8501 specific property. Power selection for output channels.
+         0: D1~9 are connected to VDD
+         1: D1~6 with VDD, D7~9 with VOUT
+         2: D1~6 with VOUT, D7~9 with VDD
+         3: D1~9 are connected to VOUT
 
 Alternatively, each child can have specific channel name
 - chan-name: Name of each channel name
@@ -145,3 +150,68 @@ lp5562@30 {
                max-cur = /bits/ 8 <0x60>;
        };
 };
+
+example 4) LP8501
+9 channels are defined. The 'pwr-sel' is LP8501 specific property.
+Others are same as LP5523.
+
+lp8501@32 {
+       compatible = "ti,lp8501";
+       reg = <0x32>;
+       clock-mode = /bits/ 8 <2>;
+       pwr-sel = /bits/ 8 <3>; /* D1~9 connected to VOUT */
+
+       chan0 {
+               chan-name = "d1";
+               led-cur = /bits/ 8 <0x14>;
+               max-cur = /bits/ 8 <0x20>;
+       };
+
+       chan1 {
+               chan-name = "d2";
+               led-cur = /bits/ 8 <0x14>;
+               max-cur = /bits/ 8 <0x20>;
+       };
+
+       chan2 {
+               chan-name = "d3";
+               led-cur = /bits/ 8 <0x14>;
+               max-cur = /bits/ 8 <0x20>;
+       };
+
+       chan3 {
+               chan-name = "d4";
+               led-cur = /bits/ 8 <0x14>;
+               max-cur = /bits/ 8 <0x20>;
+       };
+
+       chan4 {
+               chan-name = "d5";
+               led-cur = /bits/ 8 <0x14>;
+               max-cur = /bits/ 8 <0x20>;
+       };
+
+       chan5 {
+               chan-name = "d6";
+               led-cur = /bits/ 8 <0x14>;
+               max-cur = /bits/ 8 <0x20>;
+       };
+
+       chan6 {
+               chan-name = "d7";
+               led-cur = /bits/ 8 <0x14>;
+               max-cur = /bits/ 8 <0x20>;
+       };
+
+       chan7 {
+               chan-name = "d8";
+               led-cur = /bits/ 8 <0x14>;
+               max-cur = /bits/ 8 <0x20>;
+       };
+
+       chan8 {
+               chan-name = "d9";
+               led-cur = /bits/ 8 <0x14>;
+               max-cur = /bits/ 8 <0x20>;
+       };
+};
diff --git a/Documentation/devicetree/bindings/leds/pca963x.txt b/Documentation/devicetree/bindings/leds/pca963x.txt
new file mode 100644 (file)
index 0000000..aece3ea
--- /dev/null
@@ -0,0 +1,47 @@
+LEDs connected to pca9632, pca9633 or pca9634
+
+Required properties:
+- compatible : should be : "nxp,pca9632", "nxp,pca9633" or "nxp,pca9634"
+
+Optional properties:
+- nxp,totem-pole : use totem pole (push-pull) instead of default open-drain
+- nxp,hw-blink : use hardware blinking instead of software blinking
+
+Each led is represented as a sub-node of the nxp,pca963x device.
+
+LED sub-node properties:
+- label : (optional) see Documentation/devicetree/bindings/leds/common.txt
+- reg : number of LED line (could be from 0 to 3  in pca9632 or pca9633
+               or 0 to 7 in pca9634)
+- linux,default-trigger : (optional)
+   see Documentation/devicetree/bindings/leds/common.txt
+
+Examples:
+
+pca9632: pca9632 {
+       compatible = "nxp,pca9632";
+       #address-cells = <1>;
+       #size-cells = <0>;
+       reg = <0x62>;
+
+       red@0 {
+               label = "red";
+               reg = <0>;
+               linux,default-trigger = "none";
+       };
+       green@1 {
+               label = "green";
+               reg = <1>;
+               linux,default-trigger = "none";
+       };
+       blue@2 {
+               label = "blue";
+               reg = <2>;
+               linux,default-trigger = "none";
+       };
+       unused@3 {
+               label = "unused";
+               reg = <3>;
+               linux,default-trigger = "none";
+       };
+};
diff --git a/Documentation/devicetree/bindings/pci/ralink,rt3883-pci.txt b/Documentation/devicetree/bindings/pci/ralink,rt3883-pci.txt
new file mode 100644 (file)
index 0000000..8e0a1eb
--- /dev/null
@@ -0,0 +1,190 @@
+* Mediatek/Ralink RT3883 PCI controller
+
+1) Main node
+
+   Required properties:
+
+   - compatible: must be "ralink,rt3883-pci"
+
+   - reg: specifies the physical base address of the controller and
+     the length of the memory mapped region.
+
+   - #address-cells: specifies the number of cells needed to encode an
+     address. The value must be 1.
+
+   - #size-cells: specifies the number of cells used to represent the size
+     of an address. The value must be 1.
+
+   - ranges: specifies the translation between child address space and parent
+     address space
+
+  Optional properties:
+
+   - status: indicates the operational status of the device.
+     Value must be either "disabled" or "okay".
+
+2) Child nodes
+
+   The main node must have two child nodes which describe the built-in
+   interrupt controller and the PCI host bridge.
+
+   a) Interrupt controller:
+
+   Required properties:
+
+   - interrupt-controller: identifies the node as an interrupt controller
+
+   - #address-cells: specifies the number of cells needed to encode an
+     address. The value must be 0. As such, 'interrupt-map' nodes do not
+     have to specify a parent unit address.
+
+   - #interrupt-cells: specifies the number of cells needed to encode an
+     interrupt source. The value must be 1.
+
+   - interrupt-parent: the phandle for the interrupt controller that
+     services interrupts for this device.
+
+   - interrupts: specifies the interrupt source of the parent interrupt
+     controller. The format of the interrupt specifier depends on the
+     parent interrupt controller.
+
+   b) PCI host bridge:
+
+   Required properties:
+
+   - #address-cells: specifies the number of cells needed to encode an
+     address. The value must be 0.
+
+   - #size-cells: specifies the number of cells used to represent the size
+     of an address. The value must be 2.
+
+   - #interrupt-cells: specifies the number of cells needed to encode an
+     interrupt source. The value must be 1.
+
+   - device_type: must be "pci"
+
+   - bus-range: PCI bus numbers covered
+
+   - ranges: specifies the ranges for the PCI memory and I/O regions
+
+   - interrupt-map-mask,
+   - interrupt-map: standard PCI properties to define the mapping of the
+     PCI interface to interrupt numbers.
+
+   The PCI host bridge node might have additional sub-nodes representing
+   the onboard PCI devices/PCI slots. Each such sub-node must have the
+   following mandatory properties:
+
+     - reg: used only for interrupt mapping, so only the first four bytes
+       are used to refer to the correct bus number and device number.
+
+     - device_type: must be "pci"
+
+   If a given sub-node represents a PCI bridge it must have following
+   mandatory properties as well:
+
+     - #address-cells: must be set to <3>
+
+     - #size-cells: must be set to <2>
+
+     - #interrupt-cells: must be set to <1>
+
+     - interrupt-map-mask,
+     - interrupt-map: standard PCI properties to define the mapping of the
+       PCI interface to interrupt numbers.
+
+   Besides the required properties the sub-nodes may have these optional
+   properties:
+
+     - status: indicates the operational status of the sub-node.
+       Value must be either "disabled" or "okay".
+
+3) Example:
+
+   a) SoC specific dtsi file:
+
+       pci@10140000 {
+               compatible = "ralink,rt3883-pci";
+               reg = <0x10140000 0x20000>;
+               #address-cells = <1>;
+               #size-cells = <1>;
+               ranges; /* direct mapping */
+
+               status = "disabled";
+
+               pciintc: interrupt-controller {
+                       interrupt-controller;
+                       #address-cells = <0>;
+                       #interrupt-cells = <1>;
+
+                       interrupt-parent = <&cpuintc>;
+                       interrupts = <4>;
+               };
+
+               host-bridge {
+                       #address-cells = <3>;
+                       #size-cells = <2>;
+                       #interrupt-cells = <1>;
+
+                       device_type = "pci";
+
+                       bus-range = <0 255>;
+                       ranges = <
+                               0x02000000 0 0x00000000 0x20000000 0 0x10000000 /* pci memory */
+                               0x01000000 0 0x00000000 0x10160000 0 0x00010000 /* io space */
+                       >;
+
+                       interrupt-map-mask = <0xf800 0 0 7>;
+                       interrupt-map = <
+                               /* IDSEL 17 */
+                               0x8800 0 0 1 &pciintc 18
+                               0x8800 0 0 2 &pciintc 18
+                               0x8800 0 0 3 &pciintc 18
+                               0x8800 0 0 4 &pciintc 18
+                               /* IDSEL 18 */
+                               0x9000 0 0 1 &pciintc 19
+                               0x9000 0 0 2 &pciintc 19
+                               0x9000 0 0 3 &pciintc 19
+                               0x9000 0 0 4 &pciintc 19
+                       >;
+
+                       pci-bridge@1 {
+                               reg = <0x0800 0 0 0 0>;
+                               device_type = "pci";
+                               #interrupt-cells = <1>;
+                               #address-cells = <3>;
+                               #size-cells = <2>;
+
+                               interrupt-map-mask = <0x0 0 0 0>;
+                               interrupt-map = <0x0 0 0 0 &pciintc 20>;
+
+                               status = "disabled";
+                       };
+
+                       pci-slot@17 {
+                               reg = <0x8800 0 0 0 0>;
+                               device_type = "pci";
+
+                               status = "disabled";
+                       };
+
+                       pci-slot@18 {
+                               reg = <0x9000 0 0 0 0>;
+                               device_type = "pci";
+
+                               status = "disabled";
+                       };
+               };
+       };
+
+   b) Board specific dts file:
+
+       pci@10140000 {
+               status = "okay";
+
+               host-bridge {
+                       pci-bridge@1 {
+                               status = "okay";
+                       };
+               };
+       };
diff --git a/Documentation/devicetree/bindings/rtc/moxa,moxart-rtc.txt b/Documentation/devicetree/bindings/rtc/moxa,moxart-rtc.txt
new file mode 100644 (file)
index 0000000..c9d3ac1
--- /dev/null
@@ -0,0 +1,17 @@
+MOXA ART real-time clock
+
+Required properties:
+
+- compatible : Should be "moxa,moxart-rtc"
+- gpio-rtc-sclk : RTC sclk gpio, with zero flags
+- gpio-rtc-data : RTC data gpio, with zero flags
+- gpio-rtc-reset : RTC reset gpio, with zero flags
+
+Example:
+
+       rtc: rtc {
+               compatible = "moxa,moxart-rtc";
+               gpio-rtc-sclk = <&gpio 5 0>;
+               gpio-rtc-data = <&gpio 6 0>;
+               gpio-rtc-reset = <&gpio 7 0>;
+       };
index b47aa41..5a0f02d 100644 (file)
@@ -1,7 +1,11 @@
 TI Real Time Clock
 
 Required properties:
-- compatible: "ti,da830-rtc"
+- compatible:
+       - "ti,da830-rtc"  - for RTC IP used similar to that on DA8xx SoC family.
+       - "ti,am3352-rtc" - for RTC IP used similar to that on AM335x SoC family.
+                           This RTC IP has special WAKE-EN Register to enable
+                           Wakeup generation for event Alarm.
 - reg: Address range of rtc register set
 - interrupts: rtc timer, alarm interrupts in order
 - interrupt-parent: phandle for the interrupt controller
diff --git a/Documentation/devicetree/bindings/rtc/rtc-palmas.txt b/Documentation/devicetree/bindings/rtc/rtc-palmas.txt
new file mode 100644 (file)
index 0000000..adbccc0
--- /dev/null
@@ -0,0 +1,33 @@
+Palmas RTC controller bindings
+
+Required properties:
+- compatible:
+  - "ti,palmas-rtc" for palma series of the RTC controller
+- interrupt-parent: Parent interrupt device, must be handle of palmas node.
+- interrupts: Interrupt number of RTC submodule on device.
+
+Optional properties:
+
+- ti,backup-battery-chargeable: The Palmas series device like TPS65913 or
+       TPS80036 supports the backup battery for powering the RTC when main
+       battery is removed or in very low power state. The backup battery
+       can be chargeable or non-chargeable. This flag tells whether the
+       battery is chargeable or not. If the battery is chargeable, the
+       driver can enable charging.
+- ti,backup-battery-charge-high-current: Enable high current charging in
+       backup battery. Device supports the < 100mA and > 100mA charging.
+       The high current will be > 100mA. Absence of this property will
+       charge battery to lower current i.e. < 100mA.
+
+Example:
+       palmas: tps65913@58 {
+               ...
+               palmas_rtc: rtc {
+                       compatible = "ti,palmas-rtc";
+                       interrupt-parent = <&palmas>;
+                       interrupts = <8 0>;
+                       ti,backup-battery-chargeable;
+                       ti,backup-battery-charge-high-current;
+               };
+               ...
+       };
index 7e5fd37..f0062c5 100644 (file)
@@ -2,13 +2,17 @@
 
 Required properties:
 
-- compatible: "marvell,mvebu-audio"
+- compatible:
+  "marvell,kirkwood-audio" for Kirkwood platforms
+  "marvell,dove-audio" for Dove platforms
 
 - reg: physical base address of the controller and length of memory mapped
   region.
 
-- interrupts: list of two irq numbers.
-  The first irq is used for data flow and the second one is used for errors.
+- interrupts:
+  with "marvell,kirkwood-audio", the audio interrupt
+  with "marvell,dove-audio", a list of two interrupts, the first for
+  the data flow, and the second for errors.
 
 - clocks: one or two phandles.
   The first one is mandatory and defines the internal clock.
@@ -21,7 +25,7 @@ Required properties:
 Example:
 
 i2s1: audio-controller@b4000 {
-       compatible = "marvell,mvebu-audio";
+       compatible = "marvell,dove-audio";
        reg = <0xb4000 0x2210>;
        interrupts = <21>, <22>;
        clocks = <&gate_clk 13>;
diff --git a/Documentation/devicetree/bindings/thermal/exynos-thermal.txt b/Documentation/devicetree/bindings/thermal/exynos-thermal.txt
new file mode 100644 (file)
index 0000000..284f530
--- /dev/null
@@ -0,0 +1,55 @@
+* Exynos Thermal Management Unit (TMU)
+
+** Required properties:
+
+- compatible : One of the following:
+              "samsung,exynos4412-tmu"
+              "samsung,exynos4210-tmu"
+              "samsung,exynos5250-tmu"
+              "samsung,exynos5440-tmu"
+- interrupt-parent : The phandle for the interrupt controller
+- reg : Address range of the thermal registers. For SoCs which have multiple
+       instances of the TMU, where some registers (e.g. interrupt related
+       ones) are shared across all TMUs, two sets of registers have to be
+       supplied. The first set belongs to each instance of the TMU and the
+       second set belongs to the common TMU registers.
+- interrupts : Should contain interrupt for thermal system
+- clocks : The main clock for TMU device
+- clock-names : Thermal system clock name
+- vtmu-supply: This entry is optional and provides the regulator node supplying
+               voltage to TMU. If needed this entry can be placed inside
+               board/platform specific dts file.
+
+Example 1):
+
+       tmu@100C0000 {
+               compatible = "samsung,exynos4412-tmu";
+               interrupt-parent = <&combiner>;
+               reg = <0x100C0000 0x100>;
+               interrupts = <2 4>;
+               clocks = <&clock 383>;
+               clock-names = "tmu_apbif";
+               status = "disabled";
+               vtmu-supply = <&tmu_regulator_node>;
+       };
+
+Example 2):
+
+       tmuctrl_0: tmuctrl@160118 {
+               compatible = "samsung,exynos5440-tmu";
+               reg = <0x160118 0x230>, <0x160368 0x10>;
+               interrupts = <0 58 0>;
+               clocks = <&clock 21>;
+               clock-names = "tmu_apbif";
+       };
+
+Note: For multi-instance tmu each instance should have an alias correctly
+numbered in "aliases" node.
+
+Example:
+
+aliases {
+       tmuctrl0 = &tmuctrl_0;
+       tmuctrl1 = &tmuctrl_1;
+       tmuctrl2 = &tmuctrl_2;
+};
diff --git a/Documentation/devicetree/bindings/thermal/imx-thermal.txt b/Documentation/devicetree/bindings/thermal/imx-thermal.txt
new file mode 100644 (file)
index 0000000..541c25e
--- /dev/null
@@ -0,0 +1,17 @@
+* Temperature Monitor (TEMPMON) on Freescale i.MX SoCs
+
+Required properties:
+- compatible : "fsl,imx6q-thermal"
+- fsl,tempmon : phandle pointer to system controller that contains TEMPMON
+  control registers, e.g. ANATOP on imx6q.
+- fsl,tempmon-data : phandle pointer to fuse controller that contains TEMPMON
+  calibration data, e.g. OCOTP on imx6q.  The details about calibration data
+  can be found in SoC Reference Manual.
+
+Example:
+
+tempmon {
+       compatible = "fsl,imx6q-tempmon";
+       fsl,tempmon = <&anatop>;
+       fsl,tempmon-data = <&ocotp>;
+};
index 3638112..f455182 100644 (file)
@@ -2,14 +2,40 @@ Marvell Armada 370 and Armada XP Timers
 ---------------------------------------
 
 Required properties:
-- compatible: Should be "marvell,armada-370-xp-timer"
+- compatible: Should be either "marvell,armada-370-timer" or
+  "marvell,armada-xp-timer" as appropriate.
 - interrupts: Should contain the list of Global Timer interrupts and
   then local timer interrupts
 - reg: Should contain location and length for timers register. First
   pair for the Global Timer registers, second pair for the
   local/private timers.
-- clocks: clock driving the timer hardware
 
-Optional properties:
-- marvell,timer-25Mhz: Tells whether the Global timer supports the 25
-  Mhz fixed mode (available on Armada XP and not on Armada 370)
+Clocks required for compatible = "marvell,armada-370-timer":
+- clocks : Must contain a single entry describing the clock input
+
+Clocks required for compatible = "marvell,armada-xp-timer":
+- clocks : Must contain an entry for each entry in clock-names.
+- clock-names : Must include the following entries:
+  "nbclk" (L2/coherency fabric clock),
+  "fixed" (Reference 25 MHz fixed-clock).
+
+Examples:
+
+- Armada 370:
+
+       timer {
+               compatible = "marvell,armada-370-timer";
+               reg = <0x20300 0x30>, <0x21040 0x30>;
+               interrupts = <37>, <38>, <39>, <40>, <5>, <6>;
+               clocks = <&coreclk 2>;
+       };
+
+- Armada XP:
+
+       timer {
+               compatible = "marvell,armada-xp-timer";
+               reg = <0x20300 0x30>, <0x21040 0x30>;
+               interrupts = <37>, <38>, <39>, <40>, <5>, <6>;
+               clocks = <&coreclk 2>, <&refclk>;
+               clock-names = "nbclk", "fixed";
+       };
index e31a2a9..505e711 100644 (file)
@@ -407,6 +407,18 @@ Being able to mmap an export dma-buf buffer object has 2 main use-cases:
    interesting ways depending upon the exporter (if userspace starts depending
    upon this implicit synchronization).
 
+Other Interfaces Exposed to Userspace on the dma-buf FD
+------------------------------------------------------
+
+- Since kernel 3.12 the dma-buf FD supports the llseek system call, but only
+  with offset=0 and whence=SEEK_END|SEEK_SET. SEEK_SET is supported to allow
+  the usual size discovery pattern: size = SEEK_END(0); SEEK_SET(0). Every other
+  llseek operation will report -EINVAL.
+
+  If llseek on dma-buf FDs isn't supported, the kernel will report -ESPIPE for all
+  cases. Userspace can use this to detect support for discovering the dma-buf
+  size using llseek.
+
 Miscellaneous notes
 -------------------
 
diff --git a/Documentation/filesystems/cifs.txt b/Documentation/filesystems/cifs.txt
deleted file mode 100644 (file)
index 49cc923..0000000
+++ /dev/null
@@ -1,51 +0,0 @@
-  This is the client VFS module for the Common Internet File System
-  (CIFS) protocol which is the successor to the Server Message Block 
-  (SMB) protocol, the native file sharing mechanism for most early
-  PC operating systems.  CIFS is fully supported by current network
-  file servers such as Windows 2000, Windows 2003 (including  
-  Windows XP) as well by Samba (which provides excellent CIFS
-  server support for Linux and many other operating systems), so
-  this network filesystem client can mount to a wide variety of
-  servers.  The smbfs module should be used instead of this cifs module
-  for mounting to older SMB servers such as OS/2.  The smbfs and cifs
-  modules can coexist and do not conflict.  The CIFS VFS filesystem
-  module is designed to work well with servers that implement the
-  newer versions (dialects) of the SMB/CIFS protocol such as Samba, 
-  the program written by Andrew Tridgell that turns any Unix host 
-  into a SMB/CIFS file server.
-
-  The intent of this module is to provide the most advanced network
-  file system function for CIFS compliant servers, including better
-  POSIX compliance, secure per-user session establishment, high
-  performance safe distributed caching (oplock), optional packet
-  signing, large files, Unicode support and other internationalization
-  improvements. Since both Samba server and this filesystem client support
-  the CIFS Unix extensions, the combination can provide a reasonable 
-  alternative to NFSv4 for fileserving in some Linux to Linux environments,
-  not just in Linux to Windows environments.
-
-  This filesystem has an optional mount utility (mount.cifs) that can
-  be obtained from the project page and installed in the path in the same
-  directory with the other mount helpers (such as mount.smbfs). 
-  Mounting using the cifs filesystem without installing the mount helper
-  requires specifying the server's ip address.
-
-  For Linux 2.4:
-    mount //anything/here /mnt_target -o
-            user=username,pass=password,unc=//ip_address_of_server/sharename
-
-  For Linux 2.5: 
-    mount //ip_address_of_server/sharename /mnt_target -o user=username, pass=password
-
-
-  For more information on the module see the project page at
-
-      http://us1.samba.org/samba/Linux_CIFS_client.html 
-
-  For more information on CIFS see:
-
-      http://www.snia.org/tech_activities/CIFS
-
-  or the Samba site:
-     
-      http://www.samba.org
diff --git a/Documentation/filesystems/cifs/AUTHORS b/Documentation/filesystems/cifs/AUTHORS
new file mode 100644 (file)
index 0000000..ca4a67a
--- /dev/null
@@ -0,0 +1,56 @@
+Original Author
+===============
+Steve French (sfrench@samba.org)
+
+The author wishes to express his appreciation and thanks to:
+Andrew Tridgell (Samba team) for his early suggestions about smb/cifs VFS
+improvements. Thanks to IBM for allowing me time and test resources to pursue
+this project, to Jim McDonough from IBM (and the Samba Team) for his help, to
+the IBM Linux JFS team for explaining many esoteric Linux filesystem features.
+Jeremy Allison of the Samba team has done invaluable work in adding the server
+side of the original CIFS Unix extensions and reviewing and implementing
+portions of the newer CIFS POSIX extensions into the Samba 3 file server. Thank
+Dave Boutcher of IBM Rochester (author of the OS/400 smb/cifs filesystem client)
+for proving years ago that very good smb/cifs clients could be done on Unix-like
+operating systems.  Volker Lendecke, Andrew Tridgell, Urban Widmark, John 
+Newbigin and others for their work on the Linux smbfs module.  Thanks to
+the other members of the Storage Network Industry Association CIFS Technical
+Workgroup for their work specifying this highly complex protocol and finally
+thanks to the Samba team for their technical advice and encouragement.
+
+Patch Contributors
+------------------
+Zwane Mwaikambo
+Andi Kleen
+Amrut Joshi
+Shobhit Dayal
+Sergey Vlasov
+Richard Hughes
+Yury Umanets
+Mark Hamzy (for some of the early cifs IPv6 work)
+Domen Puncer
+Jesper Juhl (in particular for lots of whitespace/formatting cleanup)
+Vince Negri and Dave Stahl (for finding an important caching bug)
+Adrian Bunk (kcalloc cleanups)
+Miklos Szeredi 
+Kazeon team for various fixes especially for 2.4 version.
+Asser Ferno (Change Notify support)
+Shaggy (Dave Kleikamp) for innumerable small fs suggestions and some good cleanup
+Gunter Kukkukk (testing and suggestions for support of old servers)
+Igor Mammedov (DFS support)
+Jeff Layton (many, many fixes, as well as great work on the cifs Kerberos code)
+Scott Lovenberg
+
+Test case and Bug Report contributors
+-------------------------------------
+Thanks to those in the community who have submitted detailed bug reports
+and debug of problems they have found:  Jochen Dolze, David Blaine,
+Rene Scharfe, Martin Josefsson, Alexander Wild, Anthony Liguori,
+Lars Muller, Urban Widmark, Massimiliano Ferrero, Howard Owen,
+Olaf Kirch, Kieron Briggs, Nick Millington and others. Also special
+mention to the Stanford Checker (SWAT) which pointed out many minor
+bugs in error paths.  Valuable suggestions also have come from Al Viro
+and Dave Miller.
+
+And thanks to the IBM LTC and Power test teams and SuSE testers for
+finding multiple bugs during excellent stress test runs.
diff --git a/Documentation/filesystems/cifs/CHANGES b/Documentation/filesystems/cifs/CHANGES
new file mode 100644 (file)
index 0000000..bc0025c
--- /dev/null
@@ -0,0 +1,1065 @@
+Version 1.62
+------------
+Add sockopt=TCP_NODELAY mount option. EA (xattr) routines hardened
+to more strictly handle corrupt frames.
+
+Version 1.61
+------------
+Fix append problem to Samba servers (files opened with O_APPEND could
+have duplicated data). Fix oops in cifs_lookup. Workaround problem
+mounting to OS/400 Netserve. Fix oops in cifs_get_tcp_session.
+Disable use of server inode numbers when server only
+partially supports them (e.g. for one server querying inode numbers on
+FindFirst fails but QPathInfo queries work). Fix oops with dfs in 
+cifs_put_smb_ses. Fix mmap to work on directio mounts (needed
+for OpenOffice when on forcedirectio mount e.g.)
+
+Version 1.60
+-------------
+Fix memory leak in reconnect.  Fix oops in DFS mount error path.
+Set s_maxbytes to smaller (the max that vfs can handle) so that
+sendfile will now work over cifs mounts again.  Add noforcegid
+and noforceuid mount parameters. Fix small mem leak when using
+ntlmv2. Fix 2nd mount to same server but with different port to
+be allowed (rather than reusing the 1st port) - only when the
+user explicitly overrides the port on the 2nd mount.
+
+Version 1.59
+------------
+Client uses server inode numbers (which are persistent) rather than
+client generated ones by default (mount option "serverino" turned
+on by default if server supports it).  Add forceuid and forcegid
+mount options (so that when negotiating unix extensions specifying
+which uid mounted does not immediately force the server's reported
+uids to be overridden).  Add support for scope mount parm. Improve
+hard link detection to use same inode for both.  Do not set
+read-only dos attribute on directories (for chmod) since Windows
+explorer special cases this attribute bit for directories for
+a different purpose.
+
+Version 1.58
+------------
+Guard against buffer overruns in various UCS-2 to UTF-8 string conversions
+when the UTF-8 string is composed of unusually long (more than 4 byte) converted
+characters. Add support for mounting root of a share which redirects immediately
+to DFS target. Convert string conversion functions from Unicode to more
+accurately mark string length before allocating memory (which may help the
+rare cases where a UTF-8 string is much larger than the UCS2 string that
+we converted from).  Fix endianness of the vcnum field used during
+session setup to distinguish multiple mounts to same server from different
+userids. Raw NTLMSSP fixed (it requires /proc/fs/cifs/experimental
+flag to be set to 2, and mount must enable krb5 to turn on extended security).
+Performance of file create to Samba improved (posix create on lookup
+removes 1 of 2 network requests sent on file create)
+Version 1.57
+------------
+Improve support for multiple security contexts to the same server. We
+used to use the same "vcnumber" for all connections which could cause
+the server to treat subsequent connections, especially those that
+are authenticated as guest, as reconnections, invalidating the earlier
+user's smb session.  This fix allows cifs to mount multiple times to the
+same server with different userids without risking invalidating earlier
+established security contexts.  fsync now sends SMB Flush operation
+to better ensure that we wait for server to write all of the data to
+server disk (not just write it over the network).  Add new mount
+parameter to allow user to disable sending the (slow) SMB flush on
+fsync if desired (fsync still flushes all cached write data to the server).
+Posix file open support added (turned off after one attempt if server
+fails to support it properly, as with Samba server versions prior to 3.3.2)
+Fix "redzone overwritten" bug in cifs_put_tcon (CIFSTcon may allocate too
+little memory for the "nativeFileSystem" field returned by the server
+during mount).  Endian convert inode numbers if necessary (makes it easier
+to compare inode numbers on network files from big endian systems). 
+
+Version 1.56
+------------
+Add "forcemandatorylock" mount option to allow user to use mandatory
+rather than posix (advisory) byte range locks, even though server would
+support posix byte range locks.  Fix query of root inode when prefixpath
+specified and user does not have access to query information about the
+top of the share.  Fix problem in 2.6.28 resolving DFS paths to
+Samba servers (worked to Windows).  Fix rmdir so that pending search
+(readdir) requests do not get invalid results which include the now
+removed directory.  Fix oops in cifs_dfs_ref.c when prefixpath is not reachable
+when using DFS.  Add better file create support to servers which support
+the CIFS POSIX protocol extensions (this adds support for new flags
+on create, and improves semantics for write of locked ranges).
+
+Version 1.55
+------------
+Various fixes to make delete of open files behavior more predictable
+(when delete of an open file fails we mark the file as "delete-on-close"
+in a way that more servers accept, but only if we can first rename the
+file to a temporary name).  Add experimental support for more safely
+handling fcntl(F_SETLEASE).  Convert cifs to using blocking tcp
+sends, and also let tcp autotune the socket send and receive buffers.
+This reduces the number of EAGAIN errors returned by TCP/IP in
+high stress workloads (and the number of retries on socket writes
+when sending large SMBWriteX requests).  Fix case in which a portion of
+data can in some cases not get written to the file on the server before the
+file is closed.  Fix DFS parsing to properly handle path consumed field,
+and to handle certain codepage conversions better.  Fix mount and
+umount race that can cause oops in mount or umount or reconnect.
+
+Version 1.54
+------------
+Fix premature write failure on congested networks (we would give up
+on EAGAIN from the socket too quickly on large writes).
+Cifs_mkdir and cifs_create now respect the setgid bit on parent dir.
+Fix endian problems in acl (mode from/to cifs acl) on bigendian
+architectures.  Fix problems with preserving timestamps on copying open
+files (e.g. "cp -a") to Windows servers.  For mkdir and create honor setgid bit
+on parent directory when server supports Unix Extensions but not POSIX
+create. Update cifs.upcall version to handle new Kerberos sec flags
+(this requires update of cifs.upcall program from Samba).  Fix memory leak
+on dns_upcall (resolving DFS referrals).  Fix plain text password
+authentication (requires setting SecurityFlags to 0x30030 to enable
+lanman and plain text though).  Fix writes to be at correct offset when
+file is open with O_APPEND and file is on a directio (forcedirectio) mount.
+Fix bug in rewinding readdir directory searches.  Add nodfs mount option.
+
+Version 1.53
+------------
+DFS support added (Microsoft Distributed File System client support needed
+for referrals which enable a hierarchical name space among servers).
+Disable temporary caching of mode bits to servers which do not support
+storing of mode (e.g. Windows servers, when client mounts without cifsacl
+mount option) and add new "dynperm" mount option to enable temporary caching
+of mode (enable old behavior).  Fix hang on mount caused when server crashes
+tcp session during negotiate protocol.
+
+Version 1.52
+------------
+Fix oops on second mount to server when null auth is used.
+Enable experimental Kerberos support.  Return writebehind errors on flush
+and sync so that events like out of disk space get reported properly on
+cached files. Fix setxattr failure to certain Samba versions. Fix mount
+of second share to disconnected server session (autoreconnect on this).
+Add ability to modify cifs acls for handling chmod (when mounted with
+cifsacl flag). Fix prefixpath path separator so we can handle mounts
+with prefixpaths longer than one directory (one path component) when
+mounted to Windows servers.  Fix slow file open when cifsacl
+enabled. Fix memory leak in FindNext when the SMB call returns -EBADF.
+
+
+Version 1.51
+------------
+Fix memory leak in statfs when mounted to very old servers (e.g.
+Windows 9x).  Add new feature "POSIX open" which allows servers
+which support the current POSIX Extensions to provide better semantics
+(e.g. delete for open files opened with posix open).  Take into
+account umask on posix mkdir not just older style mkdir.  Add
+ability to mount to IPC$ share (which allows CIFS named pipes to be
+opened, read and written as if they were files).  When 1st tree
+connect fails (e.g. due to signing negotiation failure) fix
+leak that causes cifsd not to stop and rmmod to fail to cleanup
+cifs_request_buffers pool. Fix problem with POSIX Open/Mkdir on
+bigendian architectures. Fix possible memory corruption when
+EAGAIN returned on kern_recvmsg. Return better error if server
+requires packet signing but client has disabled it. When mounted
+with cifsacl mount option - mode bits are approximated based
+on the contents of the ACL of the file or directory. When cifs
+mount helper is missing, make sure that the UNC name 
+has backslash (not forward slash) between ip address of server
+and the share name.
+
+Version 1.50
+------------
+Fix NTLMv2 signing. NFS server mounted over cifs works (if cifs mount is
+done with "serverino" mount option).  Add support for POSIX Unlink
+(helps with certain sharing violation cases when server such as
+Samba supports newer POSIX CIFS Protocol Extensions). Add "nounix"
+mount option to allow disabling the CIFS Unix Extensions for just
+that mount. Fix hang on spinlock in find_writable_file (race when
+reopening file after session crash).  Byte range unlock request to
+windows server could unlock more bytes (on server copy of file)
+than intended if start of unlock request is well before start of
+a previous byte range lock that we issued.
+
+Version 1.49
+------------
+IPv6 support.  Enable ipv6 addresses to be passed on mount (put the ipv6
+address after the "ip=" mount option, at least until mount.cifs is fixed to
+handle DNS host to ipv6 name translation).  Accept override of uid or gid
+on mount even when Unix Extensions are negotiated (it used to be ignored
+when Unix Extensions were ignored).  This allows users to override the
+default uid and gid for files when they are certain that the uids or
+gids on the server do not match those of the client.  Make "sec=none"
+mount override username (so that null user connection is attempted)
+to match what documentation said. Support for very large reads, over 127K,
+available to some newer servers (such as Samba 3.0.26 and later but
+note that it also requires setting CIFSMaxBufSize at module install
+time to a larger value which may hurt performance in some cases).
+Make sign option force signing (or fail if server does not support it).
+
+Version 1.48
+------------
+Fix mtime bouncing around from local idea of last write times to remote time.
+Fix hang (in i_size_read) when simultaneous size update of same remote file
+on smp system corrupts sequence number. Do not reread unnecessarily partial page
+(which we are about to overwrite anyway) when writing out file opened rw.
+When DOS attribute of file on non-Unix server's file changes on the server side
+from read-only back to read-write, reflect this change in default file mode
+(we had been leaving a file's mode read-only until the inode were reloaded).
+Allow setting of attribute back to ATTR_NORMAL (removing readonly dos attribute
+when archive dos attribute not set and we are changing mode back to writeable
+on server which does not support the Unix Extensions).  Remove read only dos
+attribute on chmod when adding any write permission (ie on any of
+user/group/other (not all of user/group/other ie  0222) when
+mounted to windows.  Add support for POSIX MkDir (slight performance
+enhancement and eliminates the network race between the mkdir and set 
+path info of the mode).
+
+
+Version 1.47
+------------
+Fix oops in list_del during mount caused by unaligned string.
+Fix file corruption which could occur on some large file
+copies caused by writepages page i/o completion bug.
+Seek to SEEK_END forces check for update of file size for non-cached
+files. Allow file size to be updated on remote extend of locally open,
+non-cached file.  Fix reconnect to newer Samba servers (or other servers
+which support the CIFS Unix/POSIX extensions) so that we again tell the
+server the Unix/POSIX cifs capabilities which we support (SetFSInfo).
+Add experimental support for new POSIX Open/Mkdir (which returns
+stat information on the open, and allows setting the mode).
+
+Version 1.46
+------------
+Support deep tree mounts.  Better support OS/2, Win9x (DOS) time stamps.
+Allow null user to be specified on mount ("username="). Do not return
+EINVAL on readdir when filldir fails due to overwritten blocksize
+(fixes FC problem).  Return error in rename 2nd attempt retry (ie report
+if rename by handle also fails, after rename by path fails, we were
+not reporting whether the retry worked or not). Fix NTLMv2 to
+work to Windows servers (mount with option "sec=ntlmv2").
+
+Version 1.45
+------------
+Do not time out lockw calls when using posix extensions. Do not
+time out requests if server still responding reasonably fast
+on requests on other threads.  Improve POSIX locking emulation,
+(lock cancel now works, and unlock of merged range works even
+to Windows servers now).  Fix oops on mount to lanman servers
+(win9x, os/2 etc.) when null password.  Do not send listxattr
+(SMB to query all EAs) if nouser_xattr specified.  Fix SE Linux
+problem (instantiate inodes/dentries in right order for readdir).
+
+Version 1.44
+------------
+Rewritten sessionsetup support, including support for legacy SMB
+session setup needed for OS/2 and older servers such as Windows 95 and 98.
+Fix oops on ls to OS/2 servers.  Add support for level 1 FindFirst
+so we can do search (ls etc.) to OS/2.  Do not send NTCreateX
+or recent levels of FindFirst unless server says it supports NT SMBs
+(instead use legacy equivalents from LANMAN dialect). Fix to allow
+NTLMv2 authentication support (now can use stronger password hashing
+on mount if corresponding /proc/fs/cifs/SecurityFlags is set (0x4004).
+Allow override of global cifs security flags on mount via "sec=" option(s).
+
+Version 1.43
+------------
+POSIX locking to servers which support CIFS POSIX Extensions
+(disabled by default controlled by proc/fs/cifs/Experimental).
+Handle conversion of long share names (especially Asian languages)
+to Unicode during mount. Fix memory leak in sess struct on reconnect.
+Fix rare oops after acpi suspend.  Fix O_TRUNC opens to overwrite on
+cifs open which helps rare case when setpathinfo fails or server does
+not support it. 
+
+Version 1.42
+------------
+Fix slow oplock break when mounted to different servers at the same time and
+the tids match and we try to find matching fid on wrong server. Fix read
+looping when signing required by server (2.6.16 kernel only). Fix readdir
+vs. rename race which could cause each to hang. Return . and .. even
+if server does not.  Allow searches to skip first three entries and
+begin at any location. Fix oops in find_writeable_file.
+
+Version 1.41
+------------
+Fix NTLMv2 security (can be enabled in /proc/fs/cifs) so customers can
+configure stronger authentication.  Fix sfu symlinks so they can
+be followed (not just recognized).  Fix wraparound of bcc on
+read responses when buffer size over 64K and also fix wrap of
+max smb buffer size when CIFSMaxBufSize over 64K.  Fix oops in
+cifs_user_read and cifs_readpages (when EAGAIN on send of smb
+on socket is returned over and over).  Add POSIX (advisory) byte range
+locking support (requires server with newest CIFS UNIX Extensions
+to the protocol implemented). Slow down negprot slightly in port 139
+RFC1001 case to give session_init time on buggy servers.
+
+Version 1.40
+------------
+Use fsuid (fsgid) more consistently instead of uid (gid). Improve performance
+of readpages by eliminating one extra memcpy. Allow update of file size
+from remote server even if file is open for write as long as mount is
+directio.  Recognize share mode security and send NTLM encrypted password
+on tree connect if share mode negotiated.
+
+Version 1.39
+------------
+Defer close of a file handle slightly if pending writes depend on that handle
+(this reduces the EBADF bad file handle errors that can be logged under heavy
+stress on writes). Modify cifs Kconfig options to expose CONFIG_CIFS_STATS2 
+Fix SFU style symlinks and mknod needed for servers which do not support the
+CIFS Unix Extensions.  Fix setfacl/getfacl on bigendian. Timeout negative
+dentries so files that the client sees as deleted but that later get created
+on the server will be recognized.  Add client side permission check on setattr.
+Timeout stuck requests better (where server has never responded or sent corrupt
+responses)
+
+Version 1.38
+------------
+Fix tcp socket retransmission timeouts (e.g. on ENOSPACE from the socket)
+to be smaller at first (but increasing) so large write performance
+over GigE is better.  Do not hang thread on illegal byte range lock response
+from Windows (Windows can send an RFC1001 size which does not match smb size) by
+allowing an SMBs TCP length to be up to a few bytes longer than it should be.
+wsize and rsize can now be larger than negotiated buffer size if server
+supports large readx/writex, even when directio mount flag not specified.
+Write size will in many cases now be 16K instead of 4K which greatly helps
+file copy performance on lightly loaded networks.  Fix oops in dnotify
+when experimental config flag enabled. Make cifsFYI more granular.
+
+Version 1.37
+------------
+Fix readdir caching when unlink removes file in current search buffer,
+and this is followed by a rewind search to just before the deleted entry.
+Do not attempt to set ctime unless atime and/or mtime change requested
+(most servers throw it away anyway). Fix length check of received smbs
+to be more accurate. Fix big endian problem with mapchars mount option,
+and with a field returned by statfs.
+
+Version 1.36
+------------
+Add support for mounting to older pre-CIFS servers such as Windows9x and ME.
+For these older servers, add option for passing netbios name of server in
+on mount (servernetbiosname).  Add suspend support for power management, to
+avoid cifsd thread preventing software suspend from working.
+Add mount option for disabling the default behavior of sending byte range lock
+requests to the server (necessary for certain applications which break with
+mandatory lock behavior such as Evolution), and also mount option for
+requesting case insensitive matching for path based requests (requesting
+case sensitive is the default).
+
+Version 1.35
+------------
+Add writepage performance improvements.  Fix path name conversions
+for long filenames on mounts which were done with "mapchars" mount option
+specified.  Ensure multiplex ids do not collide.  Fix case in which 
+rmmod can oops if done soon after last unmount.  Fix truncated
+search (readdir) output when resume filename was a long filename.
+Fix filename conversion when mapchars mount option was specified and
+filename was a long filename.
+
+Version 1.34
+------------
+Fix error mapping of the TOO_MANY_LINKS (hardlinks) case.
+Do not oops if root user kills cifs oplock kernel thread or
+kills the cifsd thread (NB: killing the cifs kernel threads is not
+recommended, unmount and rmmod cifs will kill them when they are
+no longer needed).  Fix readdir to ASCII servers (ie older servers
+which do not support Unicode) and also require asterisk.
+Fix out of memory case in which data could be written one page
+off in the page cache.
+
+Version 1.33
+------------
+Fix caching problem, in which readdir of directory containing a file
+which was cached could cause the file's time stamp to be updated
+without invalidating the readahead data (so we could get stale
+file data on the client for that file even as the server copy changed).
+Cleanup response processing so cifsd can not loop when abnormally
+terminated.
+
+
+Version 1.32
+------------
+Fix oops in ls when Transact2 FindFirst (or FindNext) returns more than one
+transact response for an SMB request and search entry split across two frames.
+Add support for lsattr (getting ext2/ext3/reiserfs attr flags from the server)
+as new protocol extensions. Do not send Get/Set calls for POSIX ACLs
+unless server explicitly claims to support them in CIFS Unix extensions
+POSIX ACL capability bit. Fix packet signing when multiuser mounting with
+different users from the same client to the same server. Fix oops in
+cifs_close. Add mount option for remapping reserved characters in
+filenames (also allow recognizing files with created by SFU which have any
+of these seven reserved characters, except backslash, to be recognized).
+Fix invalid transact2 message (we were sometimes trying to interpret
+oplock breaks as SMB responses). Add ioctl for checking that the
+current uid matches the uid of the mounter (needed by umount.cifs).
+Reduce the number of large buffer allocations in cifs response processing
+(significantly reduces memory pressure under heavy stress with multiple
+processes accessing the same server at the same time).
+
+Version 1.31
+------------
+Fix updates of DOS attributes and time fields so that files on NT4 servers
+do not get marked delete on close. Display sizes of cifs buffer pools in
+cifs stats. Fix oops in unmount when cifsd thread being killed by 
+shutdown. Add generic readv/writev and aio support. Report inode numbers 
+consistently in readdir and lookup (when serverino mount option is
+specified use the inode number that the server reports - for both lookup
+and readdir, otherwise by default the locally generated inode number is used
+for inodes created in either path since servers are not always able to 
+provide unique inode numbers when exporting multiple volumes from under one
+sharename).
+
+Version 1.30
+------------
+Allow new nouser_xattr mount parm to disable xattr support for user namespace.
+Do not flag user_xattr mount parm in dmesg.  Retry failures setting file time  
+(mostly affects NT4 servers) by retry with handle based network operation. 
+Add new POSIX Query FS Info for returning statfs info more accurately.
+Handle passwords with multiple commas in them.
+
+Version 1.29
+------------
+Fix default mode in sysfs of cifs module parms.  Remove old readdir routine.
+Fix capabilities flags for large readx so as to allow reads larger than 64K.
+
+Version 1.28
+------------
+Add module init parm for large SMB buffer size (to allow it to be changed
+from its default of 16K) which is especially useful for large file copy
+when mounting with the directio mount option. Fix oops after 
+returning from mount when experimental ExtendedSecurity enabled and
+SpnegoNegotiated returning invalid error. Fix case to retry better when 
+peek returns from 1 to 3 bytes on socket which should have more data.
+Fixed path based calls (such as cifs lookup) to handle path names
+longer than 530 (now can handle PATH_MAX). Fix pass through authentication
+from Samba server to DC (Samba required dummy LM password).
+
+Version 1.27
+------------
+Turn off DNOTIFY (directory change notification support) by default
+(unless built with the experimental flag) to fix hang with KDE
+file browser. Fix DNOTIFY flag mappings.  Fix hang (in wait_event
+waiting on an SMB response) in SendReceive when session dies but
+reconnects quickly from another task.  Add module init  parms for
+minimum number of large and small network buffers in the buffer pools,
+and for the maximum number of simultaneous requests.
+
+Version 1.26
+------------
+Add setfacl support to allow setting of ACLs remotely to Samba 3.10 and later
+and other POSIX CIFS compliant servers.  Fix error mapping for getfacl 
+to EOPNOTSUPP when server does not support posix acls on the wire. Fix 
+improperly zeroed buffer in CIFS Unix extensions set times call. 
+
+Version 1.25
+------------
+Fix internationalization problem in cifs readdir with filenames that map to 
+longer UTF-8 strings than the string on the wire was in Unicode.  Add workaround
+for readdir to netapp servers. Fix search rewind (seek into readdir to return 
+non-consecutive entries).  Do not do readdir when server negotiates 
+buffer size too small to fit filename. Add support for reading POSIX ACLs from
+the server (add also acl and noacl mount options).
+
+Version 1.24
+------------
+Optionally allow using server side inode numbers, rather than client generated
+ones by specifying mount option "serverino" - this is required for some apps
+to work which double check hardlinked files and have persistent inode numbers.
+
+Version 1.23
+------------
+Multiple bigendian fixes. On little endian systems (for reconnect after
+network failure) fix tcp session reconnect code so we do not try first
+to reconnect on reverse of port 445. Treat reparse points (NTFS junctions)
+as directories rather than symlinks because we can do follow link on them.
+
+Version 1.22
+------------
+Add config option to enable XATTR (extended attribute) support, mapping
+xattr names in the "user." namespace space to SMB/CIFS EAs. Lots of
+minor fixes pointed out by the Stanford SWAT checker (mostly missing
+or out of order NULL pointer checks in little used error paths).
+
+Version 1.21
+------------
+Add new mount parm to control whether mode check (generic_permission) is done
+on the client.  If Unix extensions are enabled and the uids on the client
+and server do not match, client permission checks are meaningless on
+server uids that do not exist on the client (this does not affect the
+normal ACL check which occurs on the server).  Fix default uid
+on mknod to match create and mkdir. Add optional mount parm to allow
+override of the default uid behavior (in which the server sets the uid
+and gid of newly created files). Normally for network filesystem mounts
+users want the server to set the uid/gid on newly created files (rather than 
+using uid of the client processes you would in a local filesystem).
+
+Version 1.20
+------------
+Make transaction counts more consistent. Merge /proc/fs/cifs/SimultaneousOps
+info into /proc/fs/cifs/DebugData.  Fix rare oops in readdir 
+(in build_wildcard_path_from_dentry).  Fix mknod to pass type field
+(block/char/fifo) properly.  Remove spurious mount warning log entry when
+credentials passed as mount argument. Set major/minor device number in
+inode for block and char devices when unix extensions enabled.
+
+Version 1.19
+------------
+Fix /proc/fs/cifs/Stats and DebugData display to handle larger
+amounts of return data. Properly limit requests to MAX_REQ (50
+is the usual maximum active multiplex SMB/CIFS requests per server).
+Do not kill cifsd (and thus hurt the other SMB session) when more than one
+session to the same server (but with different userids) exists and one
+of the two user's smb sessions is being removed while leaving the other.
+Do not loop reconnecting in cifsd demultiplex thread when admin
+kills the thread without going through unmount.
+
+Version 1.18
+------------
+Do not rename hardlinked files (since that should be a noop). Flush
+cached write behind data when reopening a file after session abend,
+except when already in write. Grab per socket sem during reconnect 
+to avoid oops in sendmsg if overlapping with reconnect. Do not
+reset cached inode file size on readdir for files open for write on 
+client.
+
+
+Version 1.17
+------------
+Update number of blocks in file so du command is happier (in Linux a fake
+blocksize of 512 is required for calculating number of blocks in inode).
+Fix prepare write of partial pages to read in data from server if possible.
+Fix race on tcpStatus field between unmount and reconnection code, causing
+cifsd process sometimes to hang around forever. Improve out of memory
+checks in cifs_filldir
+
+Version 1.16
+------------
+Fix incorrect file size in file handle based setattr on big endian hardware.
+Fix oops in build_path_from_dentry when out of memory.  Add checks for invalid
+and closing file structs in writepage/partialpagewrite.  Add statistics
+for each mounted share (new menuconfig option). Fix endianness problem in
+volume information displayed in /proc/fs/cifs/DebugData (only affects
+big endian architectures). Prevent renames while constructing
+path names for open, mkdir and rmdir.
+
+Version 1.15
+------------
+Change to mempools for alloc smb request buffers and multiplex structs
+to better handle low memory problems (and potential deadlocks).
+
+Version 1.14
+------------
+Fix incomplete listings of large directories on Samba servers when Unix
+extensions enabled.  Fix oops when smb_buffer can not be allocated. Fix
+rename deadlock when writing out dirty pages at same time.
+
+Version 1.13
+------------
+Fix open of files in which O_CREATE can cause the mode to change in
+some cases. Fix case in which retry of write overlaps file close.
+Fix PPC64 build error.  Reduce excessive stack usage in smb password
+hashing. Fix overwrite of Linux user's view of file mode to Windows servers.
+
+Version 1.12
+------------
+Fixes for large file copy, signal handling, socket retry, buffer
+allocation and low memory situations.
+
+Version 1.11
+------------
+Better port 139 support to Windows servers (RFC1001/RFC1002 Session_Initialize)
+also now allowing support for specifying client netbiosname.  NT4 support added.
+
+Version 1.10
+------------
+Fix reconnection (and certain failed mounts) to properly wake up the
+blocked users thread so it does not seem hung (in some cases was blocked
+until the cifs receive timeout expired). Fix spurious error logging
+to kernel log when application with open network files killed. 
+
+Version 1.09
+------------
+Fix /proc/fs module unload warning message (that could be logged
+to the kernel log). Fix intermittent failure in connectathon
+test7 (hardlink count not immediately refreshed in case in which
+inode metadata can be incorrectly kept cached when time near zero)
+
+Version 1.08
+------------
+Allow file_mode and dir_mode (specified at mount time) to be enforced
+locally (the server already enforced its own ACLs too) for servers
+that do not report the correct mode (do not support the 
+CIFS Unix Extensions).
+
+Version 1.07
+------------
+Fix some small memory leaks in some unmount error paths. Fix major leak
+of cache pages in readpages causing multiple read oriented stress
+testcases (including fsx, and even large file copy) to fail over time. 
+
+Version 1.06
+------------
+Send NTCreateX with ATTR_POSIX if Linux/Unix extensions negotiated with server.
+This allows files that differ only in case and improves performance of file
+creation and file open to such servers.  Fix semaphore conflict which causes 
+slow delete of open file to Samba (which unfortunately can cause an oplock
+break to self while vfs_unlink held i_sem) which can hang for 20 seconds.
+
+Version 1.05
+------------
+fixes to cifs_readpages for fsx test case
+
+Version 1.04
+------------
+Fix caching data integrity bug when extending file size especially when no
+oplock on file.  Fix spurious logging of valid already parsed mount options
+that are parsed outside of the cifs vfs such as nosuid.
+
+
+Version 1.03
+------------
+Connect to server when port number override not specified, and tcp port
+uninitialized.  Reset search to restart at correct file when kernel routine
+filldir returns error during large directory searches (readdir). 
+
+Version 1.02
+------------
+Fix caching problem when files opened by multiple clients in which 
+page cache could contain stale data, and write through did
+not occur often enough while file was still open when read ahead
+(read oplock) not allowed.  Treat "sep=" when first mount option
+as an override of comma as the default separator between mount
+options. 
+
+Version 1.01
+------------
+Allow passwords longer than 16 bytes. Allow null password string.
+
+Version 1.00
+------------
+Gracefully clean up failed mounts when attempting to mount to servers such as
+Windows 98 that terminate tcp sessions during protocol negotiation.  Handle
+embedded commas in mount parsing of passwords.
+
+Version 0.99
+------------
+Invalidate local inode cached pages on oplock break and when last file
+instance is closed so that the client does not continue using stale local
+copy rather than later modified server copy of file.  Do not reconnect
+when server drops the tcp session prematurely before negotiate
+protocol response.  Fix oops in reopen_file when dentry freed.  Allow
+the support for CIFS Unix Extensions to be disabled via proc interface.
+
+Version 0.98
+------------
+Fix hang in commit_write during reconnection of open files under heavy load.
+Fix unload_nls oops in a mount failure path. Serialize writes to same socket
+which also fixes any possible races when cifs signatures are enabled in SMBs
+being sent out of signature sequence number order.    
+
+Version 0.97
+------------
+Fix byte range locking bug (endian problem) causing bad offset and
+length.
+
+Version 0.96
+------------
+Fix oops (in send_sig) caused by CIFS unmount code trying to
+wake up the demultiplex thread after it had exited. Do not log
+error on harmless oplock release of closed handle.
+
+Version 0.95
+------------
+Fix unsafe global variable usage and password hash failure on gcc 3.3.1
+Fix problem reconnecting secondary mounts to same server after session 
+failure.  Fix invalid dentry - race in mkdir when directory gets created
+by another client between the lookup and mkdir.
+Version 0.94
+------------
+Fix to list processing in reopen_files. Fix reconnection when server hung
+but tcpip session still alive.  Set proper timeout on socket read.
+
+Version 0.93
+------------
+Add missing mount options including iocharset.  SMP fixes in write and open. 
+Fix errors in reconnecting after TCP session failure.  Fix module unloading
+of default nls codepage
+
+Version 0.92
+------------
+Active smb transactions should never go negative (fix double FreeXid). Fix
+list processing in file routines. Check return code on kmalloc in open.
+Fix spinlock usage for SMP.
+
+Version 0.91
+------------
+Fix oops in reopen_files when invalid dentry. drop dentry on server rename 
+and on revalidate errors. Fix cases where pid is now tgid.  Fix return code
+on create hard link when server does not support them. 
+
+Version 0.90
+------------
+Fix scheduling while atomic error in getting inode info on newly created file. 
+Fix truncate of existing files opened with O_CREAT but not O_TRUNC set.
+
+Version 0.89
+------------
+Fix oops on write to dead tcp session. Remove error log write for case when file open
+O_CREAT but not O_EXCL
+
+Version 0.88
+------------
+Fix non-POSIX behavior on rename of open file and delete of open file by taking 
+advantage of trans2 SetFileInfo rename facility if available on target server.
+Retry on ENOSPC and EAGAIN socket errors.
+
+Version 0.87
+------------
+Fix oops on big endian readdir.  Set blksize to be even power of two (2**blkbits) to fix
+allocation size miscalculation. After oplock token lost do not read through
+cache. 
+
+Version 0.86
+------------
+Fix oops on empty file readahead.  Fix for file size handling for locally cached files.
+
+Version 0.85
+------------
+Fix oops in mkdir when server fails to return inode info. Fix oops in reopen_files
+during auto reconnection to server after server recovered from failure.
+
+Version 0.84
+------------
+Finish support for Linux 2.5 open/create changes, which removes the
+redundant NTCreate/QPathInfo/close that was sent during file create.
+Enable oplock by default. Enable packet signing by default (needed to 
+access many recent Windows servers)
+
+Version 0.83
+------------
+Fix oops when mounting to long server names caused by inverted parms to kmalloc.
+Fix MultiuserMount (/proc/fs/cifs configuration setting) so that when enabled
+we will choose a cifs user session (smb uid) that better matches the local
+uid if a) the mount uid does not match the current uid and b) we have another
+session to the same server (ip address) for a different mount which
+matches the current local uid.
+
+Version 0.82
+------------
+Add support for mknod of block or character devices.  Fix oplock
+code (distributed caching) to properly send response to oplock
+break from server.
+
+Version 0.81
+------------
+Finish up CIFS packet digital signing for the default
+NTLM security case. This should help Windows 2003
+network interoperability since it is common for
+packet signing to be required now. Fix statfs (stat -f)
+which recently started returning errors due to 
+invalid value (-1 instead of 0) being set in the
+struct kstatfs f_ffiles field.
+
+Version 0.80
+-----------
+Fix oops on stopping oplock thread when removing cifs when
+built as module.
+
+Version 0.79
+------------
+Fix mount options for ro (readonly), uid, gid and file and directory mode. 
+
+Version 0.78
+------------
+Fix errors displayed on failed mounts to be more understandable.
+Fixed various incorrect or misleading smb to posix error code mappings.
+
+Version 0.77
+------------
+Fix display of NTFS DFS junctions to display as symlinks.
+They are the network equivalent.  Fix oops in 
+cifs_partialpagewrite caused by missing spinlock protection
+of openfile linked list.  Allow writebehind caching errors to 
+be returned to the application at file close.
+
+Version 0.76
+------------
+Clean up options displayed in /proc/mounts by show_options to
+be more consistent with other filesystems.
+
+Version 0.75
+------------
+Fix delete of readonly file to Windows servers.  Reflect
+presence or absence of read only dos attribute in mode
+bits for servers that do not support CIFS Unix extensions.
+Fix shortened results on readdir of large directories to
+servers supporting CIFS Unix extensions (caused by
+incorrect resume key).
+
+Version 0.74
+------------
+Fix truncate bug (set file size) that could cause hangs e.g. running fsx
+
+Version 0.73
+------------
+unload nls if mount fails.
+
+Version 0.72
+------------
+Add resume key support to search (readdir) code to workaround
+Windows bug.  Add /proc/fs/cifs/LookupCacheEnable which
+allows disabling caching of attribute information for
+lookups.
+
+Version 0.71
+------------
+Add more oplock handling (distributed caching code).  Remove
+dead code.  Remove excessive stack space utilization from
+symlink routines.
+
+Version 0.70
+------------
+Fix oops in get dfs referral (triggered when null path sent in to
+mount).  Add support for overriding rsize at mount time.
+
+Version 0.69
+------------
+Fix buffer overrun in readdir which caused intermittent kernel oopses.
+Fix writepage code to release kmap on write data.  Allow "-ip=" new 
+mount option to be passed in on parameter distinct from the first part
+(server name portion of) the UNC name.  Allow override of the
+tcp port of the target server via new mount option "-port="  
+
+Version 0.68
+------------
+Fix search handle leak on rewind.  Fix setuid and gid so that they are 
+reflected in the local inode immediately.  Cleanup of whitespace
+to make 2.4 and 2.5 versions more consistent.
+
+
+Version 0.67
+------------
+Fix signal sending so that captive thread (cifsd) exits on umount 
+(which was causing the warning in kmem_cache_free of the request buffers
+at rmmod time).  This had broken as a side effect of the recent global
+kernel change to daemonize.  Fix memory leak in readdir code which
+showed up in "ls -R" (and applications that did search rewinding).
+
+Version 0.66
+------------
+Reconnect tids and fids after session reconnection (still do not
+reconnect byte range locks though).  Fix problem caching
+lookup information for directory inodes, improving performance,
+especially in deep directory trees.  Fix various build warnings.
+
+Version 0.65
+------------
+Finish fixes to commit write for caching/readahead consistency.  fsx 
+now works to Samba servers.  Fix oops caused when readahead
+was interrupted by a signal.
+
+Version 0.64
+------------
+Fix data corruption (in partial page after truncate) that caused fsx to
+fail to Windows servers.  Cleaned up some extraneous error logging in
+common error paths.  Add generic sendfile support.
+
+Version 0.63
+------------
+Fix memory leak in AllocMidQEntry.
+Finish reconnection logic, so connection with server can be dropped
+(or server rebooted) and the cifs client will reconnect.  
+
+Version 0.62
+------------
+Fix temporary socket leak when bad userid or password specified 
+(or other SMBSessSetup failure).  Increase maximum buffer size to slightly
+over 16K to allow negotiation of up to Samba and Windows server default read 
+sizes.  Add support for readpages
+
+Version 0.61
+------------
+Fix oops when username not passed in on mount.  Extensive fixes and improvements
+to error logging (strip redundant newlines, change debug macros to ensure newline
+passed in and to be more consistent).  Fix writepage wrong file handle problem,
+a readonly file handle could be incorrectly used to attempt to write out
+file updates through the page cache to multiply open files.  This could cause
+the iozone benchmark to fail on the fwrite test. Fix bug mounting two different
+shares to the same Windows server when using different usernames
+(doing this to Samba servers worked but Windows was rejecting it) - now it is
+possible to use different userids when connecting to the same server from a
+Linux client. Fix oops when treeDisconnect called during unmount on
+previously freed socket.
+
+Version 0.60
+------------
+Fix oops in readpages caused by not setting address space operations in inode in 
+rare code path. 
+
+Version 0.59
+------------
+Includes support for deleting of open files and renaming over existing files (per POSIX
+requirement).  Add readlink support for Windows junction points (directory symlinks).
+
+Version 0.58
+------------
+Changed read and write to go through pagecache. Added additional address space operations.
+Memory mapped operations now working.
+
+Version 0.57
+------------
+Added writepage code for additional memory mapping support.  Fixed leak in xids causing
+the simultaneous operations counter (/proc/fs/cifs/SimultaneousOps) to increase on 
+every stat call.  Additional formatting cleanup. 
+
+Version 0.56
+------------
+Fix bigendian bug in order of time conversion. Merge 2.5 to 2.4 version.  Formatting cleanup.   
+
+Version 0.55
+------------
+Fixes from Zwane Mwaikambo for adding missing return code checking in a few places.
+Also included a modified version of his fix to protect global list manipulation of
+the smb session and tree connection and mid related global variables.
+
+Version 0.54
+------------
+Fix problem with captive thread hanging around at unmount time.  Adjust to 2.5.42-pre
+changes to superblock layout.   Remove wasteful allocation of smb buffers (now the send 
+buffer is reused for responses).  Add more oplock handling. Additional minor cleanup.
+
+Version 0.53
+------------
+More stylistic updates to better match kernel style.  Add additional statistics
+for filesystem which can be viewed via /proc/fs/cifs.  Add more pieces of NTLMv2
+and CIFS Packet Signing enablement.
+
+Version 0.52
+------------
+Replace call to sleep_on with safer wait_on_event.
+Make stylistic changes to better match kernel style recommendations.
+Remove most typedef usage (except for the PDUs themselves).
+
+Version 0.51
+------------
+Update mount so the -unc mount option is no longer required (the ip address can be specified
+in a UNC style device name).   Implementation of readpage/writepage started.
+
+Version 0.50
+------------
+Fix intermittent problem with incorrect smb header checking on badly 
+fragmented tcp responses
+
+Version 0.49
+------------
+Fixes to setting of allocation size and file size.
+
+Version 0.48
+------------
+Various 2.5.38 fixes.  Now works on 2.5.38
+
+Version 0.47
+------------
+Prepare for 2.5 kernel merge.  Remove ifdefs.
+
+Version 0.46
+------------
+Socket buffer management fixes.  Fix dual free.
+
+Version 0.45
+------------
+Various big endian fixes for hardlinks and symlinks and also for dfs.
+
+Version 0.44
+------------
+Various big endian fixes for servers with Unix extensions such as Samba
+
+Version 0.43
+------------
+Various FindNext fixes for incorrect filenames on large directory searches on big endian
+clients.  basic posix file i/o tests now work on big endian machines, not just le
+
+Version 0.42
+------------
+SessionSetup and NegotiateProtocol now work from Big Endian machines.
+Various Big Endian fixes found during testing on the Linux on 390.  Various fixes for compatibility with older
+versions of 2.4 kernel (now builds and works again on kernels at least as early as 2.4.7).
+
+Version 0.41
+------------
+Various minor fixes for Connectathon Posix "basic" file i/o test suite.  Directory caching fixed so hardlinked
+files now return the correct number of links on fstat as they are repeatedly linked and unlinked.
+
+Version 0.40
+------------
+Implemented "Raw" (i.e. not encapsulated in SPNEGO) NTLMSSP (i.e. the Security Provider Interface used to negotiate
+session advanced session authentication).  Raw NTLMSSP is preferred by Windows 2000 Professional and Windows XP.
+Began implementing support for SPNEGO encapsulation of NTLMSSP based session authentication blobs
+(which is the mechanism preferred by Windows 2000 server in the absence of Kerberos).
+
+Version 0.38
+------------
+Introduced optional mount helper utility mount.cifs and made coreq changes to cifs vfs to enable
+it. Fixed a few bugs in the DFS code (e.g. bcc two bytes too short and incorrect uid in PDU).
+
+Version 0.37
+------------
+Rewrote much of connection and mount/unmount logic to handle bugs with
+multiple uses to same share, multiple users to same server etc.
+
+Version 0.36
+------------
+Fixed major problem with dentry corruption (missing call to dput)
+
+Version 0.35
+------------
+Rewrite of readdir code to fix bug. Various fixes for bigendian machines.
+Begin adding oplock support.  Multiusermount and oplockEnabled flags added to /proc/fs/cifs
+although corresponding function not fully implemented in the vfs yet
+
+Version 0.34
+------------
+Fixed dentry caching bug, misc. cleanup 
+
+Version 0.33
+------------
+Fixed 2.5 support to handle build and configure changes as well as misc. 2.5 changes.  Now can build
+on current 2.5 beta version (2.5.24) of the Linux kernel as well as on 2.4 Linux kernels.
+Support for STATUS codes (newer 32 bit NT error codes) added.  DFS support begun to be added.
+
+Version 0.32
+------------
+Unix extensions (symlink, readlink, hardlink, chmod and some chgrp and chown) implemented
+and tested against Samba 2.2.5
+
+
+Version 0.31
+------------
+1) Fixed lockrange to be correct (it was one byte too short)
+
+2) Fixed GETLK (i.e. the fcntl call to test a range of bytes in a file to see if locked) to correctly 
+show range as locked when there is a conflict with an existing lock.
+
+3) default file perms are now 2767 (indicating support for mandatory locks) instead of 777 for directories
+in most cases.  Eventually will offer optional ability to query server for the correct perms.
+
+4) Fixed eventual trap when mounting twice to different shares on the same server when the first succeeded 
+but the second one was invalid and failed (the second one was incorrectly disconnecting the tcp and smb
+session) 
+
+5) Fixed error logging of valid mount options
+
+6) Removed logging of password field.
+
+7) Moved negotiate, treeDisconnect and uloggoffX (only tConx and SessSetup remain in connect.c) to cifssmb.c
+and cleaned them up and made them more consistent with other cifs functions. 
+
+8) Server support for Unix extensions is now fully detected and FindFirst is implemented both ways 
+(with or without Unix extensions) but FindNext and QueryPathInfo with the Unix extensions are not completed,
+nor is the symlink support using the Unix extensions
+
+9) Started adding the readlink and follow_link code 
+
+Version 0.3 
+-----------
+Initial drop
+
diff --git a/Documentation/filesystems/cifs/README b/Documentation/filesystems/cifs/README
new file mode 100644 (file)
index 0000000..2d5622f
--- /dev/null
@@ -0,0 +1,753 @@
+The CIFS VFS support for Linux supports many advanced network filesystem 
+features such as hierarchical dfs like namespace, hardlinks, locking and more.  
+It was designed to comply with the SNIA CIFS Technical Reference (which 
+supersedes the 1992 X/Open SMB Standard) as well as to perform best practice 
+practical interoperability with Windows 2000, Windows XP, Samba and equivalent 
+servers.  This code was developed in participation with the Protocol Freedom
+Information Foundation.
+
+Please see
+  http://protocolfreedom.org/ and
+  http://samba.org/samba/PFIF/
+for more details.
+
+
+For questions or bug reports please contact:
+    sfrench@samba.org (sfrench@us.ibm.com) 
+
+Build instructions:
+==================
+For Linux 2.4:
+1) Get the kernel source (e.g. from http://www.kernel.org)
+and download the cifs vfs source (see the project page
+at http://us1.samba.org/samba/Linux_CIFS_client.html)
+and change directory into the top of the kernel directory
+then patch the kernel (e.g. "patch -p1 < cifs_24.patch") 
+to add the cifs vfs to your kernel configure options if
+it has not already been added (e.g. current SuSE and UL
+users do not need to apply the cifs_24.patch since the cifs vfs is
+already in the kernel configure menu) and then
+mkdir linux/fs/cifs and then copy the current cifs vfs files from
+the cifs download to your kernel build directory e.g.
+
+       cp <cifs_download_dir>/fs/cifs/* to <kernel_download_dir>/fs/cifs
+       
+2) make menuconfig (or make xconfig)
+3) select cifs from within the network filesystem choices
+4) save and exit
+5) make dep
+6) make modules (or "make" if CIFS VFS not to be built as a module)
+
+For Linux 2.6:
+1) Download the kernel (e.g. from http://www.kernel.org)
+and change directory into the top of the kernel directory tree
+(e.g. /usr/src/linux-2.5.73)
+2) make menuconfig (or make xconfig)
+3) select cifs from within the network filesystem choices
+4) save and exit
+5) make
+
+
+Installation instructions:
+=========================
+If you have built the CIFS vfs as module (successfully) simply
+type "make modules_install" (or if you prefer, manually copy the file to
+the modules directory e.g. /lib/modules/2.4.10-4GB/kernel/fs/cifs/cifs.o).
+
+If you have built the CIFS vfs into the kernel itself, follow the instructions
+for your distribution on how to install a new kernel (usually you
+would simply type "make install").
+
+If you do not have the utility mount.cifs (in the Samba 3.0 source tree and on 
+the CIFS VFS web site) copy it to the same directory in which mount.smbfs and 
+similar files reside (usually /sbin).  Although the helper software is not  
+required, mount.cifs is recommended.  Eventually the Samba 3.0 utility program 
+"net" may also be helpful since it may someday provide easier mount syntax for
+users who are used to Windows e.g.
+       net use <mount point> <UNC name or cifs URL>
+Note that running the Winbind pam/nss module (logon service) on all of your
+Linux clients is useful in mapping Uids and Gids consistently across the
+domain to the proper network user.  The mount.cifs mount helper can be
+trivially built from Samba 3.0 or later source e.g. by executing:
+
+       gcc samba/source/client/mount.cifs.c -o mount.cifs
+
+If cifs is built as a module, then the size and number of network buffers
+and maximum number of simultaneous requests to one server can be configured.
+Changing these from their defaults is not recommended. By executing modinfo
+       modinfo kernel/fs/cifs/cifs.ko
+on kernel/fs/cifs/cifs.ko the list of configuration changes that can be made
+at module initialization time (by running insmod cifs.ko) can be seen.
+
+Allowing User Mounts
+====================
+To permit users to mount and unmount over directories they own is possible
+with the cifs vfs.  A way to enable such mounting is to mark the mount.cifs
+utility as suid (e.g. "chmod +s /sbin/mount.cifs"). To enable users to 
+umount shares they mount requires
+1) mount.cifs version 1.4 or later
+2) an entry for the share in /etc/fstab indicating that a user may
+unmount it e.g.
+//server/usersharename  /mnt/username cifs user 0 0
+
+Note that when the mount.cifs utility is run suid (allowing user mounts), 
+in order to reduce risks, the "nosuid" mount flag is passed in on mount to
+disallow execution of an suid program mounted on the remote target.
+When mount is executed as root, nosuid is not passed in by default,
+and execution of suid programs on the remote target would be enabled
+by default. This can be changed, as with nfs and other filesystems, 
+by simply specifying "nosuid" among the mount options. For user mounts 
+though to be able to pass the suid flag to mount requires rebuilding 
+mount.cifs with the following flag: 
+        gcc samba/source/client/mount.cifs.c -DCIFS_ALLOW_USR_SUID -o mount.cifs
+
+There is a corresponding manual page for cifs mounting in the Samba 3.0 and
+later source tree in docs/manpages/mount.cifs.8 
+
+Allowing User Unmounts
+======================
+To permit users to unmount directories that they have user mounted (see above),
+the utility umount.cifs may be used.  It may be invoked directly, or if 
+umount.cifs is placed in /sbin, umount can invoke the cifs umount helper
+(at least for most versions of the umount utility) for umount of cifs
+mounts, unless umount is invoked with -i (which will avoid invoking a umount
+helper). As with mount.cifs, to enable user unmounts umount.cifs must be marked
+as suid (e.g. "chmod +s /sbin/umount.cifs") or equivalent (some distributions
+allow adding entries to a file to the /etc/permissions file to achieve the
+equivalent suid effect).  For this utility to succeed the target path
+must be a cifs mount, and the uid of the current user must match the uid
+of the user who mounted the resource.
+
+Also note that the customary way of allowing user mounts and unmounts is 
+(instead of using mount.cifs and umount.cifs as suid) to add a line
+to the file /etc/fstab for each //server/share you wish to mount, but
+this can become unwieldy when potential mount targets include many
+or unpredictable UNC names.
+
+Samba Considerations 
+==================== 
+To get the maximum benefit from the CIFS VFS, we recommend using a server that 
+supports the SNIA CIFS Unix Extensions standard (e.g.  Samba 2.2.5 or later or 
+Samba 3.0) but the CIFS vfs works fine with a wide variety of CIFS servers.  
+Note that uid, gid and file permissions will display default values if you do 
+not have a server that supports the Unix extensions for CIFS (such as Samba 
+2.2.5 or later).  To enable the Unix CIFS Extensions in the Samba server, add 
+the line: 
+
+       unix extensions = yes
+       
+to your smb.conf file on the server.  Note that the following smb.conf settings 
+are also useful (on the Samba server) when the majority of clients are Unix or 
+Linux: 
+
+       case sensitive = yes
+       delete readonly = yes 
+       ea support = yes
+
+Note that server ea support is required for supporting xattrs from the Linux
+cifs client, and that EA support is present in later versions of Samba (e.g. 
+3.0.6 and later (also EA support works in all versions of Windows, at least to
+shares on NTFS filesystems).  Extended Attribute (xattr) support is an optional
+feature of most Linux filesystems which may require enabling via
+make menuconfig. Client support for extended attributes (user xattr) can be
+disabled on a per-mount basis by specifying "nouser_xattr" on mount.
+
+The CIFS client can get and set POSIX ACLs (getfacl, setfacl) to Samba servers
+version 3.10 and later.  Setting POSIX ACLs requires enabling both XATTR and 
+then POSIX support in the CIFS configuration options when building the cifs
+module.  POSIX ACL support can be disabled on a per mount basic by specifying
+"noacl" on mount.
+Some administrators may want to change Samba's smb.conf "map archive" and 
+"create mask" parameters from the default.  Unless the create mask is changed
+newly created files can end up with an unnecessarily restrictive default mode,
+which may not be what you want, although if the CIFS Unix extensions are
+enabled on the server and client, subsequent setattr calls (e.g. chmod) can
+fix the mode.  Note that creating special devices (mknod) remotely 
+may require specifying a mkdev function to Samba if you are not using 
+Samba 3.0.6 or later.  For more information on these see the manual pages
+("man smb.conf") on the Samba server system.  Note that the cifs vfs,
+unlike the smbfs vfs, does not read the smb.conf on the client system 
+(the few optional settings are passed in on mount via -o parameters instead).  
+Note that Samba 2.2.7 or later includes a fix that allows the CIFS VFS to delete
+open files (required for strict POSIX compliance).  Windows Servers already 
+supported this feature. Samba server does not allow symlinks that refer to files
+outside of the share, so in Samba versions prior to 3.0.6, most symlinks to
+files with absolute paths (ie beginning with slash) such as:
+        ln -s /mnt/foo bar
+would be forbidden. Samba 3.0.6 server or later includes the ability to create 
+such symlinks safely by converting unsafe symlinks (ie symlinks to server 
+files that are outside of the share) to a samba specific format on the server
+that is ignored by local server applications and non-cifs clients and that will
+not be traversed by the Samba server.  This is opaque to the Linux client
+application using the cifs vfs. Absolute symlinks will work to Samba 3.0.5 or
+later, but only for remote clients using the CIFS Unix extensions, and will
+be invisible to Windows clients and typically will not affect local
+applications running on the same server as Samba.  
+
+Use instructions:
+================
+Once the CIFS VFS support is built into the kernel or installed as a module 
+(cifs.o), you can use mount syntax like the following to access Samba or Windows 
+servers: 
+
+  mount -t cifs //9.53.216.11/e$ /mnt -o user=myname,pass=mypassword
+
+Before -o the option -v may be specified to make the mount.cifs
+mount helper display the mount steps more verbosely.  
+After -o the following commonly used cifs vfs specific options
+are supported:
+
+  user=<username>
+  pass=<password>
+  domain=<domain name>
+  
+Other cifs mount options are described below.  Use of TCP names (in addition to
+ip addresses) is available if the mount helper (mount.cifs) is installed. If
+you do not trust the server to which are mounted, or if you do not have
+cifs signing enabled (and the physical network is insecure), consider use
+of the standard mount options "noexec" and "nosuid" to reduce the risk of 
+running an altered binary on your local system (downloaded from a hostile server
+or altered by a hostile router).
+
+Although mounting using format corresponding to the CIFS URL specification is
+not possible in mount.cifs yet, it is possible to use an alternate format
+for the server and sharename (which is somewhat similar to NFS style mount
+syntax) instead of the more widely used UNC format (i.e. \\server\share):
+  mount -t cifs tcp_name_of_server:share_name /mnt -o user=myname,pass=mypasswd
+
+When using the mount helper mount.cifs, passwords may be specified via alternate
+mechanisms, instead of specifying it after -o using the normal "pass=" syntax
+on the command line:
+1) By including it in a credential file. Specify credentials=filename as one
+of the mount options. Credential files contain two lines
+        username=someuser
+        password=your_password
+2) By specifying the password in the PASSWD environment variable (similarly
+the user name can be taken from the USER environment variable).
+3) By specifying the password in a file by name via PASSWD_FILE
+4) By specifying the password in a file by file descriptor via PASSWD_FD
+
+If no password is provided, mount.cifs will prompt for password entry
+
+Restrictions
+============
+Servers must support either "pure-TCP" (port 445 TCP/IP CIFS connections) or RFC 
+1001/1002 support for "Netbios-Over-TCP/IP." This is not likely to be a 
+problem as most servers support this.
+
+Valid filenames differ between Windows and Linux.  Windows typically restricts
+filenames which contain certain reserved characters (e.g. the character : 
+which is used to delimit the beginning of a stream name by Windows), while
+Linux allows a slightly wider set of valid characters in filenames. Windows
+servers can remap such characters when an explicit mapping is specified in
+the Server's registry.  Samba starting with version 3.10 will allow such 
+filenames (ie those which contain valid Linux characters, which normally
+would be forbidden for Windows/CIFS semantics) as long as the server is
+configured for Unix Extensions (and the client has not disabled
+/proc/fs/cifs/LinuxExtensionsEnabled).
+  
+
+CIFS VFS Mount Options
+======================
+A partial list of the supported mount options follows:
+  user         The user name to use when trying to establish
+               the CIFS session.
+  password     The user password.  If the mount helper is
+               installed, the user will be prompted for password
+               if not supplied.
+  ip           The ip address of the target server
+  unc          The target server Universal Network Name (export) to 
+               mount.  
+  domain       Set the SMB/CIFS workgroup name prepended to the
+               username during CIFS session establishment
+  forceuid     Set the default uid for inodes to the uid
+               passed in on mount. For mounts to servers
+               which do support the CIFS Unix extensions, such as a
+               properly configured Samba server, the server provides
+               the uid, gid and mode so this parameter should not be
+               specified unless the server and clients uid and gid
+               numbering differ.  If the server and client are in the
+               same domain (e.g. running winbind or nss_ldap) and
+               the server supports the Unix Extensions then the uid
+               and gid can be retrieved from the server (and uid
+               and gid would not have to be specified on the mount). 
+               For servers which do not support the CIFS Unix
+               extensions, the default uid (and gid) returned on lookup
+               of existing files will be the uid (gid) of the person
+               who executed the mount (root, except when mount.cifs
+               is configured setuid for user mounts) unless the "uid=" 
+               (gid) mount option is specified. Also note that permission
+               checks (authorization checks) on accesses to a file occur
+               at the server, but there are cases in which an administrator
+               may want to restrict at the client as well.  For those
+               servers which do not report a uid/gid owner
+               (such as Windows), permissions can also be checked at the
+               client, and a crude form of client side permission checking 
+               can be enabled by specifying file_mode and dir_mode on 
+               the client.  (default)
+  forcegid     (similar to above but for the groupid instead of uid) (default)
+  noforceuid   Fill in file owner information (uid) by requesting it from
+               the server if possible. With this option, the value given in
+               the uid= option (on mount) will only be used if the server
+               can not support returning uids on inodes.
+  noforcegid   (similar to above but for the group owner, gid, instead of uid)
+  uid          Set the default uid for inodes, and indicate to the
+               cifs kernel driver which local user mounted. If the server
+               supports the unix extensions the default uid is
+               not used to fill in the owner fields of inodes (files)
+               unless the "forceuid" parameter is specified.
+  gid          Set the default gid for inodes (similar to above).
+  file_mode     If CIFS Unix extensions are not supported by the server
+               this overrides the default mode for file inodes.
+  fsc          Enable local disk caching using FS-Cache (off by default). This
+               option could be useful to improve performance on a slow link,
+               heavily loaded server and/or network where reading from the
+               disk is faster than reading from the server (over the network).
+               This could also impact scalability positively as the
+               number of calls to the server are reduced. However, local
+               caching is not suitable for all workloads for e.g. read-once
+               type workloads. So, you need to consider carefully your
+               workload/scenario before using this option. Currently, local
+               disk caching is functional for CIFS files opened as read-only.
+  dir_mode      If CIFS Unix extensions are not supported by the server 
+               this overrides the default mode for directory inodes.
+  port         attempt to contact the server on this tcp port, before
+               trying the usual ports (port 445, then 139).
+  iocharset     Codepage used to convert local path names to and from
+               Unicode. Unicode is used by default for network path
+               names if the server supports it.  If iocharset is
+               not specified then the nls_default specified
+               during the local client kernel build will be used.
+               If server does not support Unicode, this parameter is
+               unused.
+  rsize                default read size (usually 16K). The client currently
+               can not use rsize larger than CIFSMaxBufSize. CIFSMaxBufSize
+               defaults to 16K and may be changed (from 8K to the maximum
+               kmalloc size allowed by your kernel) at module install time
+               for cifs.ko. Setting CIFSMaxBufSize to a very large value
+               will cause cifs to use more memory and may reduce performance
+               in some cases.  To use rsize greater than 127K (the original
+               cifs protocol maximum) also requires that the server support
+               a new Unix Capability flag (for very large read) which some
+               newer servers (e.g. Samba 3.0.26 or later) do. rsize can be
+               set from a minimum of 2048 to a maximum of 130048 (127K or
+               CIFSMaxBufSize, whichever is smaller)
+  wsize                default write size (default 57344)
+               maximum wsize currently allowed by CIFS is 57344 (fourteen
+               4096 byte pages)
+  actimeo=n    attribute cache timeout in seconds (default 1 second).
+               After this timeout, the cifs client requests fresh attribute
+               information from the server. This option allows to tune the
+               attribute cache timeout to suit the workload needs. Shorter
+               timeouts mean better cache coherency, but an increased number
+               of calls to the server. Longer timeouts mean a reduced number
+               of calls to the server at the expense of less strict cache
+               coherency checks (i.e. incorrect attribute cache for a short
+               period of time).
+  rw           mount the network share read-write (note that the
+               server may still consider the share read-only)
+  ro           mount network share read-only
+  version      used to distinguish different versions of the
+               mount helper utility (not typically needed)
+  sep          if first mount option (after the -o), overrides
+               the comma as the separator between the mount
+               parms. e.g.
+                       -o user=myname,password=mypassword,domain=mydom
+               could be passed instead with period as the separator by
+                       -o sep=.user=myname.password=mypassword.domain=mydom
+               this might be useful when comma is contained within username
+               or password or domain. This option is less important
+               when the cifs mount helper mount.cifs (version 1.1 or later)
+               is used.
+  nosuid        Do not allow remote executables with the suid bit
+               set to be executed.  This is only meaningful for mounts
+               to servers such as Samba which support the CIFS Unix Extensions.
+               If you do not trust the servers in your network (your mount
+               targets) it is recommended that you specify this option for
+               greater security.
+  exec         Permit execution of binaries on the mount.
+  noexec       Do not permit execution of binaries on the mount.
+  dev          Recognize block devices on the remote mount.
+  nodev                Do not recognize devices on the remote mount.
+  suid          Allow remote files on this mountpoint with suid enabled to 
+               be executed (default for mounts when executed as root,
+               nosuid is default for user mounts).
+  credentials   Although ignored by the cifs kernel component, it is used by 
+               the mount helper, mount.cifs. When mount.cifs is installed it
+               opens and reads the credential file specified in order  
+               to obtain the userid and password arguments which are passed to
+               the cifs vfs.
+  guest         Although ignored by the kernel component, the mount.cifs
+               mount helper will not prompt the user for a password
+               if guest is specified on the mount options.  If no
+               password is specified a null password will be used.
+  perm          Client does permission checks (vfs_permission check of uid
+               and gid of the file against the mode and desired operation),
+               Note that this is in addition to the normal ACL check on the
+               target machine done by the server software. 
+               Client permission checking is enabled by default.
+  noperm        Client does not do permission checks.  This can expose
+               files on this mount to access by other users on the local
+               client system. It is typically only needed when the server
+               supports the CIFS Unix Extensions but the UIDs/GIDs on the
+               client and server system do not match closely enough to allow
+               access by the user doing the mount, but it may be useful with
+               non CIFS Unix Extension mounts for cases in which the default
+               mode is specified on the mount but is not to be enforced on the
+               client (e.g. perhaps when MultiUserMount is enabled)
+               Note that this does not affect the normal ACL check on the
+               target machine done by the server software (of the server
+               ACL against the user name provided at mount time).
+  serverino    Use server's inode numbers instead of generating automatically
+               incrementing inode numbers on the client.  Although this will
+               make it easier to spot hardlinked files (as they will have
+               the same inode numbers) and inode numbers may be persistent,
+               note that the server does not guarantee that the inode numbers
+               are unique if multiple server side mounts are exported under a
+               single share (since inode numbers on the servers might not
+               be unique if multiple filesystems are mounted under the same
+               shared higher level directory).  Note that some older
+               servers (e.g. pre-Windows 2000) do not support returning UniqueIDs
+               or the CIFS Unix Extensions equivalent and for those
+               this mount option will have no effect.  Exporting cifs mounts
+               under nfsd requires this mount option on the cifs mount.
+               This is now the default if server supports the 
+               required network operation.
+  noserverino   Client generates inode numbers (rather than using the actual one
+               from the server). These inode numbers will vary after
+               unmount or reboot which can confuse some applications,
+               but not all server filesystems support unique inode
+               numbers.
+  setuids       If the CIFS Unix extensions are negotiated with the server
+               the client will attempt to set the effective uid and gid of
+               the local process on newly created files, directories, and
+               devices (create, mkdir, mknod).  If the CIFS Unix Extensions
+               are not negotiated, for newly created files and directories
+               instead of using the default uid and gid specified on
+               the mount, cache the new file's uid and gid locally which means
+               that the uid for the file can change when the inode is
+               reloaded (or the user remounts the share).
+  nosetuids     The client will not attempt to set the uid and gid on
+               newly created files, directories, and devices (create,
+               mkdir, mknod) which will result in the server setting the
+               uid and gid to the default (usually the server uid of the
+               user who mounted the share).  Letting the server (rather than
+               the client) set the uid and gid is the default. If the CIFS
+               Unix Extensions are not negotiated then the uid and gid for
+               new files will appear to be the uid (gid) of the mounter or the
+               uid (gid) parameter specified on the mount.
+  netbiosname   When mounting to servers via port 139, specifies the RFC1001
+               source name to use to represent the client netbios machine 
+               name when doing the RFC1001 netbios session initialize.
+  direct        Do not do inode data caching on files opened on this mount.
+               This precludes mmapping files on this mount. In some cases
+               with fast networks and little or no caching benefits on the
+               client (e.g. when the application is doing large sequential
+               reads bigger than page size without rereading the same data) 
+               this can provide better performance than the default
+               behavior which caches reads (readahead) and writes 
+               (writebehind) through the local Linux client pagecache 
+               if oplock (caching token) is granted and held. Note that
+               direct allows write operations larger than page size
+               to be sent to the server.
+  strictcache   Use for switching on strict cache mode. In this mode the
+               client read from the cache all the time it has Oplock Level II,
+               otherwise - read from the server. All written data are stored
+               in the cache, but if the client doesn't have Exclusive Oplock,
+               it writes the data to the server.
+  rwpidforward  Forward pid of a process who opened a file to any read or write
+               operation on that file. This prevent applications like WINE
+               from failing on read and write if we use mandatory brlock style.
+  acl          Allow setfacl and getfacl to manage posix ACLs if server
+               supports them.  (default)
+  noacl        Do not allow setfacl and getfacl calls on this mount
+  user_xattr    Allow getting and setting user xattrs (those attributes whose
+               name begins with "user." or "os2.") as OS/2 EAs (extended
+               attributes) to the server.  This allows support of the
+               setfattr and getfattr utilities. (default)
+  nouser_xattr  Do not allow getfattr/setfattr to get/set/list xattrs 
+  mapchars      Translate six of the seven reserved characters (not backslash)
+                       *?<>|:
+               to the remap range (above 0xF000), which also
+               allows the CIFS client to recognize files created with
+               such characters by Windows's POSIX emulation. This can
+               also be useful when mounting to most versions of Samba
+               (which also forbids creating and opening files
+               whose names contain any of these seven characters).
+               This has no effect if the server does not support
+               Unicode on the wire.
+ nomapchars     Do not translate any of these seven characters (default).
+ nocase         Request case insensitive path name matching (case
+               sensitive is the default if the server supports it).
+               (mount option "ignorecase" is identical to "nocase")
+ posixpaths     If CIFS Unix extensions are supported, attempt to
+               negotiate posix path name support which allows certain
+               characters forbidden in typical CIFS filenames, without
+               requiring remapping. (default)
+ noposixpaths   If CIFS Unix extensions are supported, do not request
+               posix path name support (this may cause servers to
+               reject creating files with certain reserved characters).
+ nounix         Disable the CIFS Unix Extensions for this mount (tree
+               connection). This is rarely needed, but it may be useful
+               in order to turn off multiple settings all at once (ie
+               posix acls, posix locks, posix paths, symlink support
+               and retrieving uids/gids/mode from the server) or to
+               work around a bug in servers which implement the Unix
+               Extensions.
+ nobrl          Do not send byte range lock requests to the server.
+               This is necessary for certain applications that break
+               with cifs style mandatory byte range locks (and most
+               cifs servers do not yet support requesting advisory
+               byte range locks).
+ forcemandatorylock Even if the server supports posix (advisory) byte range
+               locking, send only mandatory lock requests.  For some
+               (presumably rare) applications, originally coded for
+               DOS/Windows, which require Windows style mandatory byte range
+               locking, they may be able to take advantage of this option,
+               forcing the cifs client to only send mandatory locks
+               even if the cifs server would support posix advisory locks.
+               "forcemand" is accepted as a shorter form of this mount
+               option.
+ nostrictsync   If this mount option is set, when an application does an
+               fsync call then the cifs client does not send an SMB Flush
+               to the server (to force the server to write all dirty data
+               for this file immediately to disk), although cifs still sends
+               all dirty (cached) file data to the server and waits for the
+               server to respond to the write.  Since SMB Flush can be
+               very slow, and some servers may be reliable enough (to risk
+               delaying slightly flushing the data to disk on the server),
+               turning on this option may be useful to improve performance for
+               applications that fsync too much, at a small risk of server
+               crash.  If this mount option is not set, by default cifs will
+               send an SMB flush request (and wait for a response) on every
+               fsync call.
+ nodfs          Disable DFS (global name space support) even if the
+               server claims to support it.  This can help work around
+               a problem with parsing of DFS paths with Samba server
+               versions 3.0.24 and 3.0.25.
+ remount        remount the share (often used to change from ro to rw mounts
+               or vice versa)
+ cifsacl        Report mode bits (e.g. on stat) based on the Windows ACL for
+               the file. (EXPERIMENTAL)
+ servern        Specify the server's netbios name (RFC1001 name) to use
+               when attempting to setup a session to the server. 
+               This is needed for mounting to some older servers (such
+               as OS/2 or Windows 98 and Windows ME) since they do not
+               support a default server name.  A server name can be up
+               to 15 characters long and is usually uppercased.
+ sfu            When the CIFS Unix Extensions are not negotiated, attempt to
+               create device files and fifos in a format compatible with
+               Services for Unix (SFU).  In addition retrieve bits 10-12
+               of the mode via the SETFILEBITS extended attribute (as
+               SFU does).  In the future the bottom 9 bits of the
+               mode also will be emulated using queries of the security
+               descriptor (ACL).
+ mfsymlinks     Enable support for Minshall+French symlinks
+               (see http://wiki.samba.org/index.php/UNIX_Extensions#Minshall.2BFrench_symlinks)
+               This option is ignored when specified together with the
+               'sfu' option. Minshall+French symlinks are used even if
+               the server supports the CIFS Unix Extensions.
+ sign           Must use packet signing (helps avoid unwanted data modification
+               by intermediate systems in the route).  Note that signing
+               does not work with lanman or plaintext authentication.
+ seal           Must seal (encrypt) all data on this mounted share before
+               sending on the network.  Requires support for Unix Extensions.
+               Note that this differs from the sign mount option in that it
+               causes encryption of data sent over this mounted share but other
+               shares mounted to the same server are unaffected.
+ locallease     This option is rarely needed. Fcntl F_SETLEASE is
+               used by some applications such as Samba and NFSv4 server to
+               check to see whether a file is cacheable.  CIFS has no way
+               to explicitly request a lease, but can check whether a file
+               is cacheable (oplocked).  Unfortunately, even if a file
+               is not oplocked, it could still be cacheable (ie cifs client
+               could grant fcntl leases if no other local processes are using
+               the file) for cases for example such as when the server does not
+               support oplocks and the user is sure that the only updates to
+               the file will be from this client. Specifying this mount option
+               will allow the cifs client to check for leases (only) locally
+               for files which are not oplocked instead of denying leases
+               in that case. (EXPERIMENTAL)
+ sec            Security mode.  Allowed values are:
+                       none    attempt to connect as a null user (no name)
+                       krb5    Use Kerberos version 5 authentication
+                       krb5i   Use Kerberos authentication and packet signing
+                       ntlm    Use NTLM password hashing (default)
+                       ntlmi   Use NTLM password hashing with signing (if
+                               /proc/fs/cifs/PacketSigningEnabled on or if
+                               server requires signing also can be the default) 
+                       ntlmv2  Use NTLMv2 password hashing      
+                       ntlmv2i Use NTLMv2 password hashing with packet signing
+                       lanman  (if configured in kernel config) use older
+                               lanman hash
+hard           Retry file operations if server is not responding
+soft           Limit retries to unresponsive servers (usually only
+               one retry) before returning an error.  (default)
+
+The mount.cifs mount helper also accepts a few mount options before -o
+including:
+
+       -S      take password from stdin (equivalent to setting the environment
+               variable "PASSWD_FD=0")
+       -V      print mount.cifs version
+       -?      display simple usage information
+
+With most 2.6 kernel versions of modutils, the version of the cifs kernel
+module can be displayed via modinfo.
+
+Misc /proc/fs/cifs Flags and Debug Info
+=======================================
+Informational pseudo-files:
+DebugData              Displays information about active CIFS sessions and
+                       shares, features enabled as well as the cifs.ko
+                       version.
+Stats                  Lists summary resource usage information as well as per
+                       share statistics, if CONFIG_CIFS_STATS is enabled
+                       in the kernel configuration.
+
+Configuration pseudo-files:
+PacketSigningEnabled   If set to one, cifs packet signing is enabled
+                       and will be used if the server requires 
+                       it.  If set to two, cifs packet signing is
+                       required even if the server considers packet
+                       signing optional. (default 1)
+SecurityFlags          Flags which control security negotiation and
+                       also packet signing. Authentication (may/must)
+                       flags (e.g. for NTLM and/or NTLMv2) may be combined with
+                       the signing flags.  Specifying two different password
+                       hashing mechanisms (as "must use") on the other hand 
+                       does not make much sense. Default flags are 
+                               0x07007 
+                       (NTLM, NTLMv2 and packet signing allowed).  The maximum 
+                       allowable flags if you want to allow mounts to servers
+                       using weaker password hashes is 0x37037 (lanman,
+                       plaintext, ntlm, ntlmv2, signing allowed).  Some
+                       SecurityFlags require the corresponding menuconfig
+                       options to be enabled (lanman and plaintext require
+                       CONFIG_CIFS_WEAK_PW_HASH for example).  Enabling
+                       plaintext authentication currently requires also
+                       enabling lanman authentication in the security flags
+                       because the cifs module only supports sending
+                       plaintext passwords using the older lanman dialect
+                       form of the session setup SMB.  (e.g. for authentication
+                       using plain text passwords, set the SecurityFlags
+                       to 0x30030):
+                       may use packet signing                          0x00001
+                       must use packet signing                         0x01001
+                       may use NTLM (most common password hash)        0x00002
+                       must use NTLM                                   0x02002
+                       may use NTLMv2                                  0x00004
+                       must use NTLMv2                                 0x04004
+                       may use Kerberos security                       0x00008
+                       must use Kerberos                               0x08008
+                       may use lanman (weak) password hash             0x00010
+                       must use lanman password hash                   0x10010
+                       may use plaintext passwords                     0x00020
+                       must use plaintext passwords                    0x20020
+                       (reserved for future packet encryption)         0x00040
+
+cifsFYI                        If set to non-zero value, additional debug information
+                       will be logged to the system error log.  This field
+                       contains three flags controlling different classes of
+                       debugging entries.  The maximum value it can be set
+                       to is 7 which enables all debugging points (default 0).
+                       Some debugging statements are not compiled into the
+                       cifs kernel unless CONFIG_CIFS_DEBUG2 is enabled in the
+                       kernel configuration. cifsFYI may be set to one or
+                       more of the following flags (7 sets them all):
+
+                       log cifs informational messages                 0x01
+                       log return codes from cifs entry points         0x02
+                       log slow responses (ie which take longer than 1 second)
+                         CONFIG_CIFS_STATS2 must be enabled in .config 0x04
+                               
+                               
+traceSMB               If set to one, debug information is logged to the
+                       system error log with the start of smb requests
+                       and responses (default 0)
+LookupCacheEnable      If set to one, inode information is kept cached
+                       for one second improving performance of lookups
+                       (default 1)
+OplockEnabled          If set to one, safe distributed caching enabled.
+                       (default 1)
+LinuxExtensionsEnabled If set to one then the client will attempt to
+                       use the CIFS "UNIX" extensions which are optional
+                       protocol enhancements that allow CIFS servers
+                       to return accurate UID/GID information as well
+                       as support symbolic links. If you use servers
+                       such as Samba that support the CIFS Unix
+                       extensions but do not want to use symbolic link
+                       support and want to map the uid and gid fields 
+                       to values supplied at mount (rather than the
+                       actual values), then set this to zero. (default 1)
+
+These experimental features and tracing can be enabled by changing flags in 
+/proc/fs/cifs (after the cifs module has been installed or built into the 
+kernel, e.g.  insmod cifs).  To enable a feature set it to 1 e.g.  to enable 
+tracing to the kernel message log type: 
+
+       echo 7 > /proc/fs/cifs/cifsFYI
+       
+cifsFYI functions as a bit mask. Setting it to 1 enables additional kernel
+logging of various informational messages.  2 enables logging of non-zero
+SMB return codes while 4 enables logging of requests that take longer
+than one second to complete (except for byte range lock requests). 
+Setting it to 4 requires defining CONFIG_CIFS_STATS2 manually in the
+source code (typically by setting it in the beginning of cifsglob.h),
+and setting it to seven enables all three.  Finally, tracing
+the start of smb requests and responses can be enabled via:
+
+       echo 1 > /proc/fs/cifs/traceSMB
+
+Per share (per client mount) statistics are available in /proc/fs/cifs/Stats
+if the kernel was configured with cifs statistics enabled.  The statistics
+represent the number of successful (ie non-zero return code from the server) 
+SMB responses to some of the more common commands (open, delete, mkdir etc.).
+Also recorded is the total bytes read and bytes written to the server for
+that share.  Note that due to client caching effects this can be less than the
+number of bytes read and written by the application running on the client.
+The statistics for the number of total SMBs and oplock breaks are different in
+that they represent all for that share, not just those for which the server
+returned success.
+       
+Also note that "cat /proc/fs/cifs/DebugData" will display information about
+the active sessions and the shares that are mounted.
+
+Enabling Kerberos (extended security) works but requires version 1.2 or later
+of the helper program cifs.upcall to be present and to be configured in the
+/etc/request-key.conf file.  The cifs.upcall helper program is from the Samba
+project (http://www.samba.org). NTLM and NTLMv2 and LANMAN support do not
+require this helper. Note that NTLMv2 security (which does not require the
+cifs.upcall helper program), instead of using Kerberos, is sufficient for
+some use cases.
+
+DFS support allows transparent redirection to shares in an MS-DFS name space.
+In addition, DFS support for target shares which are specified as UNC
+names which begin with host names (rather than IP addresses) requires
+a user space helper (such as cifs.upcall) to be present in order to
+translate host names to ip address, and the user space helper must also
+be configured in the file /etc/request-key.conf.  Samba, Windows servers and
+many NAS appliances support DFS as a way of constructing a global name
+space to ease network configuration and improve reliability.
+
+To use cifs Kerberos and DFS support, the Linux keyutils package should be
+installed and something like the following lines should be added to the
+/etc/request-key.conf file:
+
+create cifs.spnego * * /usr/local/sbin/cifs.upcall %k
+create dns_resolver * * /usr/local/sbin/cifs.upcall %k
+
+CIFS kernel module parameters
+=============================
+These module parameters can be specified or modified either during the time of
+module loading or during the runtime by using the interface
+       /sys/module/cifs/parameters/<param>
+
+i.e. echo "value" > /sys/module/cifs/parameters/<param>
+
+1. enable_oplocks - Enable or disable oplocks. Oplocks are enabled by default.
+                   [Y/y/1]. To disable use any of [N/n/0].
+
diff --git a/Documentation/filesystems/cifs/TODO b/Documentation/filesystems/cifs/TODO
new file mode 100644 (file)
index 0000000..355abcd
--- /dev/null
@@ -0,0 +1,129 @@
+Version 1.53 May 20, 2008
+
+A Partial List of Missing Features
+==================================
+
+Contributions are welcome.  There are plenty of opportunities
+for visible, important contributions to this module.  Here
+is a partial list of the known problems and missing features:
+
+a) Support for SecurityDescriptors(Windows/CIFS ACLs) for chmod/chgrp/chown
+so that these operations can be supported to Windows servers
+
+b) Mapping POSIX ACLs (and eventually NFSv4 ACLs) to CIFS
+SecurityDescriptors
+
+c) Better pam/winbind integration (e.g. to handle uid mapping
+better)
+
+d) Cleanup now unneeded SessSetup code in
+fs/cifs/connect.c and add back in NTLMSSP code if any servers
+need it
+
+e) fix NTLMv2 signing when two mounts with different users to same
+server.
+
+f) Directory entry caching relies on a 1 second timer, rather than 
+using FindNotify or equivalent.  - (started)
+
+g) quota support (needs minor kernel change since quota calls
+to make it to network filesystems or deviceless filesystems)
+
+h) investigate sync behavior (including syncpage) and check  
+for proper behavior of intr/nointr
+
+i) improve support for very old servers (OS/2 and Win9x for example)
+Including support for changing the time remotely (utimes command).
+
+j) hook lower into the sockets api (as NFS/SunRPC does) to avoid the
+extra copy in/out of the socket buffers in some cases.
+
+k) Better optimize open (and pathbased setfilesize) to reduce the
+oplock breaks coming from windows srv.  Piggyback identical file
+opens on top of each other by incrementing reference count rather
+than resending (helps reduce server resource utilization and avoid
+spurious oplock breaks).
+
+l) Improve performance of readpages by sending more than one read
+at a time when 8 pages or more are requested. In conjunction
+add support for async_cifs_readpages.
+
+m) Add support for storing symlink info to Windows servers 
+in the Extended Attribute format their SFU clients would recognize.
+
+n) Finish fcntl D_NOTIFY support so kde and gnome file list windows
+will autorefresh (partially complete by Asser). Needs minor kernel
+vfs change to support removing D_NOTIFY on a file.   
+
+o) Add GUI tool to configure /proc/fs/cifs settings and for display of
+the CIFS statistics (started)
+
+p) implement support for security and trusted categories of xattrs
+(requires minor protocol extension) to enable better support for SELINUX
+
+q) Implement O_DIRECT flag on open (already supported on mount)
+
+r) Create UID mapping facility so server UIDs can be mapped on a per
+mount or a per server basis to client UIDs or nobody if no mapping
+exists.  This is helpful when Unix extensions are negotiated to
+allow better permission checking when UIDs differ on the server
+and client.  Add new protocol request to the CIFS protocol 
+standard for asking the server for the corresponding name of a
+particular uid.
+
+s) Add support for CIFS Unix and also the newer POSIX extensions to the
+server side for Samba 4.
+
+t) In support for OS/2 (LANMAN 1.2 and LANMAN2.1 based SMB servers) 
+need to add ability to set time to server (utimes command)
+
+u) DOS attrs - returned as pseudo-xattr in Samba format (check VFAT and NTFS for this too)
+
+v) mount check for unmatched uids
+
+w) Add support for new vfs entry point for fallocate
+
+x) Fix Samba 3 server to handle Linux kernel aio so dbench with lots of 
+processes can proceed better in parallel (on the server)
+
+y) Fix Samba 3 to handle reads/writes over 127K (and remove the cifs mount
+restriction of wsize max being 127K) 
+
+KNOWN BUGS (updated April 24, 2007)
+====================================
+See http://bugzilla.samba.org - search on product "CifsVFS" for
+current bug list.
+
+1) existing symbolic links (Windows reparse points) are recognized but
+can not be created remotely. They are implemented for Samba and those that
+support the CIFS Unix extensions, although earlier versions of Samba
+overly restrict the pathnames.
+2) follow_link and readdir code does not follow dfs junctions
+but recognizes them
+3) create of new files to FAT partitions on Windows servers can
+succeed but still return access denied (appears to be Windows 
+server not cifs client problem) and has not been reproduced recently.
+NTFS partitions do not have this problem.
+4) Unix/POSIX capabilities are reset after reconnection, and affect
+a few fields in the tree connection but we do not know which
+superblocks to apply these changes to.  We should probably walk
+the list of superblocks to set these.  Also need to check the
+flags on the second mount to the same share, and see if we
+can do the same trick that NFS does to remount duplicate shares.
+
+Misc testing to do
+==================
+1) check out max path names and max path name components against various server
+types. Try nested symlinks (8 deep). Return max path name in stat -f information
+
+2) Modify file portion of ltp so it can run against a mounted network
+share and run it against cifs vfs in automated fashion.
+
+3) Additional performance testing and optimization using iozone and similar - 
+there are some easy changes that can be done to parallelize sequential writes,
+and when signing is disabled to request larger read sizes (larger than 
+negotiated size) and send larger write sizes to modern servers.
+
+4) More exhaustively test against less common servers.  More testing
+against Windows 9x, Windows ME servers.
+
diff --git a/Documentation/filesystems/cifs/cifs.txt b/Documentation/filesystems/cifs/cifs.txt
new file mode 100644 (file)
index 0000000..2fac91a
--- /dev/null
@@ -0,0 +1,31 @@
+  This is the client VFS module for the Common Internet File System
+  (CIFS) protocol which is the successor to the Server Message Block 
+  (SMB) protocol, the native file sharing mechanism for most early
+  PC operating systems. New and improved versions of CIFS are now
+  called SMB2 and SMB3. These dialects are also supported by the
+  CIFS VFS module. CIFS is fully supported by network
+  file servers such as Windows 2000, 2003, 2008 and 2012
+  as well by Samba (which provides excellent CIFS
+  server support for Linux and many other operating systems), so
+  this network filesystem client can mount to a wide variety of
+  servers.
+
+  The intent of this module is to provide the most advanced network
+  file system function for CIFS compliant servers, including better
+  POSIX compliance, secure per-user session establishment, high
+  performance safe distributed caching (oplock), optional packet
+  signing, large files, Unicode support and other internationalization
+  improvements. Since both Samba server and this filesystem client support
+  the CIFS Unix extensions, the combination can provide a reasonable 
+  alternative to NFSv4 for fileserving in some Linux to Linux environments,
+  not just in Linux to Windows environments.
+
+  This filesystem has a mount utility (mount.cifs) that can be obtained from
+
+      https://ftp.samba.org/pub/linux-cifs/cifs-utils/
+
+  It must be installed in the directory with the other mount helpers.
+
+  For more information on the module see the project wiki page at
+
+      https://wiki.samba.org/index.php/LinuxCIFS_utils
diff --git a/Documentation/filesystems/cifs/winucase_convert.pl b/Documentation/filesystems/cifs/winucase_convert.pl
new file mode 100755 (executable)
index 0000000..322a9c8
--- /dev/null
@@ -0,0 +1,62 @@
+#!/usr/bin/perl -w
+#
+# winucase_convert.pl -- convert "Windows 8 Upper Case Mapping Table.txt" to
+#                        a two-level set of C arrays.
+#
+#   Copyright 2013: Jeff Layton <jlayton@redhat.com>
+#
+#   This program is free software: you can redistribute it and/or modify
+#   it under the terms of the GNU General Public License as published by
+#   the Free Software Foundation, either version 3 of the License, or
+#   (at your option) any later version.
+#
+#   This program is distributed in the hope that it will be useful,
+#   but WITHOUT ANY WARRANTY; without even the implied warranty of
+#   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#   GNU General Public License for more details.
+#
+#   You should have received a copy of the GNU General Public License
+#   along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+while(<>) {
+       next if (!/^0x(..)(..)\t0x(....)\t/);
+       $firstchar = hex($1);
+       $secondchar = hex($2);
+       $uppercase = hex($3);
+
+       $top[$firstchar][$secondchar] = $uppercase;
+}
+
+for ($i = 0; $i < 256; $i++) {
+       next if (!$top[$i]);
+
+       printf("static const wchar_t t2_%2.2x[256] = {", $i);
+       for ($j = 0; $j < 256; $j++) {
+               if (($j % 8) == 0) {
+                       print "\n\t";
+               } else {
+                       print " ";
+               }
+               printf("0x%4.4x,", $top[$i][$j] ? $top[$i][$j] : 0);
+       }
+       print "\n};\n\n";
+}
+
+printf("static const wchar_t *const toplevel[256] = {", $i);
+for ($i = 0; $i < 256; $i++) {
+       if (($i % 8) == 0) {
+               print "\n\t";
+       } elsif ($top[$i]) {
+               print " ";
+       } else {
+               print "  ";
+       }
+
+       if ($top[$i]) {
+               printf("t2_%2.2x,", $i);
+       } else {
+               print "NULL,";
+       }
+}
+print "\n};\n\n";
index 206a1bd..f089058 100644 (file)
@@ -451,3 +451,7 @@ in your dentry operations instead.
 --
 [mandatory]
        ->readdir() is gone now; switch to ->iterate()
+[mandatory]
+       vfs_follow_link has been removed.  Filesystems must use nd_set_link
+       from ->follow_link for normal symlinks, or nd_jump_link for magic
+       /proc/<pid> style links.
index fcc22c9..823c95f 100644 (file)
@@ -854,16 +854,15 @@ Committed_AS: The amount of memory presently allocated on the system.
               The committed memory is a sum of all of the memory which
               has been allocated by processes, even if it has not been
               "used" by them as of yet. A process which malloc()'s 1G
-              of memory, but only touches 300M of it will only show up
-              as using 300M of memory even if it has the address space
-              allocated for the entire 1G. This 1G is memory which has
-              been "committed" to by the VM and can be used at any time
-              by the allocating application. With strict overcommit
-              enabled on the system (mode 2 in 'vm.overcommit_memory'),
-              allocations which would exceed the CommitLimit (detailed
-              above) will not be permitted. This is useful if one needs
-              to guarantee that processes will not fail due to lack of
-              memory once that memory has been successfully allocated.
+              of memory, but only touches 300M of it will show up as
+             using 1G. This 1G is memory which has been "committed" to
+              by the VM and can be used at any time by the allocating
+              application. With strict overcommit enabled on the system
+              (mode 2 in 'vm.overcommit_memory'), allocations which would
+              exceed the CommitLimit (detailed above) will not be permitted.
+              This is useful if one needs to guarantee that processes will
+              not fail due to lack of memory once that memory has been
+              successfully allocated.
 VmallocTotal: total size of vmalloc memory area
  VmallocUsed: amount of vmalloc area which is used
 VmallocChunk: largest contiguous block of vmalloc area which is free
index 59b4a09..b176928 100644 (file)
@@ -79,6 +79,10 @@ to just make sure certain lists can't become empty.
 Most systems just mount another filesystem over rootfs and ignore it.  The
 amount of space an empty instance of ramfs takes up is tiny.
 
+If CONFIG_TMPFS is enabled, rootfs will use tmpfs instead of ramfs by
+default.  To force ramfs, add "rootfstype=ramfs" to the kernel command
+line.
+
 What is initramfs?
 ------------------
 
index 79e4c2e..d08d8c1 100644 (file)
@@ -18,7 +18,25 @@ All three channels can be also controlled using the engine micro programs.
 More details of the instructions can be found from the public data sheet.
 
 LP5521 has the internal program memory for running various LED patterns.
-For the details, please refer to 'firmware' section in leds-lp55xx.txt
+There are two ways to run LED patterns.
+
+1) Legacy interface - enginex_mode and enginex_load
+  Control interface for the engines:
+  x is 1 .. 3
+  enginex_mode : disabled, load, run
+  enginex_load : store program (visible only in engine load mode)
+
+  Example (start to blink the channel 2 led):
+  cd   /sys/class/leds/lp5521:channel2/device
+  echo "load" > engine3_mode
+  echo "037f4d0003ff6000" > engine3_load
+  echo "run" > engine3_mode
+
+  To stop the engine:
+  echo "disabled" > engine3_mode
+
+2) Firmware interface - LP55xx common interface
+  For the details, please refer to 'firmware' section in leds-lp55xx.txt
 
 sysfs contains a selftest entry.
 The test communicates with the chip and checks that
index 899fdad..5b3e91d 100644 (file)
@@ -28,7 +28,26 @@ If both fields are NULL, 'lp5523' is used by default.
 /sys/class/leds/lp5523:channelN  (N: 0 ~ 8)
 
 LP5523 has the internal program memory for running various LED patterns.
-For the details, please refer to 'firmware' section in leds-lp55xx.txt
+There are two ways to run LED patterns.
+
+1) Legacy interface - enginex_mode, enginex_load and enginex_leds
+  Control interface for the engines:
+  x is 1 .. 3
+  enginex_mode : disabled, load, run
+  enginex_load : microcode load (visible only in load mode)
+  enginex_leds : led mux control (visible only in load mode)
+
+  cd /sys/class/leds/lp5523:channel2/device
+  echo "load" > engine3_mode
+  echo "9d80400004ff05ff437f0000" > engine3_load
+  echo "111111111" > engine3_leds
+  echo "run" > engine3_mode
+
+  To stop the engine:
+  echo "disabled" > engine3_mode
+
+2) Firmware interface - LP55xx common interface
+  For the details, please refer to 'firmware' section in leds-lp55xx.txt
 
 Selftest uses always the current from the platform data.
 
index eec8fa2..82713ff 100644 (file)
@@ -1,11 +1,11 @@
-LP5521/LP5523/LP55231 Common Driver
-===================================
+LP5521/LP5523/LP55231/LP5562/LP8501 Common Driver
+=================================================
 
 Authors: Milo(Woogyom) Kim <milo.kim@ti.com>
 
 Description
 -----------
-LP5521, LP5523/55231 and LP5562 have common features as below.
+LP5521, LP5523/55231, LP5562 and LP8501 have common features as below.
 
   Register access via the I2C
   Device initialization/deinitialization
@@ -109,6 +109,30 @@ As soon as 'loading' is set to 0, registered callback is called.
 Inside the callback, the selected engine is loaded and memory is updated.
 To run programmed pattern, 'run_engine' attribute should be enabled.
 
+The pattern sequence of LP8501 is the same as LP5523.
+However pattern data is specific.
+Ex 1) Engine 1 is used
+echo 1 > /sys/bus/i2c/devices/xxxx/select_engine
+echo 1 > /sys/class/firmware/lp8501/loading
+echo "9d0140ff7e0040007e00a001c000" > /sys/class/firmware/lp8501/data
+echo 0 > /sys/class/firmware/lp8501/loading
+echo 1 > /sys/bus/i2c/devices/xxxx/run_engine
+
+Ex 2) Engine 2 and 3 are used at the same time
+echo 2 > /sys/bus/i2c/devices/xxxx/select_engine
+sleep 1
+echo 1 > /sys/class/firmware/lp8501/loading
+echo "9d0140ff7e0040007e00a001c000" > /sys/class/firmware/lp8501/data
+echo 0 > /sys/class/firmware/lp8501/loading
+sleep 1
+echo 3 > /sys/bus/i2c/devices/xxxx/select_engine
+sleep 1
+echo 1 > /sys/class/firmware/lp8501/loading
+echo "9d0340ff7e0040007e00a001c000" > /sys/class/firmware/lp8501/data
+echo 0 > /sys/class/firmware/lp8501/loading
+sleep 1
+echo 1 > /sys/class/leds/d1/device/run_engine
+
 ( 'run_engine' and 'firmware_cb' )
 The sequence of running the program data is common.
 But each device has own specific register addresses for commands.
index 18b64b2..f11580f 100644 (file)
@@ -86,6 +86,8 @@ generic_netlink.txt
        - info on Generic Netlink
 gianfar.txt
        - Gianfar Ethernet Driver.
+i40e.txt
+       - README for the Intel Ethernet Controller XL710 Driver (i40e).
 ieee802154.txt
        - Linux IEEE 802.15.4 implementation, API and drivers
 igb.txt
diff --git a/Documentation/networking/i40e.txt b/Documentation/networking/i40e.txt
new file mode 100644 (file)
index 0000000..f737273
--- /dev/null
@@ -0,0 +1,115 @@
+Linux Base Driver for the Intel(R) Ethernet Controller XL710 Family
+===================================================================
+
+Intel i40e Linux driver.
+Copyright(c) 2013 Intel Corporation.
+
+Contents
+========
+
+- Identifying Your Adapter
+- Additional Configurations
+- Performance Tuning
+- Known Issues
+- Support
+
+
+Identifying Your Adapter
+========================
+
+The driver in this release is compatible with the Intel Ethernet
+Controller XL710 Family.
+
+For more information on how to identify your adapter, go to the Adapter &
+Driver ID Guide at:
+
+    http://support.intel.com/support/network/sb/CS-012904.htm
+
+
+Enabling the driver
+===================
+
+The driver is enabled via the standard kernel configuration system,
+using the make command:
+
+     Make oldconfig/silentoldconfig/menuconfig/etc.
+
+The driver is located in the menu structure at:
+
+       -> Device Drivers
+         -> Network device support (NETDEVICES [=y])
+           -> Ethernet driver support
+             -> Intel devices
+               -> Intel(R) Ethernet Controller XL710 Family
+
+Additional Configurations
+=========================
+
+  Generic Receive Offload (GRO)
+  -----------------------------
+  The driver supports the in-kernel software implementation of GRO.  GRO has
+  shown that by coalescing Rx traffic into larger chunks of data, CPU
+  utilization can be significantly reduced when under large Rx load.  GRO is
+  an evolution of the previously-used LRO interface.  GRO is able to coalesce
+  other protocols besides TCP.  It's also safe to use with configurations that
+  are problematic for LRO, namely bridging and iSCSI.
+
+  Ethtool
+  -------
+  The driver utilizes the ethtool interface for driver configuration and
+  diagnostics, as well as displaying statistical information. The latest
+  ethtool version is required for this functionality.
+
+  The latest release of ethtool can be found from
+  https://www.kernel.org/pub/software/network/ethtool
+
+  Data Center Bridging (DCB)
+  --------------------------
+  DCB configuration is not currently supported.
+
+  FCoE
+  ----
+  Fibre Channel over Ethernet (FCoE) hardware offload is not currently
+  supported.
+
+  MAC and VLAN anti-spoofing feature
+  ----------------------------------
+  When a malicious driver attempts to send a spoofed packet, it is dropped by
+  the hardware and not transmitted.  An interrupt is sent to the PF driver
+  notifying it of the spoof attempt.
+
+  When a spoofed packet is detected the PF driver will send the following
+  message to the system log (displayed by the "dmesg" command):
+
+  Spoof event(s) detected on VF (n)
+
+  Where n=the VF that attempted to do the spoofing.
+
+
+Performance Tuning
+==================
+
+An excellent article on performance tuning can be found at:
+
+http://www.redhat.com/promo/summit/2008/downloads/pdf/Thursday/Mark_Wagner.pdf
+
+
+Known Issues
+============
+
+
+Support
+=======
+
+For general information, go to the Intel support website at:
+
+    http://support.intel.com
+
+or the Intel Wired Networking project hosted by Sourceforge at:
+
+    http://e1000.sourceforge.net
+
+If an issue is identified with the released source code on the supported
+kernel with a supported adapter, email the specific information related
+to the issue to e1000-devel@lists.sourceforge.net and copy
+netdev@vger.kernel.org.
index d529e02..f14f493 100644 (file)
@@ -66,9 +66,7 @@ rq->cfs.load value, which is the sum of the weights of the tasks queued on the
 runqueue.
 
 CFS maintains a time-ordered rbtree, where all runnable tasks are sorted by the
-p->se.vruntime key (there is a subtraction using rq->cfs.min_vruntime to
-account for possible wraparounds).  CFS picks the "leftmost" task from this
-tree and sticks to it.
+p->se.vruntime key. CFS picks the "leftmost" task from this tree and sticks to it.
 As the system progresses forwards, the executed tasks are put into the tree
 more and more to the right --- slowly but surely giving a chance for every task
 to become the "leftmost task" and thus get on the CPU within a deterministic
index cc92ca8..6edaa65 100644 (file)
@@ -1,3 +1,13 @@
+Release Date    : Sat. Aug 31, 2013 17:00:00 PST 2013 -
+                       (emaild-id:megaraidlinux@lsi.com)
+                       Adam Radford
+                       Kashyap Desai
+                       Sumit Saxena
+Current Version : 06.700.06.00-rc1
+Old Version     : 06.600.18.00-rc1
+    1. Add High Availability clustering support using shared Logical Disks.
+    2. Version and Changelog update.
+-------------------------------------------------------------------------------
 Release Date    : Wed. May 15, 2013 17:00:00 PST 2013 -
                        (emaild-id:megaraidlinux@lsi.com)
                        Adam Radford
index ab7d16e..9d4c1d1 100644 (file)
@@ -182,6 +182,7 @@ core_pattern is used to specify a core dumpfile pattern name.
        %<NUL>  '%' is dropped
        %%      output one '%'
        %p      pid
+       %P      global pid (init PID namespace)
        %u      uid
        %g      gid
        %d      dump mode, matches PR_SET_DUMPABLE and
index 36ecc26..79a797e 100644 (file)
@@ -200,17 +200,25 @@ fragmentation index is <= extfrag_threshold. The default value is 500.
 
 hugepages_treat_as_movable
 
-This parameter is only useful when kernelcore= is specified at boot time to
-create ZONE_MOVABLE for pages that may be reclaimed or migrated. Huge pages
-are not movable so are not normally allocated from ZONE_MOVABLE. A non-zero
-value written to hugepages_treat_as_movable allows huge pages to be allocated
-from ZONE_MOVABLE.
-
-Once enabled, the ZONE_MOVABLE is treated as an area of memory the huge
-pages pool can easily grow or shrink within. Assuming that applications are
-not running that mlock() a lot of memory, it is likely the huge pages pool
-can grow to the size of ZONE_MOVABLE by repeatedly entering the desired value
-into nr_hugepages and triggering page reclaim.
+This parameter controls whether we can allocate hugepages from ZONE_MOVABLE
+or not. If set to non-zero, hugepages can be allocated from ZONE_MOVABLE.
+ZONE_MOVABLE is created when kernel boot parameter kernelcore= is specified,
+so this parameter has no effect if used without kernelcore=.
+
+Hugepage migration is now available in some situations which depend on the
+architecture and/or the hugepage size. If a hugepage supports migration,
+allocation from ZONE_MOVABLE is always enabled for the hugepage regardless
+of the value of this parameter.
+IOW, this parameter affects only non-migratable hugepages.
+
+Assuming that hugepages are not migratable in your system, one usecase of
+this parameter is that users can make hugepage pool more extensible by
+enabling the allocation from ZONE_MOVABLE. This is because on ZONE_MOVABLE
+page reclaim/migration/compaction work more and you can get contiguous
+memory more likely. Note that using ZONE_MOVABLE for non-migratable
+hugepages can do harm to other features like memory hotremove (because
+memory hotremove expects that memory blocks on ZONE_MOVABLE are always
+removable,) so it's a trade-off responsible for the users.
 
 ==============================================================
 
index 2b46f67..9010c44 100644 (file)
@@ -1,17 +1,17 @@
-Kernel driver exynos4_tmu
+Kernel driver exynos_tmu
 =================
 
 Supported chips:
-* ARM SAMSUNG EXYNOS4 series of SoC
-  Prefix: 'exynos4-tmu'
+* ARM SAMSUNG EXYNOS4, EXYNOS5 series of SoC
   Datasheet: Not publicly available
 
 Authors: Donggeun Kim <dg77.kim@samsung.com>
+Authors: Amit Daniel <amit.daniel@samsung.com>
 
-Description
------------
+TMU controller Description:
+---------------------------
 
-This driver allows to read temperature inside SAMSUNG EXYNOS4 series of SoC.
+This driver allows to read temperature inside SAMSUNG EXYNOS4/5 series of SoC.
 
 The chip only exposes the measured 8-bit temperature code value
 through a register.
@@ -34,9 +34,9 @@ The three equations are:
   TI2: Trimming info for 85 degree Celsius (stored at TRIMINFO register)
        Temperature code measured at 85 degree Celsius which is unchanged
 
-TMU(Thermal Management Unit) in EXYNOS4 generates interrupt
+TMU(Thermal Management Unit) in EXYNOS4/5 generates interrupt
 when temperature exceeds pre-defined levels.
-The maximum number of configurable threshold is four.
+The maximum number of configurable threshold is five.
 The threshold levels are defined as follows:
   Level_0: current temperature > trigger_level_0 + threshold
   Level_1: current temperature > trigger_level_1 + threshold
@@ -47,6 +47,31 @@ The threshold levels are defined as follows:
   through the corresponding registers.
 
 When an interrupt occurs, this driver notify kernel thermal framework
-with the function exynos4_report_trigger.
+with the function exynos_report_trigger.
 Although an interrupt condition for level_0 can be set,
 it can be used to synchronize the cooling action.
+
+TMU driver description:
+-----------------------
+
+The exynos thermal driver is structured as,
+
+                                       Kernel Core thermal framework
+                               (thermal_core.c, step_wise.c, cpu_cooling.c)
+                                                               ^
+                                                               |
+                                                               |
+TMU configuration data -------> TMU Driver  <------> Exynos Core thermal wrapper
+(exynos_tmu_data.c)          (exynos_tmu.c)       (exynos_thermal_common.c)
+(exynos_tmu_data.h)          (exynos_tmu.h)       (exynos_thermal_common.h)
+
+a) TMU configuration data: This consist of TMU register offsets/bitfields
+               described through structure exynos_tmu_registers. Also several
+               other platform data (struct exynos_tmu_platform_data) members
+               are used to configure the TMU.
+b) TMU driver: This component initialises the TMU controller and sets different
+               thresholds. It invokes core thermal implementation with the call
+               exynos_report_trigger.
+c) Exynos Core thermal wrapper: This provides 3 wrapper function to use the
+               Kernel core thermal framework. They are exynos_unregister_thermal,
+               exynos_register_thermal and exynos_report_trigger.
index a71bd5b..87519cb 100644 (file)
@@ -134,6 +134,13 @@ temperature) and throttle appropriate devices.
                this thermal zone and cdev, for a particular trip point.
                If nth bit is set, then the cdev and thermal zone are bound
                for trip point n.
+    .limits: This is an array of cooling state limits. Must have exactly
+         2 * thermal_zone.number_of_trip_points. It is an array consisting
+         of tuples <lower-state upper-state> of state limits. Each trip
+         will be associated with one state limit tuple when binding.
+         A NULL pointer means <THERMAL_NO_LIMITS THERMAL_NO_LIMITS>
+         on all trips. These limits are used when binding a cdev to a
+         trip point.
     .match: This call back returns success(0) if the 'tz and cdev' need to
            be bound, as per platform data.
 1.4.2 struct thermal_zone_params
@@ -142,6 +149,11 @@ temperature) and throttle appropriate devices.
     This is an optional feature where some platforms can choose not to
     provide this data.
     .governor_name: Name of the thermal governor used for this zone
+    .no_hwmon: a boolean to indicate if the thermal to hwmon sysfs interface
+               is required. when no_hwmon == false, a hwmon sysfs interface
+               will be created. when no_hwmon == true, nothing will be done.
+               In case the thermal_zone_params is NULL, the hwmon interface
+               will be created (for backward compatibility).
     .num_tbps: Number of thermal_bind_params entries for this zone
     .tbp: thermal_bind_params entries
 
index 4ac359b..bdd4bb9 100644 (file)
@@ -165,6 +165,7 @@ which function as described above for the default huge page-sized case.
 
 
 Interaction of Task Memory Policy with Huge Page Allocation/Freeing
+===================================================================
 
 Whether huge pages are allocated and freed via the /proc interface or
 the /sysfs interface using the nr_hugepages_mempolicy attribute, the NUMA
@@ -229,6 +230,7 @@ resulting effect on persistent huge page allocation is as follows:
    of huge pages over all on-lines nodes with memory.
 
 Per Node Hugepages Attributes
+=============================
 
 A subset of the contents of the root huge page control directory in sysfs,
 described above, will be replicated under each the system device of each
@@ -258,6 +260,7 @@ applied, from which node the huge page allocation will be attempted.
 
 
 Using Huge Pages
+================
 
 If the user applications are going to request huge pages using mmap system
 call, then it is required that system administrator mount a file system of
@@ -296,20 +299,16 @@ calls, though the mount of filesystem will be required for using mmap calls
 without MAP_HUGETLB.  For an example of how to use mmap with MAP_HUGETLB see
 map_hugetlb.c.
 
-*******************************************************************
+Examples
+========
 
-/*
- * map_hugetlb: see tools/testing/selftests/vm/map_hugetlb.c
- */
+1) map_hugetlb: see tools/testing/selftests/vm/map_hugetlb.c
 
-*******************************************************************
+2) hugepage-shm:  see tools/testing/selftests/vm/hugepage-shm.c
 
-/*
- * hugepage-shm:  see tools/testing/selftests/vm/hugepage-shm.c
- */
+3) hugepage-mmap:  see tools/testing/selftests/vm/hugepage-mmap.c
 
-*******************************************************************
-
-/*
- * hugepage-mmap:  see tools/testing/selftests/vm/hugepage-mmap.c
- */
+4) The libhugetlbfs (http://libhugetlbfs.sourceforge.net) library provides a
+   wide range of userspace tools to help with huge page usability, environment
+   setup, and control. Furthermore it provides useful test cases that should be
+   used when modifying code to ensure no regressions are introduced.
index 9a12a59..55684d1 100644 (file)
@@ -28,6 +28,13 @@ This is so, since the pages are still mapped to physical memory, and thus all
 the kernel does is finds this fact out and puts both writable and soft-dirty
 bits on the PTE.
 
+  While in most cases tracking memory changes by #PF-s is more than enough
+there is still a scenario when we can lose soft dirty bits -- a task
+unmaps a previously mapped memory region and then maps a new one at exactly
+the same place. When unmap is called, the kernel internally clears PTE values
+including soft dirty bits. To notify user space application about such
+memory region renewal the kernel always marks new memory regions (and
+expanded regions) as soft dirty.
 
   This feature is actively used by the checkpoint-restore project. You
 can find more details about it on http://criu.org
index d721af1..e61c2e8 100644 (file)
@@ -1028,7 +1028,7 @@ F:        arch/arm/mach-orion5x/ts78xx-*
 ARM/MICREL KS8695 ARCHITECTURE
 M:     Greg Ungerer <gerg@uclinux.org>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
-F:     arch/arm/mach-ks8695
+F:     arch/arm/mach-ks8695/
 S:     Odd Fixes
 
 ARM/MIOA701 MACHINE SUPPORT
@@ -1048,7 +1048,6 @@ M:        STEricsson <STEricsson_nomadik_linux@list.st.com>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
 F:     arch/arm/mach-nomadik/
-F:     arch/arm/plat-nomadik/
 F:     drivers/i2c/busses/i2c-nomadik.c
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-nomadik.git
 
@@ -1070,7 +1069,7 @@ F:        drivers/mmc/host/msm_sdcc.h
 F:     drivers/tty/serial/msm_serial.h
 F:     drivers/tty/serial/msm_serial.c
 F:     drivers/*/pm8???-*
-F:     drivers/ssbi/
+F:     drivers/mfd/ssbi/
 F:     include/linux/mfd/pm8xxx/
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/davidb/linux-msm.git
 S:     Maintained
@@ -1156,7 +1155,6 @@ L:        linux-samsung-soc@vger.kernel.org (moderated for non-subscribers)
 W:     http://www.fluff.org/ben/linux/
 S:     Maintained
 F:     arch/arm/plat-samsung/
-F:     arch/arm/plat-s3c24xx/
 F:     arch/arm/mach-s3c24*/
 F:     arch/arm/mach-s3c64xx/
 F:     drivers/*/*s3c2410*
@@ -1179,8 +1177,6 @@ L:        linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
 F:     arch/arm/mach-s5pv210/mach-aquila.c
 F:     arch/arm/mach-s5pv210/mach-goni.c
-F:     arch/arm/mach-exynos/mach-universal_c210.c
-F:     arch/arm/mach-exynos/mach-nuri.c
 
 ARM/SAMSUNG S5P SERIES 2D GRAPHICS ACCELERATION (G2D) SUPPORT
 M:     Kyungmin Park <kyungmin.park@samsung.com>
@@ -1325,7 +1321,7 @@ F:        drivers/mmc/host/wmt-sdmmc.c
 F:     drivers/pwm/pwm-vt8500.c
 F:     drivers/rtc/rtc-vt8500.c
 F:     drivers/tty/serial/vt8500_serial.c
-F:     drivers/usb/host/ehci-vt8500.c
+F:     drivers/usb/host/ehci-platform.c
 F:     drivers/usb/host/uhci-platform.c
 F:     drivers/video/vt8500lcdfb.*
 F:     drivers/video/wm8505fb*
@@ -1815,6 +1811,17 @@ L:       netdev@vger.kernel.org
 S:     Supported
 F:     drivers/net/ethernet/broadcom/bnx2x/
 
+BROADCOM BCM281XX/BCM11XXX ARM ARCHITECTURE
+M:     Christian Daudt <csd@broadcom.com>
+T:     git git://git.github.com/broadcom/bcm11351
+S:     Maintained
+F:     arch/arm/mach-bcm/
+F:     arch/arm/boot/dts/bcm113*
+F:     arch/arm/boot/dts/bcm281*
+F:     arch/arm/configs/bcm_defconfig
+F:     drivers/mmc/host/sdhci_bcm_kona.c
+F:     drivers/clocksource/bcm_kona_timer.c
+
 BROADCOM BCM2835 ARM ARCHICTURE
 M:     Stephen Warren <swarren@wwwdotorg.org>
 L:     linux-rpi-kernel@lists.infradead.org (moderated for non-subscribers)
@@ -2035,10 +2042,10 @@ W:      http://ceph.com/
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client.git
 S:     Supported
 F:     Documentation/filesystems/ceph.txt
-F:     fs/ceph
-F:     net/ceph
-F:     include/linux/ceph
-F:     include/linux/crush
+F:     fs/ceph/
+F:     net/ceph/
+F:     include/linux/ceph/
+F:     include/linux/crush/
 
 CERTIFIED WIRELESS USB (WUSB) SUBSYSTEM:
 L:     linux-usb@vger.kernel.org
@@ -2335,7 +2342,7 @@ CPU POWER MONITORING SUBSYSTEM
 M:     Dominik Brodowski <linux@dominikbrodowski.net>
 M:     Thomas Renninger <trenn@suse.de>
 S:     Maintained
-F:     tools/power/cpupower
+F:     tools/power/cpupower/
 
 CPUSETS
 M:     Li Zefan <lizefan@huawei.com>
@@ -2773,7 +2780,7 @@ L:        intel-gfx@lists.freedesktop.org
 L:     dri-devel@lists.freedesktop.org
 T:     git git://people.freedesktop.org/~danvet/drm-intel
 S:     Supported
-F:     drivers/gpu/drm/i915
+F:     drivers/gpu/drm/i915/
 F:     include/drm/i915*
 F:     include/uapi/drm/i915*
 
@@ -2785,7 +2792,7 @@ M:        Kyungmin Park <kyungmin.park@samsung.com>
 L:     dri-devel@lists.freedesktop.org
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/daeinki/drm-exynos.git
 S:     Supported
-F:     drivers/gpu/drm/exynos
+F:     drivers/gpu/drm/exynos/
 F:     include/drm/exynos*
 F:     include/uapi/drm/exynos*
 
@@ -3038,7 +3045,7 @@ M:        Mauro Carvalho Chehab <m.chehab@samsung.com>
 L:     linux-edac@vger.kernel.org
 W:     bluesmoke.sourceforge.net
 S:     Maintained
-F:     drivers/edac/ghes-edac.c
+F:     drivers/edac/ghes_edac.c
 
 EDAC-I82443BXGX
 M:     Tim Small <tim@buttersideup.com>
@@ -3644,8 +3651,8 @@ M:        Arnd Bergmann <arnd@arndb.de>
 L:     linux-arch@vger.kernel.org
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/arnd/asm-generic.git
 S:     Maintained
-F:     include/asm-generic
-F:     include/uapi/asm-generic
+F:     include/asm-generic/
+F:     include/uapi/asm-generic/
 
 GENERIC UIO DRIVER FOR PCI DEVICES
 M:     "Michael S. Tsirkin" <mst@redhat.com>
@@ -3687,7 +3694,8 @@ GRE DEMULTIPLEXER DRIVER
 M:     Dmitry Kozlov <xeb@mail.ru>
 L:     netdev@vger.kernel.org
 S:     Maintained
-F:     net/ipv4/gre.c
+F:     net/ipv4/gre_demux.c
+F:     net/ipv4/gre_offload.c
 F:     include/net/gre.h
 
 GRETH 10/100/1G Ethernet MAC device driver
@@ -3765,7 +3773,7 @@ L:        linux-media@vger.kernel.org
 T:     git git://linuxtv.org/media_tree.git
 W:     http://linuxtv.org
 S:     Odd Fixes
-F:     drivers/media/usb/hdpvr
+F:     drivers/media/usb/hdpvr/
 
 HWPOISON MEMORY FAILURE HANDLING
 M:     Andi Kleen <andi@firstfloor.org>
@@ -4355,7 +4363,7 @@ M:        Deepak Saxena <dsaxena@plexity.net>
 S:     Maintained
 F:     drivers/char/hw_random/ixp4xx-rng.c
 
-INTEL ETHERNET DRIVERS (e100/e1000/e1000e/igb/igbvf/ixgb/ixgbe/ixgbevf)
+INTEL ETHERNET DRIVERS (e100/e1000/e1000e/igb/igbvf/ixgb/ixgbe/ixgbevf/i40e)
 M:     Jeff Kirsher <jeffrey.t.kirsher@intel.com>
 M:     Jesse Brandeburg <jesse.brandeburg@intel.com>
 M:     Bruce Allan <bruce.w.allan@intel.com>
@@ -4380,6 +4388,7 @@ F:        Documentation/networking/igbvf.txt
 F:     Documentation/networking/ixgb.txt
 F:     Documentation/networking/ixgbe.txt
 F:     Documentation/networking/ixgbevf.txt
+F:     Documentation/networking/i40e.txt
 F:     drivers/net/ethernet/intel/
 
 INTEL PRO/WIRELESS 2100, 2200BG, 2915ABG NETWORK CONNECTION SUPPORT
@@ -4573,7 +4582,7 @@ S:        Supported
 W:     http://www.openfabrics.org
 W:     www.open-iscsi.org
 Q:     http://patchwork.kernel.org/project/linux-rdma/list/
-F:     drivers/infiniband/ulp/iser
+F:     drivers/infiniband/ulp/iser/
 
 ISDN SUBSYSTEM
 M:     Karsten Keil <isdn@linux-pingi.de>
@@ -4627,7 +4636,7 @@ W:        http://palosaari.fi/linux/
 Q:     http://patchwork.linuxtv.org/project/linux-media/list/
 T:     git git://linuxtv.org/anttip/media_tree.git
 S:     Maintained
-F:     drivers/media/tuners/it913x*
+F:     drivers/media/tuners/tuner_it913x*
 
 IVTV VIDEO4LINUX DRIVER
 M:     Andy Walls <awalls@md.metrocast.net>
@@ -5963,15 +5972,12 @@ S:      Maintained
 F:     arch/arm/*omap*/*pm*
 F:     drivers/cpufreq/omap-cpufreq.c
 
-OMAP POWERDOMAIN/CLOCKDOMAIN SOC ADAPTATION LAYER SUPPORT
+OMAP POWERDOMAIN SOC ADAPTATION LAYER SUPPORT
 M:     Rajendra Nayak <rnayak@ti.com>
 M:     Paul Walmsley <paul@pwsan.com>
 L:     linux-omap@vger.kernel.org
 S:     Maintained
-F:     arch/arm/mach-omap2/powerdomain2xxx_3xxx.c
-F:     arch/arm/mach-omap2/powerdomain44xx.c
-F:     arch/arm/mach-omap2/clockdomain2xxx_3xxx.c
-F:     arch/arm/mach-omap2/clockdomain44xx.c
+F:     arch/arm/mach-omap2/prm*
 
 OMAP AUDIO SUPPORT
 M:     Peter Ujfalusi <peter.ujfalusi@ti.com>
@@ -6137,7 +6143,7 @@ W:        http://openrisc.net
 L:     linux@lists.openrisc.net (moderated for non-subscribers)
 S:     Maintained
 T:     git git://openrisc.net/~jonas/linux
-F:     arch/openrisc
+F:     arch/openrisc/
 
 OPENVSWITCH
 M:     Jesse Gross <jesse@nicira.com>
@@ -6428,7 +6434,7 @@ M:        Jamie Iles <jamie@jamieiles.com>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 T:     git git://github.com/jamieiles/linux-2.6-ji.git
 S:     Supported
-F:     arch/arm/mach-picoxcell
+F:     arch/arm/mach-picoxcell/
 F:     drivers/*/picoxcell*
 F:     drivers/*/*/picoxcell*
 
@@ -6701,7 +6707,7 @@ F:        drivers/spi/spi-pxa2xx*
 F:     drivers/usb/gadget/pxa2*
 F:     include/sound/pxa2xx-lib.h
 F:     sound/arm/pxa*
-F:     sound/soc/pxa
+F:     sound/soc/pxa/
 
 MMP SUPPORT
 M:     Eric Miao <eric.y.miao@gmail.com>
@@ -7154,7 +7160,7 @@ SAMSUNG AUDIO (ASoC) DRIVERS
 M:     Sangbeom Kim <sbkim73@samsung.com>
 L:     alsa-devel@alsa-project.org (moderated for non-subscribers)
 S:     Supported
-F:     sound/soc/samsung
+F:     sound/soc/samsung/
 
 SAMSUNG FRAMEBUFFER DRIVER
 M:     Jingoo Han <jg1.han@samsung.com>
@@ -7200,7 +7206,7 @@ SERIAL DRIVERS
 M:     Greg Kroah-Hartman <gregkh@linuxfoundation.org>
 L:     linux-serial@vger.kernel.org
 S:     Maintained
-F:     drivers/tty/serial
+F:     drivers/tty/serial/
 
 SYNOPSYS DESIGNWARE DMAC DRIVER
 M:     Viresh Kumar <viresh.linux@gmail.com>
@@ -7235,7 +7241,7 @@ TLG2300 VIDEO4LINUX-2 DRIVER
 M:     Huang Shijie <shijie8@gmail.com>
 M:     Hans Verkuil <hverkuil@xs4all.nl>
 S:     Odd Fixes
-F:     drivers/media/usb/tlg2300
+F:     drivers/media/usb/tlg2300/
 
 SC1200 WDT DRIVER
 M:     Zwane Mwaikambo <zwane@arm.linux.org.uk>
@@ -7496,7 +7502,7 @@ L:        linux-media@vger.kernel.org
 T:     git git://linuxtv.org/media_tree.git
 W:     http://linuxtv.org
 S:     Odd Fixes
-F:     drivers/media/radio/radio-si4713.h
+F:     drivers/media/radio/radio-si4713.c
 
 SIANO DVB DRIVER
 M:     Mauro Carvalho Chehab <m.chehab@samsung.com>
@@ -7505,9 +7511,9 @@ W:        http://linuxtv.org
 T:     git git://linuxtv.org/media_tree.git
 S:     Odd fixes
 F:     drivers/media/common/siano/
-F:     drivers/media/dvb/siano/
 F:     drivers/media/usb/siano/
-F:     drivers/media/mmc/siano
+F:     drivers/media/usb/siano/
+F:     drivers/media/mmc/siano/
 
 SH_VEU V4L2 MEM2MEM DRIVER
 M:     Guennadi Liakhovetski <g.liakhovetski@gmx.de>
@@ -7545,9 +7551,9 @@ P:        Vincent Sanders <vince@simtec.co.uk>
 M:     Simtec Linux Team <linux@simtec.co.uk>
 W:     http://www.simtec.co.uk/products/EB2410ITX/
 S:     Supported
-F:     arch/arm/mach-s3c2410/mach-bast.c
-F:     arch/arm/mach-s3c2410/bast-ide.c
-F:     arch/arm/mach-s3c2410/bast-irq.c
+F:     arch/arm/mach-s3c24xx/mach-bast.c
+F:     arch/arm/mach-s3c24xx/bast-ide.c
+F:     arch/arm/mach-s3c24xx/bast-irq.c
 
 TI DAVINCI MACHINE SUPPORT
 M:     Sekhar Nori <nsekhar@ti.com>
@@ -7556,7 +7562,7 @@ L:        davinci-linux-open-source@linux.davincidsp.com (moderated for non-subscribers
 T:     git git://gitorious.org/linux-davinci/linux-davinci.git
 Q:     http://patchwork.kernel.org/project/linux-davinci/list/
 S:     Supported
-F:     arch/arm/mach-davinci
+F:     arch/arm/mach-davinci/
 F:     drivers/i2c/busses/i2c-davinci.c
 
 TI DAVINCI SERIES MEDIA DRIVER
@@ -7641,7 +7647,7 @@ SMIA AND SMIA++ IMAGE SENSOR DRIVER
 M:     Sakari Ailus <sakari.ailus@iki.fi>
 L:     linux-media@vger.kernel.org
 S:     Maintained
-F:     drivers/media/i2c/smiapp
+F:     drivers/media/i2c/smiapp/
 F:     include/media/smiapp.h
 F:     drivers/media/i2c/smiapp-pll.c
 F:     drivers/media/i2c/smiapp-pll.h
@@ -7744,6 +7750,11 @@ W:       http://tifmxx.berlios.de/
 S:     Maintained
 F:     drivers/memstick/host/tifm_ms.c
 
+SONY MEMORYSTICK STANDARD SUPPORT
+M:     Maxim Levitsky <maximlevitsky@gmail.com>
+S:     Maintained
+F:     drivers/memstick/core/ms_block.*
+
 SOUND
 M:     Jaroslav Kysela <perex@perex.cz>
 M:     Takashi Iwai <tiwai@suse.de>
@@ -7820,35 +7831,7 @@ L:       spear-devel@list.st.com
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 W:     http://www.st.com/spear
 S:     Maintained
-F:     arch/arm/plat-spear/
-
-SPEAR13XX MACHINE SUPPORT
-M:     Viresh Kumar <viresh.linux@gmail.com>
-M:     Shiraz Hashim <shiraz.hashim@st.com>
-L:     spear-devel@list.st.com
-L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
-W:     http://www.st.com/spear
-S:     Maintained
-F:     arch/arm/mach-spear13xx/
-
-SPEAR3XX MACHINE SUPPORT
-M:     Viresh Kumar <viresh.linux@gmail.com>
-M:     Shiraz Hashim <shiraz.hashim@st.com>
-L:     spear-devel@list.st.com
-L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
-W:     http://www.st.com/spear
-S:     Maintained
-F:     arch/arm/mach-spear3xx/
-
-SPEAR6XX MACHINE SUPPORT
-M:     Rajeev Kumar <rajeev-dlh.kumar@st.com>
-M:     Shiraz Hashim <shiraz.hashim@st.com>
-M:     Viresh Kumar <viresh.linux@gmail.com>
-L:     spear-devel@list.st.com
-L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
-W:     http://www.st.com/spear
-S:     Maintained
-F:     arch/arm/mach-spear6xx/
+F:     arch/arm/mach-spear/
 
 SPEAR CLOCK FRAMEWORK SUPPORT
 M:     Viresh Kumar <viresh.linux@gmail.com>
@@ -8117,7 +8100,7 @@ M:        Vineet Gupta <vgupta@synopsys.com>
 S:     Supported
 F:     arch/arc/
 F:     Documentation/devicetree/bindings/arc/
-F:     drivers/tty/serial/arc-uart.c
+F:     drivers/tty/serial/arc_uart.c
 
 SYSV FILESYSTEM
 M:     Christoph Hellwig <hch@infradead.org>
@@ -8807,7 +8790,6 @@ L:        linux-usb@vger.kernel.org
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/balbi/usb.git
 S:     Maintained
 F:     drivers/usb/phy/
-F:     drivers/usb/otg/
 
 USB PRINTER DRIVER (usblp)
 M:     Pete Zaitcev <zaitcev@redhat.com>
@@ -9338,7 +9320,7 @@ M:        Matthew Garrett <matthew.garrett@nebula.com>
 L:     platform-driver-x86@vger.kernel.org
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/mjg59/platform-drivers-x86.git
 S:     Maintained
-F:     drivers/platform/x86
+F:     drivers/platform/x86/
 
 X86 MCE INFRASTRUCTURE
 M:     Tony Luck <tony.luck@intel.com>
index a42f26a..de004ce 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,8 +1,8 @@
 VERSION = 3
-PATCHLEVEL = 11
+PATCHLEVEL = 12
 SUBLEVEL = 0
-EXTRAVERSION =
-NAME = Linux for Workgroups
+EXTRAVERSION = -rc1
+NAME = One Giant Leap for Frogkind
 
 # *DOCUMENTATION*
 # To see a list of typical targets execute "make help"
index 082d9b4..35a300d 100644 (file)
@@ -7,7 +7,6 @@ config ALPHA
        select HAVE_PCSPKR_PLATFORM
        select HAVE_PERF_EVENTS
        select HAVE_DMA_ATTRS
-       select HAVE_GENERIC_HARDIRQS
        select VIRT_TO_BUS
        select GENERIC_IRQ_PROBE
        select AUTO_IRQ_AFFINITY if SMP
index 40736da..ffb19b7 100644 (file)
@@ -338,6 +338,11 @@ csum_partial_copy_from_user(const void __user *src, void *dst, int len,
        unsigned long doff = 7 & (unsigned long) dst;
 
        if (len) {
+               if (!access_ok(VERIFY_READ, src, len)) {
+                       *errp = -EFAULT;
+                       memset(dst, 0, len);
+                       return sum;
+               }
                if (!doff) {
                        if (!soff)
                                checksum = csum_partial_cfu_aligned(
index 0c4132d..98838a0 100644 (file)
@@ -89,8 +89,7 @@ do_page_fault(unsigned long address, unsigned long mmcsr,
        const struct exception_table_entry *fixup;
        int fault, si_code = SEGV_MAPERR;
        siginfo_t info;
-       unsigned int flags = (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
-                             (cause > 0 ? FAULT_FLAG_WRITE : 0));
+       unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
 
        /* As of EV6, a load into $31/$f31 is a prefetch, and never faults
           (or is suppressed by the PALcode).  Support that for older CPUs
@@ -115,7 +114,8 @@ do_page_fault(unsigned long address, unsigned long mmcsr,
        if (address >= TASK_SIZE)
                goto vmalloc_fault;
 #endif
-
+       if (user_mode(regs))
+               flags |= FAULT_FLAG_USER;
 retry:
        down_read(&mm->mmap_sem);
        vma = find_vma(mm, address);
@@ -142,6 +142,7 @@ retry:
        } else {
                if (!(vma->vm_flags & VM_WRITE))
                        goto bad_area;
+               flags |= FAULT_FLAG_WRITE;
        }
 
        /* If for any reason at all we couldn't handle the fault,
index 68fcbb2..91dbb27 100644 (file)
@@ -20,7 +20,6 @@ config ARC
        select GENERIC_SMP_IDLE_THREAD
        select HAVE_ARCH_KGDB
        select HAVE_ARCH_TRACEHOOK
-       select HAVE_GENERIC_HARDIRQS
        select HAVE_IOREMAP_PROT
        select HAVE_KPROBES
        select HAVE_KRETPROBES
index 6fc1159..764f1e3 100644 (file)
@@ -11,7 +11,6 @@
 
 #include <asm-generic/sections.h>
 
-extern char _int_vec_base_lds[];
 extern char __arc_dccm_base[];
 extern char __dtb_start[];
 
index 2a913f8..0f944f0 100644 (file)
@@ -34,6 +34,9 @@ stext:
        ;       IDENTITY Reg [ 3  2  1  0 ]
        ;       (cpu-id)             ^^^        => Zero for UP ARC700
        ;                                       => #Core-ID if SMP (Master 0)
+       ; Note that non-boot CPUs might not land here if halt-on-reset and
+       ; instead breath life from @first_lines_of_secondary, but we still
+       ; need to make sure only boot cpu takes this path.
        GET_CPU_ID  r5
        cmp     r5, 0
        jnz     arc_platform_smp_wait_to_boot
@@ -98,6 +101,8 @@ stext:
 
 first_lines_of_secondary:
 
+       sr      @_int_vec_base_lds, [AUX_INTR_VEC_BASE]
+
        ; setup per-cpu idle task as "current" on this CPU
        ld      r0, [@secondary_idle_tsk]
        SET_CURR_TASK_ON_CPU  r0, r1
index 305b3f8..5fc9245 100644 (file)
@@ -24,7 +24,6 @@
  * -Needed for each CPU (hence not foldable into init_IRQ)
  *
  * what it does ?
- * -setup Vector Table Base Reg - in case Linux not linked at 0x8000_0000
  * -Disable all IRQs (on CPU side)
  * -Optionally, setup the High priority Interrupts as Level 2 IRQs
  */
index b011f8c..2c68bc7 100644 (file)
@@ -47,10 +47,7 @@ void read_arc_build_cfg_regs(void)
        READ_BCR(AUX_IDENTITY, cpu->core);
 
        cpu->timers = read_aux_reg(ARC_REG_TIMERS_BCR);
-
        cpu->vec_base = read_aux_reg(AUX_INTR_VEC_BASE);
-       if (cpu->vec_base == 0)
-               cpu->vec_base = (unsigned int)_int_vec_base_lds;
 
        READ_BCR(ARC_REG_D_UNCACH_BCR, uncached_space);
        cpu->uncached_base = uncached_space.start << 24;
index 0fd1f0d..d63f3de 100644 (file)
@@ -60,8 +60,7 @@ void do_page_fault(struct pt_regs *regs, unsigned long address)
        siginfo_t info;
        int fault, ret;
        int write = regs->ecr_cause & ECR_C_PROTV_STORE;  /* ST/EX */
-       unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
-                               (write ? FAULT_FLAG_WRITE : 0);
+       unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
 
        /*
         * We fault-in kernel-space virtual memory on-demand. The
@@ -89,6 +88,8 @@ void do_page_fault(struct pt_regs *regs, unsigned long address)
        if (in_atomic() || !mm)
                goto no_context;
 
+       if (user_mode(regs))
+               flags |= FAULT_FLAG_USER;
 retry:
        down_read(&mm->mmap_sem);
        vma = find_vma(mm, address);
@@ -117,12 +118,12 @@ good_area:
        if (write) {
                if (!(vma->vm_flags & VM_WRITE))
                        goto bad_area;
+               flags |= FAULT_FLAG_WRITE;
        } else {
                if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
                        goto bad_area;
        }
 
-survive:
        /*
         * If for any reason at all we couldn't handle the fault,
         * make sure we exit gracefully rather than endlessly redo
@@ -201,10 +202,6 @@ no_context:
        die("Oops", regs, address);
 
 out_of_memory:
-       if (is_global_init(tsk)) {
-               yield();
-               goto survive;
-       }
        up_read(&mm->mmap_sem);
 
        if (user_mode(regs)) {
index c8a916f..3f7714d 100644 (file)
@@ -3,20 +3,21 @@ config ARM
        default y
        select ARCH_BINFMT_ELF_RANDOMIZE_PIE
        select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
-       select ARCH_HAVE_CUSTOM_GPIO_H
        select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
+       select ARCH_HAVE_CUSTOM_GPIO_H
        select ARCH_WANT_IPC_PARSE_VERSION
        select BUILDTIME_EXTABLE_SORT if MMU
+       select CLONE_BACKWARDS
        select CPU_PM if (SUSPEND || CPU_IDLE)
        select DCACHE_WORD_ACCESS if (CPU_V6 || CPU_V6K || CPU_V7) && !CPU_BIG_ENDIAN && MMU
        select GENERIC_ATOMIC64 if (CPU_V7M || CPU_V6 || !CPU_32v6K || !AEABI)
        select GENERIC_CLOCKEVENTS_BROADCAST if SMP
+       select GENERIC_IDLE_POLL_SETUP
        select GENERIC_IRQ_PROBE
        select GENERIC_IRQ_SHOW
        select GENERIC_PCI_IOMAP
        select GENERIC_SCHED_CLOCK
        select GENERIC_SMP_IDLE_THREAD
-       select GENERIC_IDLE_POLL_SETUP
        select GENERIC_STRNCPY_FROM_USER
        select GENERIC_STRNLEN_USER
        select HARDIRQS_SW_RESEND
@@ -25,6 +26,7 @@ config ARM
        select HAVE_ARCH_SECCOMP_FILTER
        select HAVE_ARCH_TRACEHOOK
        select HAVE_BPF_JIT
+       select HAVE_CONTEXT_TRACKING
        select HAVE_C_RECORDMCOUNT
        select HAVE_DEBUG_KMEMLEAK
        select HAVE_DMA_API_DEBUG
@@ -35,7 +37,6 @@ config ARM
        select HAVE_FUNCTION_GRAPH_TRACER if (!THUMB2_KERNEL)
        select HAVE_FUNCTION_TRACER if (!XIP_KERNEL)
        select HAVE_GENERIC_DMA_COHERENT
-       select HAVE_GENERIC_HARDIRQS
        select HAVE_HW_BREAKPOINT if (PERF_EVENTS && (CPU_V6 || CPU_V6K || CPU_V7))
        select HAVE_IDE if PCI || ISA || PCMCIA
        select HAVE_IRQ_TIME_ACCOUNTING
@@ -47,6 +48,7 @@ config ARM
        select HAVE_KPROBES if !XIP_KERNEL
        select HAVE_KRETPROBES if (HAVE_KPROBES)
        select HAVE_MEMBLOCK
+       select HAVE_MOD_ARCH_SPECIFIC if ARM_UNWIND
        select HAVE_OPROFILE if (HAVE_PERF_EVENTS)
        select HAVE_PERF_EVENTS
        select HAVE_REGS_AND_STACK_ACCESS_API
@@ -54,15 +56,14 @@ config ARM
        select HAVE_UID16
        select IRQ_FORCED_THREADING
        select KTIME_SCALAR
+       select MODULES_USE_ELF_REL
+       select OLD_SIGACTION
+       select OLD_SIGSUSPEND3
        select PERF_USE_VMALLOC
        select RTC_LIB
        select SYS_SUPPORTS_APM_EMULATION
-       select HAVE_MOD_ARCH_SPECIFIC if ARM_UNWIND
-       select MODULES_USE_ELF_REL
-       select CLONE_BACKWARDS
-       select OLD_SIGSUSPEND3
-       select OLD_SIGACTION
-       select HAVE_CONTEXT_TRACKING
+       # Above selects are sorted alphabetically; please add new ones
+       # according to that.  Thanks.
        help
          The ARM series is a line of low-power-consumption RISC chip designs
          licensed by ARM Ltd and targeted at embedded applications and
@@ -386,8 +387,8 @@ config ARCH_GEMINI
        bool "Cortina Systems Gemini"
        select ARCH_REQUIRE_GPIOLIB
        select ARCH_USES_GETTIMEOFFSET
-       select NEED_MACH_GPIO_H
        select CPU_FA526
+       select NEED_MACH_GPIO_H
        help
          Support for the Cortina Systems Gemini family SoCs
 
@@ -487,8 +488,8 @@ config ARCH_IXP4XX
        select GENERIC_CLOCKEVENTS
        select MIGHT_HAVE_PCI
        select NEED_MACH_IO_H
-       select USB_EHCI_BIG_ENDIAN_MMIO
        select USB_EHCI_BIG_ENDIAN_DESC
+       select USB_EHCI_BIG_ENDIAN_MMIO
        help
          Support for Intel's IXP4XX (XScale) family of processors.
 
@@ -498,11 +499,11 @@ config ARCH_DOVE
        select CPU_PJ4
        select GENERIC_CLOCKEVENTS
        select MIGHT_HAVE_PCI
+       select MVEBU_MBUS
        select PINCTRL
        select PINCTRL_DOVE
        select PLAT_ORION_LEGACY
        select USB_ARCH_HAS_EHCI
-       select MVEBU_MBUS
        help
          Support for the Marvell Dove SoC 88AP510
 
@@ -512,12 +513,12 @@ config ARCH_KIRKWOOD
        select ARCH_REQUIRE_GPIOLIB
        select CPU_FEROCEON
        select GENERIC_CLOCKEVENTS
+       select MVEBU_MBUS
        select PCI
        select PCI_QUIRKS
        select PINCTRL
        select PINCTRL_KIRKWOOD
        select PLAT_ORION_LEGACY
-       select MVEBU_MBUS
        help
          Support for the following Marvell Kirkwood series SoCs:
          88F6180, 88F6192 and 88F6281.
@@ -527,9 +528,9 @@ config ARCH_MV78XX0
        select ARCH_REQUIRE_GPIOLIB
        select CPU_FEROCEON
        select GENERIC_CLOCKEVENTS
+       select MVEBU_MBUS
        select PCI
        select PLAT_ORION_LEGACY
-       select MVEBU_MBUS
        help
          Support for the following Marvell MV78xx0 series SoCs:
          MV781x0, MV782x0.
@@ -540,9 +541,9 @@ config ARCH_ORION5X
        select ARCH_REQUIRE_GPIOLIB
        select CPU_FEROCEON
        select GENERIC_CLOCKEVENTS
+       select MVEBU_MBUS
        select PCI
        select PLAT_ORION_LEGACY
-       select MVEBU_MBUS
        help
          Support for the following Marvell Orion 5x series SoCs:
          Orion-1 (5181), Orion-VoIP (5181L), Orion-NAS (5182),
@@ -758,8 +759,8 @@ config ARCH_S5P64X0
        select HAVE_S3C2410_WATCHDOG if WATCHDOG
        select HAVE_S3C_RTC if RTC_CLASS
        select NEED_MACH_GPIO_H
-       select SAMSUNG_WDT_RESET
        select SAMSUNG_ATAGS
+       select SAMSUNG_WDT_RESET
        help
          Samsung S5P64X0 CPU based systems, such as the Samsung SMDK6440,
          SMDK6450.
@@ -777,8 +778,8 @@ config ARCH_S5PC100
        select HAVE_S3C2410_WATCHDOG if WATCHDOG
        select HAVE_S3C_RTC if RTC_CLASS
        select NEED_MACH_GPIO_H
-       select SAMSUNG_WDT_RESET
        select SAMSUNG_ATAGS
+       select SAMSUNG_WDT_RESET
        help
          Samsung S5PC100 series based systems
 
@@ -1619,9 +1620,10 @@ config HZ_FIXED
                ARCH_S5PV210 || ARCH_EXYNOS4
        default AT91_TIMER_HZ if ARCH_AT91
        default SHMOBILE_TIMER_HZ if ARCH_SHMOBILE
+       default 0
 
 choice
-       depends on !HZ_FIXED
+       depends on HZ_FIXED = 0
        prompt "Timer frequency"
 
 config HZ_100
@@ -1646,7 +1648,7 @@ endchoice
 
 config HZ
        int
-       default HZ_FIXED if HZ_FIXED
+       default HZ_FIXED if HZ_FIXED != 0
        default 100 if HZ_100
        default 200 if HZ_200
        default 250 if HZ_250
index 31b76f0..15e625e 100644 (file)
        compatible = "cubietech,cubieboard2", "allwinner,sun7i-a20";
 
        soc@01c00000 {
+               emac: ethernet@01c0b000 {
+                       pinctrl-names = "default";
+                       pinctrl-0 = <&emac_pins_a>;
+                       phy = <&phy1>;
+                       status = "okay";
+               };
+
+               mdio@01c0b080 {
+                       status = "okay";
+
+                       phy1: ethernet-phy@1 {
+                               reg = <1>;
+                       };
+               };
+
                pinctrl@01c20800 {
                        led_pins_cubieboard2: led_pins@0 {
                                allwinner,pins = "PH20", "PH21";
index 34a6c02..9e77855 100644 (file)
        compatible = "olimex,a20-olinuxino-micro", "allwinner,sun7i-a20";
 
        soc@01c00000 {
+               emac: ethernet@01c0b000 {
+                       pinctrl-names = "default";
+                       pinctrl-0 = <&emac_pins_a>;
+                       phy = <&phy1>;
+                       status = "okay";
+               };
+
+               mdio@01c0b080 {
+                       status = "okay";
+
+                       phy1: ethernet-phy@1 {
+                               reg = <1>;
+                       };
+               };
+
                pinctrl@01c20800 {
                        led_pins_olinuxino: led_pins@0 {
                                allwinner,pins = "PH2";
index 999ff45..80559cb 100644 (file)
                #size-cells = <1>;
                ranges;
 
+               emac: ethernet@01c0b000 {
+                       compatible = "allwinner,sun4i-emac";
+                       reg = <0x01c0b000 0x1000>;
+                       interrupts = <0 55 1>;
+                       clocks = <&ahb_gates 17>;
+                       status = "disabled";
+               };
+
+               mdio@01c0b080 {
+                       compatible = "allwinner,sun4i-mdio";
+                       reg = <0x01c0b080 0x14>;
+                       status = "disabled";
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+               };
+
                pio: pinctrl@01c20800 {
                        compatible = "allwinner,sun7i-a20-pinctrl";
                        reg = <0x01c20800 0x400>;
                                allwinner,drive = <0>;
                                allwinner,pull = <0>;
                        };
+
+                       emac_pins_a: emac0@0 {
+                               allwinner,pins = "PA0", "PA1", "PA2",
+                                               "PA3", "PA4", "PA5", "PA6",
+                                               "PA7", "PA8", "PA9", "PA10",
+                                               "PA11", "PA12", "PA13", "PA14",
+                                               "PA15", "PA16";
+                               allwinner,function = "emac";
+                               allwinner,drive = <0>;
+                               allwinner,pull = <0>;
+                       };
                };
 
                timer@01c20c00 {
index 023ee63..e901d0f 100644 (file)
@@ -166,7 +166,8 @@ static int sp804_set_next_event(unsigned long next,
 }
 
 static struct clock_event_device sp804_clockevent = {
-       .features       = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
+       .features       = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT |
+               CLOCK_EVT_FEAT_DYNIRQ,
        .set_mode       = sp804_set_mode,
        .set_next_event = sp804_set_next_event,
        .rating         = 300,
index aaf3a87..bd454b0 100644 (file)
@@ -49,5 +49,5 @@ $(obj)/csumpartialcopyuser.o: $(obj)/csumpartialcopygeneric.S
 ifeq ($(CONFIG_KERNEL_MODE_NEON),y)
   NEON_FLAGS                   := -mfloat-abi=softfp -mfpu=neon
   CFLAGS_xor-neon.o            += $(NEON_FLAGS)
-  lib-$(CONFIG_XOR_BLOCKS)     += xor-neon.o
+  obj-$(CONFIG_XOR_BLOCKS)     += xor-neon.o
 endif
index f485e5a..2c40aea 100644 (file)
@@ -9,6 +9,9 @@
  */
 
 #include <linux/raid/xor.h>
+#include <linux/module.h>
+
+MODULE_LICENSE("GPL");
 
 #ifndef __ARM_NEON__
 #error You should compile this file with '-mfloat-abi=softfp -mfpu=neon'
@@ -40,3 +43,4 @@ struct xor_block_template const xor_block_neon_inner = {
        .do_4   = xor_8regs_4,
        .do_5   = xor_8regs_5,
 };
+EXPORT_SYMBOL(xor_block_neon_inner);
index df8612f..3f12b88 100644 (file)
@@ -281,7 +281,7 @@ static AMBA_APB_DEVICE(uart1, "apb:uart1", 0x00041010, EP93XX_UART1_PHYS_BASE,
        { IRQ_EP93XX_UART1 }, &ep93xx_uart_data);
 
 static AMBA_APB_DEVICE(uart2, "apb:uart2", 0x00041010, EP93XX_UART2_PHYS_BASE,
-       { IRQ_EP93XX_UART2 }, &ep93xx_uart_data);
+       { IRQ_EP93XX_UART2 }, NULL);
 
 static AMBA_APB_DEVICE(uart3, "apb:uart3", 0x00041010, EP93XX_UART3_PHYS_BASE,
        { IRQ_EP93XX_UART3 }, &ep93xx_uart_data);
index 829b573..e2acff9 100644 (file)
@@ -18,7 +18,7 @@
 #include <linux/of_address.h>
 #include <linux/of_platform.h>
 #include <linux/io.h>
-#include <linux/time-armada-370-xp.h>
+#include <linux/clocksource.h>
 #include <linux/dma-mapping.h>
 #include <linux/mbus.h>
 #include <asm/hardware/cache-l2x0.h>
@@ -37,7 +37,7 @@ static void __init armada_370_xp_map_io(void)
 static void __init armada_370_xp_timer_and_clk_init(void)
 {
        of_clk_init(NULL);
-       armada_370_xp_timer_init();
+       clocksource_of_init();
        coherency_init();
        BUG_ON(mvebu_mbus_dt_init());
 #ifdef CONFIG_CACHE_L2X0
index 4872939..ffb6f0a 100644 (file)
@@ -96,7 +96,6 @@ static struct resource mmcif1_resources[] __initdata = {
 static struct sh_eth_plat_data ether_pdata __initdata = {
        .phy                    = 0x1,
        .edmac_endian           = EDMAC_LITTLE_ENDIAN,
-       .register_type          = SH_ETH_REG_FAST_RCAR,
        .phy_interface          = PHY_INTERFACE_MODE_RMII,
        .ether_link_active_low  = 1,
 };
index b5b2f78..ecd0148 100644 (file)
@@ -691,8 +691,8 @@ void __init __weak r8a7779_register_twd(void) { }
 void __init r8a7779_earlytimer_init(void)
 {
        r8a7779_clock_init();
-       shmobile_earlytimer_init();
        r8a7779_register_twd();
+       shmobile_earlytimer_init();
 }
 
 void __init r8a7779_add_early_devices(void)
index ec08740..6f938cc 100644 (file)
 /* PCI space */
 #define VERSATILE_PCI_BASE             0x41000000      /* PCI Interface */
 #define VERSATILE_PCI_CFG_BASE        0x42000000
+#define VERSATILE_PCI_IO_BASE          0x43000000
 #define VERSATILE_PCI_MEM_BASE0        0x44000000
 #define VERSATILE_PCI_MEM_BASE1        0x50000000
 #define VERSATILE_PCI_MEM_BASE2        0x60000000
 /* Sizes of above maps */
 #define VERSATILE_PCI_BASE_SIZE               0x01000000
 #define VERSATILE_PCI_CFG_BASE_SIZE    0x02000000
+#define VERSATILE_PCI_IO_BASE_SIZE     0x01000000
 #define VERSATILE_PCI_MEM_BASE0_SIZE   0x0c000000      /* 32Mb */
 #define VERSATILE_PCI_MEM_BASE1_SIZE   0x10000000      /* 256Mb */
 #define VERSATILE_PCI_MEM_BASE2_SIZE   0x10000000      /* 256Mb */
index e92e5e0..c97be4e 100644 (file)
@@ -43,9 +43,9 @@
 #define PCI_IMAP0              __IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x0)
 #define PCI_IMAP1              __IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x4)
 #define PCI_IMAP2              __IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x8)
-#define PCI_SMAP0              __IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x10)
-#define PCI_SMAP1              __IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x14)
-#define PCI_SMAP2              __IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x18)
+#define PCI_SMAP0              __IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x14)
+#define PCI_SMAP1              __IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x18)
+#define PCI_SMAP2              __IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x1c)
 #define PCI_SELFID             __IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0xc)
 
 #define DEVICE_ID_OFFSET               0x00
@@ -170,8 +170,8 @@ static struct pci_ops pci_versatile_ops = {
        .write  = versatile_write_config,
 };
 
-static struct resource io_mem = {
-       .name   = "PCI I/O space",
+static struct resource unused_mem = {
+       .name   = "PCI unused",
        .start  = VERSATILE_PCI_MEM_BASE0,
        .end    = VERSATILE_PCI_MEM_BASE0+VERSATILE_PCI_MEM_BASE0_SIZE-1,
        .flags  = IORESOURCE_MEM,
@@ -195,9 +195,9 @@ static int __init pci_versatile_setup_resources(struct pci_sys_data *sys)
 {
        int ret = 0;
 
-       ret = request_resource(&iomem_resource, &io_mem);
+       ret = request_resource(&iomem_resource, &unused_mem);
        if (ret) {
-               printk(KERN_ERR "PCI: unable to allocate I/O "
+               printk(KERN_ERR "PCI: unable to allocate unused "
                       "memory region (%d)\n", ret);
                goto out;
        }
@@ -205,7 +205,7 @@ static int __init pci_versatile_setup_resources(struct pci_sys_data *sys)
        if (ret) {
                printk(KERN_ERR "PCI: unable to allocate non-prefetchable "
                       "memory region (%d)\n", ret);
-               goto release_io_mem;
+               goto release_unused_mem;
        }
        ret = request_resource(&iomem_resource, &pre_mem);
        if (ret) {
@@ -225,8 +225,8 @@ static int __init pci_versatile_setup_resources(struct pci_sys_data *sys)
 
  release_non_mem:
        release_resource(&non_mem);
- release_io_mem:
-       release_resource(&io_mem);
+ release_unused_mem:
+       release_resource(&unused_mem);
  out:
        return ret;
 }
@@ -246,7 +246,7 @@ int __init pci_versatile_setup(int nr, struct pci_sys_data *sys)
                goto out;
        }
 
-       ret = pci_ioremap_io(0, VERSATILE_PCI_MEM_BASE0);
+       ret = pci_ioremap_io(0, VERSATILE_PCI_IO_BASE);
        if (ret)
                goto out;
 
@@ -294,6 +294,19 @@ int __init pci_versatile_setup(int nr, struct pci_sys_data *sys)
        __raw_writel(PHYS_OFFSET, local_pci_cfg_base + PCI_BASE_ADDRESS_1);
        __raw_writel(PHYS_OFFSET, local_pci_cfg_base + PCI_BASE_ADDRESS_2);
 
+       /*
+        * For many years the kernel and QEMU were symbiotically buggy
+        * in that they both assumed the same broken IRQ mapping.
+        * QEMU therefore attempts to auto-detect old broken kernels
+        * so that they still work on newer QEMU as they did on old
+        * QEMU. Since we now use the correct (ie matching-hardware)
+        * IRQ mapping we write a definitely different value to a
+        * PCI_INTERRUPT_LINE register to tell QEMU that we expect
+        * real hardware behaviour and it need not be backwards
+        * compatible for us. This write is harmless on real hardware.
+        */
+       __raw_writel(0, VERSATILE_PCI_VIRT_BASE+PCI_INTERRUPT_LINE);
+
        /*
         * Do not to map Versatile FPGA PCI device into memory space
         */
@@ -327,13 +340,13 @@ static int __init versatile_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
 {
        int irq;
 
-       /* slot,  pin,  irq
-        *  24     1     IRQ_SIC_PCI0
-        *  25     1     IRQ_SIC_PCI1
-        *  26     1     IRQ_SIC_PCI2
-        *  27     1     IRQ_SIC_PCI3
+       /*
+        * Slot INTA    INTB    INTC    INTD
+        * 31   PCI1    PCI2    PCI3    PCI0
+        * 30   PCI0    PCI1    PCI2    PCI3
+        * 29   PCI3    PCI0    PCI1    PCI2
         */
-       irq = IRQ_SIC_PCI0 + ((slot - 24 + pin - 1) & 3);
+       irq = IRQ_SIC_PCI0 + ((slot + 2 + pin - 1) & 3);
 
        return irq;
 }
index 36ea824..505e64a 100644 (file)
@@ -7,6 +7,8 @@ ccflags-$(CONFIG_ARCH_MULTIPLATFORM) := -I$(srctree)/$(src)/include \
 obj-y                                  := v2m.o
 obj-$(CONFIG_ARCH_VEXPRESS_CA9X4)      += ct-ca9x4.o
 obj-$(CONFIG_ARCH_VEXPRESS_DCSCB)      += dcscb.o      dcscb_setup.o
+CFLAGS_dcscb.o                         += -march=armv7-a
 obj-$(CONFIG_ARCH_VEXPRESS_TC2_PM)     += tc2_pm.o spc.o
+CFLAGS_tc2_pm.o                                += -march=armv7-a
 obj-$(CONFIG_SMP)                      += platsmp.o
 obj-$(CONFIG_HOTPLUG_CPU)              += hotplug.o
index c97f794..eb8830a 100644 (file)
@@ -261,9 +261,7 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
        struct task_struct *tsk;
        struct mm_struct *mm;
        int fault, sig, code;
-       int write = fsr & FSR_WRITE;
-       unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
-                               (write ? FAULT_FLAG_WRITE : 0);
+       unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
 
        if (notify_page_fault(regs, fsr))
                return 0;
@@ -282,6 +280,11 @@ do_page_fault(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
        if (in_atomic() || !mm)
                goto no_context;
 
+       if (user_mode(regs))
+               flags |= FAULT_FLAG_USER;
+       if (fsr & FSR_WRITE)
+               flags |= FAULT_FLAG_WRITE;
+
        /*
         * As per x86, we may deadlock here.  However, since the kernel only
         * validly references user space from well defined areas of the code,
@@ -349,6 +352,13 @@ retry:
        if (likely(!(fault & (VM_FAULT_ERROR | VM_FAULT_BADMAP | VM_FAULT_BADACCESS))))
                return 0;
 
+       /*
+        * If we are in kernel mode at this point, we
+        * have no context to handle this fault with.
+        */
+       if (!user_mode(regs))
+               goto no_context;
+
        if (fault & VM_FAULT_OOM) {
                /*
                 * We ran out of memory, call the OOM killer, and return to
@@ -359,13 +369,6 @@ retry:
                return 0;
        }
 
-       /*
-        * If we are in kernel mode at this point, we
-        * have no context to handle this fault with.
-        */
-       if (!user_mode(regs))
-               goto no_context;
-
        if (fault & VM_FAULT_SIGBUS) {
                /*
                 * We had some memory, but were unable to
index 66781bf..54ee616 100644 (file)
@@ -56,3 +56,8 @@ int pmd_huge(pmd_t pmd)
 {
        return pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT);
 }
+
+int pmd_huge_support(void)
+{
+       return 1;
+}
index c83f27b..3ea0290 100644 (file)
@@ -132,6 +132,7 @@ static int pxa_ssp_probe(struct platform_device *pdev)
        if (dev->of_node) {
                struct of_phandle_args dma_spec;
                struct device_node *np = dev->of_node;
+               int ret;
 
                /*
                 * FIXME: we should allocate the DMA channel from this
@@ -140,14 +141,23 @@ static int pxa_ssp_probe(struct platform_device *pdev)
                 */
 
                /* rx */
-               of_parse_phandle_with_args(np, "dmas", "#dma-cells",
-                                          0, &dma_spec);
+               ret = of_parse_phandle_with_args(np, "dmas", "#dma-cells",
+                                                0, &dma_spec);
+
+               if (ret) {
+                       dev_err(dev, "Can't parse dmas property\n");
+                       return -ENODEV;
+               }
                ssp->drcmr_rx = dma_spec.args[0];
                of_node_put(dma_spec.np);
 
                /* tx */
-               of_parse_phandle_with_args(np, "dmas", "#dma-cells",
-                                          1, &dma_spec);
+               ret = of_parse_phandle_with_args(np, "dmas", "#dma-cells",
+                                                1, &dma_spec);
+               if (ret) {
+                       dev_err(dev, "Can't parse dmas property\n");
+                       return -ENODEV;
+               }
                ssp->drcmr_tx = dma_spec.args[0];
                of_node_put(dma_spec.np);
        } else {
index ae323a4..c044548 100644 (file)
@@ -23,7 +23,6 @@ config ARM64
        select HAVE_DMA_API_DEBUG
        select HAVE_DMA_ATTRS
        select HAVE_GENERIC_DMA_COHERENT
-       select HAVE_GENERIC_HARDIRQS
        select HAVE_HW_BREAKPOINT if PERF_EVENTS
        select HAVE_MEMBLOCK
        select HAVE_PERF_EVENTS
index 6c8ba25..6d6acf1 100644 (file)
@@ -199,13 +199,6 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
        unsigned long vm_flags = VM_READ | VM_WRITE | VM_EXEC;
        unsigned int mm_flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
 
-       if (esr & ESR_LNX_EXEC) {
-               vm_flags = VM_EXEC;
-       } else if ((esr & ESR_WRITE) && !(esr & ESR_CM)) {
-               vm_flags = VM_WRITE;
-               mm_flags |= FAULT_FLAG_WRITE;
-       }
-
        tsk = current;
        mm  = tsk->mm;
 
@@ -220,6 +213,16 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr,
        if (in_atomic() || !mm)
                goto no_context;
 
+       if (user_mode(regs))
+               mm_flags |= FAULT_FLAG_USER;
+
+       if (esr & ESR_LNX_EXEC) {
+               vm_flags = VM_EXEC;
+       } else if ((esr & ESR_WRITE) && !(esr & ESR_CM)) {
+               vm_flags = VM_WRITE;
+               mm_flags |= FAULT_FLAG_WRITE;
+       }
+
        /*
         * As per x86, we may deadlock here. However, since the kernel only
         * validly references user space from well defined areas of the code,
@@ -288,6 +291,13 @@ retry:
                              VM_FAULT_BADACCESS))))
                return 0;
 
+       /*
+        * If we are in kernel mode at this point, we have no context to
+        * handle this fault with.
+        */
+       if (!user_mode(regs))
+               goto no_context;
+
        if (fault & VM_FAULT_OOM) {
                /*
                 * We ran out of memory, call the OOM killer, and return to
@@ -298,13 +308,6 @@ retry:
                return 0;
        }
 
-       /*
-        * If we are in kernel mode at this point, we have no context to
-        * handle this fault with.
-        */
-       if (!user_mode(regs))
-               goto no_context;
-
        if (fault & VM_FAULT_SIGBUS) {
                /*
                 * We had some memory, but were unable to successfully fix up
index 2fc8258..5e9aec3 100644 (file)
@@ -54,6 +54,11 @@ int pud_huge(pud_t pud)
        return !(pud_val(pud) & PUD_TABLE_BIT);
 }
 
+int pmd_huge_support(void)
+{
+       return 1;
+}
+
 static __init int setup_hugepagesz(char *opt)
 {
        unsigned long ps = memparse(opt, &opt);
index 549903c..b6878eb 100644 (file)
@@ -6,7 +6,6 @@ config AVR32
        select HAVE_CLK
        select HAVE_OPROFILE
        select HAVE_KPROBES
-       select HAVE_GENERIC_HARDIRQS
        select VIRT_TO_BUS
        select GENERIC_IRQ_PROBE
        select GENERIC_ATOMIC64
index b2f2d2d..0eca933 100644 (file)
@@ -86,6 +86,8 @@ asmlinkage void do_page_fault(unsigned long ecr, struct pt_regs *regs)
 
        local_irq_enable();
 
+       if (user_mode(regs))
+               flags |= FAULT_FLAG_USER;
 retry:
        down_read(&mm->mmap_sem);
 
@@ -228,9 +230,9 @@ no_context:
         */
 out_of_memory:
        up_read(&mm->mmap_sem);
-       pagefault_out_of_memory();
        if (!user_mode(regs))
                goto no_context;
+       pagefault_out_of_memory();
        return;
 
 do_sigbus:
index 3b6abc5..f78c9a2 100644 (file)
@@ -32,7 +32,6 @@ config BLACKFIN
        select HAVE_UNDERSCORE_SYMBOL_PREFIX
        select VIRT_TO_BUS
        select ARCH_WANT_IPC_PARSE_VERSION
-       select HAVE_GENERIC_HARDIRQS
        select GENERIC_ATOMIC64
        select GENERIC_IRQ_PROBE
        select USE_GENERIC_SMP_HELPERS if SMP
diff --git a/arch/blackfin/include/asm/scb.h b/arch/blackfin/include/asm/scb.h
new file mode 100644 (file)
index 0000000..a294cc0
--- /dev/null
@@ -0,0 +1,21 @@
+/*
+ * arch/blackfin/include/asm/scb.h - system cross bar priority definitions
+ *
+ * Copyright 2012 Analog Devices Inc.
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+#define SCB_SLOT_OFFSET        24
+#define SCB_MI_MAX_SLOT 32
+
+struct scb_mi_prio {
+       unsigned long scb_mi_arbr;
+       unsigned long scb_mi_arbw;
+       unsigned char scb_mi_slots;
+       unsigned char scb_mi_prio[SCB_MI_MAX_SLOT];
+};
+
+extern struct scb_mi_prio scb_data[];
+
+extern void init_scb(void);
index 19ad063..3961930 100644 (file)
@@ -35,6 +35,9 @@
 #ifdef CONFIG_BF60x
 #include <mach/pm.h>
 #endif
+#ifdef CONFIG_SCB_PRIORITY
+#include <asm/scb.h>
+#endif
 
 u16 _bfin_swrst;
 EXPORT_SYMBOL(_bfin_swrst);
@@ -1101,6 +1104,9 @@ void __init setup_arch(char **cmdline_p)
 #endif
        init_exception_vectors();
        bfin_cache_init();      /* Initialize caches for the boot CPU */
+#ifdef CONFIG_SCB_PRIORITY
+       init_scb();
+#endif
 }
 
 static int __init topology_init(void)
index 95a4f1b..2bcbf94 100644 (file)
@@ -59,6 +59,1661 @@ config SEC_IRQ_PRIORITY_LEVELS
          Divide the total number of interrupt priority levels into sub-levels.
          There is 2 ^ (SEC_IRQ_PRIORITY_LEVELS + 1) different levels.
 
+
+comment "System Cross Bar Priority Assignment"
+
+config SCB_PRIORITY
+       bool "Init System Cross Bar Priority"
+       default n
+
+menuconfig     SCB0_MI0
+       bool "SCB0 Master Interface 0 (DDR)"
+       default n
+       depends on SCB_PRIORITY
+       help
+       The slave interface id of each slot should be set according to the following table.
+       Core 0  -- 0
+       Core 1  -- 2
+       SCB1    -- 9
+       SCB2    -- 10
+       SCB3    -- 11
+       SCB4    -- 12
+       SCB5    -- 5
+       SCB6    -- 6
+       SCB7    -- 8
+       SCB8    -- 7
+       SCB9    -- 4
+       USB     -- 13
+
+if SCB0_MI0
+
+config SCB0_MI0_SLOT0
+       int "Slot 0 slave interface id"
+       default 0
+       range 0 13
+
+config SCB0_MI0_SLOT1
+       int "Slot 1 slave interface id"
+       default 2
+       range 0 13
+
+config SCB0_MI0_SLOT2
+       int "Slot 2 slave interface id"
+       default 4
+       range 0 13
+
+config SCB0_MI0_SLOT3
+       int "Slot 3 slave interface id"
+       default 5
+       range 0 13
+
+config SCB0_MI0_SLOT4
+       int "Slot 4 slave interface id"
+       default 6
+       range 0 13
+
+config SCB0_MI0_SLOT5
+       int "Slot 5 slave interface id"
+       default 7
+       range 0 13
+
+config SCB0_MI0_SLOT6
+       int "Slot 6 slave interface id"
+       default 8
+       range 0 13
+
+config SCB0_MI0_SLOT7
+       int "Slot 7 slave interface id"
+       default 9
+       range 0 13
+
+config SCB0_MI0_SLOT8
+       int "Slot 8 slave interface id"
+       default 10
+       range 0 13
+
+config SCB0_MI0_SLOT9
+       int "Slot 9 slave interface id"
+       default 11
+       range 0 13
+
+config SCB0_MI0_SLOT10
+       int "Slot 10 slave interface id"
+       default 13
+       range 0 13
+
+config SCB0_MI0_SLOT11
+       int "Slot 11 slave interface id"
+       default 12
+       range 0 13
+
+config SCB0_MI0_SLOT12
+       int "Slot 12 slave interface id"
+       default 0
+       range 0 13
+
+config SCB0_MI0_SLOT13
+       int "Slot 13 slave interface id"
+       default 2
+       range 0 13
+
+config SCB0_MI0_SLOT14
+       int "Slot 14 slave interface id"
+       default 4
+       range 0 13
+
+config SCB0_MI0_SLOT15
+       int "Slot 15 slave interface id"
+       default 5
+       range 0 13
+
+config SCB0_MI0_SLOT16
+       int "Slot 16 slave interface id"
+       default 6
+       range 0 13
+
+config SCB0_MI0_SLOT17
+       int "Slot 17 slave interface id"
+       default 7
+       range 0 13
+
+config SCB0_MI0_SLOT18
+       int "Slot 18 slave interface id"
+       default 8
+       range 0 13
+
+config SCB0_MI0_SLOT19
+       int "Slot 19 slave interface id"
+       default 9
+       range 0 13
+
+config SCB0_MI0_SLOT20
+       int "Slot 20 slave interface id"
+       default 10
+       range 0 13
+
+config SCB0_MI0_SLOT21
+       int "Slot 21 slave interface id"
+       default 11
+       range 0 13
+
+config SCB0_MI0_SLOT22
+       int "Slot 22 slave interface id"
+       default 13
+       range 0 13
+
+config SCB0_MI0_SLOT23
+       int "Slot 23 slave interface id"
+       default 12
+       range 0 13
+
+config SCB0_MI0_SLOT24
+       int "Slot 24 slave interface id"
+       default 0
+       range 0 13
+
+config SCB0_MI0_SLOT25
+       int "Slot 25 slave interface id"
+       default 2
+       range 0 13
+
+config SCB0_MI0_SLOT26
+       int "Slot 26 slave interface id"
+       default 4
+       range 0 13
+
+config SCB0_MI0_SLOT27
+       int "Slot 27 slave interface id"
+       default 5
+       range 0 13
+
+config SCB0_MI0_SLOT28
+       int "Slot 28 slave interface id"
+       default 6
+       range 0 13
+
+config SCB0_MI0_SLOT29
+       int "Slot 29 slave interface id"
+       default 7
+       range 0 13
+
+config SCB0_MI0_SLOT30
+       int "Slot 30 slave interface id"
+       default 8
+       range 0 13
+
+config SCB0_MI0_SLOT31
+       int "Slot 31 slave interface id"
+       default 13
+       range 0 13
+
+endif # SCB0_MI0
+
+menuconfig     SCB0_MI1
+       bool "SCB0 Master Interface 1 (SMC)"
+       default n
+       depends on SCB_PRIORITY
+       help
+       The slave interface id of each slot should be set according to the following table.
+       Core 0  -- 0
+       Core 1  -- 2
+       SCB1    -- 9
+       SCB2    -- 10
+       SCB3    -- 11
+       SCB4    -- 12
+       SCB5    -- 5
+       SCB6    -- 6
+       SCB7    -- 8
+       SCB8    -- 7
+       SCB9    -- 4
+       USB     -- 13
+
+if SCB0_MI1
+
+config SCB0_MI1_SLOT0
+       int "Slot 0 slave interface id"
+       default 0
+       range 0 13
+
+config SCB0_MI1_SLOT1
+       int "Slot 1 slave interface id"
+       default 2
+       range 0 13
+
+config SCB0_MI1_SLOT2
+       int "Slot 2 slave interface id"
+       default 4
+       range 0 13
+
+config SCB0_MI1_SLOT3
+       int "Slot 3 slave interface id"
+       default 5
+       range 0 13
+
+config SCB0_MI1_SLOT4
+       int "Slot 4 slave interface id"
+       default 6
+       range 0 13
+
+config SCB0_MI1_SLOT5
+       int "Slot 5 slave interface id"
+       default 7
+       range 0 13
+
+config SCB0_MI1_SLOT6
+       int "Slot 6 slave interface id"
+       default 8
+       range 0 13
+
+config SCB0_MI1_SLOT7
+       int "Slot 7 slave interface id"
+       default 9
+       range 0 13
+
+config SCB0_MI1_SLOT8
+       int "Slot 8 slave interface id"
+       default 10
+       range 0 13
+
+config SCB0_MI1_SLOT9
+       int "Slot 9 slave interface id"
+       default 11
+       range 0 13
+
+config SCB0_MI1_SLOT10
+       int "Slot 10 slave interface id"
+       default 13
+       range 0 13
+
+config SCB0_MI1_SLOT11
+       int "Slot 11 slave interface id"
+       default 12
+       range 0 13
+
+config SCB0_MI1_SLOT12
+       int "Slot 12 slave interface id"
+       default 0
+       range 0 13
+
+config SCB0_MI1_SLOT13
+       int "Slot 13 slave interface id"
+       default 2
+       range 0 13
+
+config SCB0_MI1_SLOT14
+       int "Slot 14 slave interface id"
+       default 4
+       range 0 13
+
+config SCB0_MI1_SLOT15
+       int "Slot 15 slave interface id"
+       default 5
+       range 0 13
+
+config SCB0_MI1_SLOT16
+       int "Slot 16 slave interface id"
+       default 6
+       range 0 13
+
+config SCB0_MI1_SLOT17
+       int "Slot 17 slave interface id"
+       default 7
+       range 0 13
+
+config SCB0_MI1_SLOT18
+       int "Slot 18 slave interface id"
+       default 8
+       range 0 13
+
+config SCB0_MI1_SLOT19
+       int "Slot 19 slave interface id"
+       default 9
+       range 0 13
+
+config SCB0_MI1_SLOT20
+       int "Slot 20 slave interface id"
+       default 10
+       range 0 13
+
+config SCB0_MI1_SLOT21
+       int "Slot 21 slave interface id"
+       default 11
+       range 0 13
+
+config SCB0_MI1_SLOT22
+       int "Slot 22 slave interface id"
+       default 13
+       range 0 13
+
+config SCB0_MI1_SLOT23
+       int "Slot 23 slave interface id"
+       default 12
+       range 0 13
+
+config SCB0_MI1_SLOT24
+       int "Slot 24 slave interface id"
+       default 0
+       range 0 13
+
+config SCB0_MI1_SLOT25
+       int "Slot 25 slave interface id"
+       default 2
+       range 0 13
+
+config SCB0_MI1_SLOT26
+       int "Slot 26 slave interface id"
+       default 4
+       range 0 13
+
+config SCB0_MI1_SLOT27
+       int "Slot 27 slave interface id"
+       default 5
+       range 0 13
+
+config SCB0_MI1_SLOT28
+       int "Slot 28 slave interface id"
+       default 6
+       range 0 13
+
+config SCB0_MI1_SLOT29
+       int "Slot 29 slave interface id"
+       default 7
+       range 0 13
+
+config SCB0_MI1_SLOT30
+       int "Slot 30 slave interface id"
+       default 8
+       range 0 13
+
+config SCB0_MI1_SLOT31
+       int "Slot 31 slave interface id"
+       default 13
+       range 0 13
+
+endif # SCB0_MI1
+
+menuconfig     SCB0_MI2
+       bool "SCB0 Master Interface 2 (Data L2)"
+       default n
+       depends on SCB_PRIORITY
+       help
+       The slave interface id of each slot should be set according to the following table.
+       Core 0  -- 0
+       Core 1  -- 2
+       SCB1    -- 9
+       SCB2    -- 10
+       SCB3    -- 11
+       SCB4    -- 12
+       SCB5    -- 5
+       SCB6    -- 6
+       SCB7    -- 8
+       SCB8    -- 7
+       SCB9    -- 4
+       USB     -- 13
+
+if SCB0_MI2
+
+config SCB0_MI2_SLOT0
+       int "Slot 0 slave interface id"
+       default 4
+       range 0 13
+
+config SCB0_MI2_SLOT1
+       int "Slot 1 slave interface id"
+       default 5
+       range 0 13
+
+config SCB0_MI2_SLOT2
+       int "Slot 2 slave interface id"
+       default 6
+       range 0 13
+
+config SCB0_MI2_SLOT3
+       int "Slot 3 slave interface id"
+       default 7
+       range 0 13
+
+config SCB0_MI2_SLOT4
+       int "Slot 4 slave interface id"
+       default 8
+       range 0 13
+
+config SCB0_MI2_SLOT5
+       int "Slot 5 slave interface id"
+       default 9
+       range 0 13
+
+config SCB0_MI2_SLOT6
+       int "Slot 6 slave interface id"
+       default 10
+       range 0 13
+
+config SCB0_MI2_SLOT7
+       int "Slot 7 slave interface id"
+       default 11
+       range 0 13
+
+config SCB0_MI2_SLOT8
+       int "Slot 8 slave interface id"
+       default 13
+       range 0 13
+
+config SCB0_MI2_SLOT9
+       int "Slot 9 slave interface id"
+       default 12
+       range 0 13
+
+config SCB0_MI2_SLOT10
+       int "Slot 10 slave interface id"
+       default 4
+       range 0 13
+
+config SCB0_MI2_SLOT11
+       int "Slot 11 slave interface id"
+       default 5
+       range 0 13
+
+config SCB0_MI2_SLOT12
+       int "Slot 12 slave interface id"
+       default 6
+       range 0 13
+
+config SCB0_MI2_SLOT13
+       int "Slot 13 slave interface id"
+       default 7
+       range 0 13
+
+config SCB0_MI2_SLOT14
+       int "Slot 14 slave interface id"
+       default 8
+       range 0 13
+
+config SCB0_MI2_SLOT15
+       int "Slot 15 slave interface id"
+       default 9
+       range 0 13
+
+config SCB0_MI2_SLOT16
+       int "Slot 16 slave interface id"
+       default 10
+       range 0 13
+
+config SCB0_MI2_SLOT17
+       int "Slot 17 slave interface id"
+       default 11
+       range 0 13
+
+config SCB0_MI2_SLOT18
+       int "Slot 18 slave interface id"
+       default 13
+       range 0 13
+
+config SCB0_MI2_SLOT19
+       int "Slot 19 slave interface id"
+       default 12
+       range 0 13
+
+config SCB0_MI2_SLOT20
+       int "Slot 20 slave interface id"
+       default 4
+       range 0 13
+
+config SCB0_MI2_SLOT21
+       int "Slot 21 slave interface id"
+       default 5
+       range 0 13
+
+config SCB0_MI2_SLOT22
+       int "Slot 22 slave interface id"
+       default 6
+       range 0 13
+
+config SCB0_MI2_SLOT23
+       int "Slot 23 slave interface id"
+       default 7
+       range 0 13
+
+config SCB0_MI2_SLOT24
+       int "Slot 24 slave interface id"
+       default 8
+       range 0 13
+
+config SCB0_MI2_SLOT25
+       int "Slot 25 slave interface id"
+       default 9
+       range 0 13
+
+config SCB0_MI2_SLOT26
+       int "Slot 26 slave interface id"
+       default 10
+       range 0 13
+
+config SCB0_MI2_SLOT27
+       int "Slot 27 slave interface id"
+       default 11
+       range 0 13
+
+config SCB0_MI2_SLOT28
+       int "Slot 28 slave interface id"
+       default 13
+       range 0 13
+
+config SCB0_MI2_SLOT29
+       int "Slot 29 slave interface id"
+       default 12
+       range 0 13
+
+config SCB0_MI2_SLOT30
+       int "Slot 30 slave interface id"
+       default 4
+       range 0 13
+
+config SCB0_MI2_SLOT31
+       int "Slot 31 slave interface id"
+       default 7
+       range 0 13
+
+endif # SCB0_MI2
+
+menuconfig     SCB0_MI3
+       bool "SCB0 Master Interface 3 (L1A)"
+       default n
+       depends on SCB_PRIORITY
+       help
+       The slave interface id of each slot should be set according to the following table.
+       Core 0  -- 0
+       Core 1  -- 2
+       SCB1    -- 9
+       SCB2    -- 10
+       SCB3    -- 11
+       SCB4    -- 12
+       SCB5    -- 5
+       SCB6    -- 6
+       SCB7    -- 8
+       SCB8    -- 7
+       SCB9    -- 4
+       USB     -- 13
+
+if SCB0_MI3
+
+config SCB0_MI3_SLOT0
+       int "Slot 0 slave interface id"
+       default 4
+       range 0 13
+
+config SCB0_MI3_SLOT1
+       int "Slot 1 slave interface id"
+       default 5
+       range 0 13
+
+config SCB0_MI3_SLOT2
+       int "Slot 2 slave interface id"
+       default 6
+       range 0 13
+
+config SCB0_MI3_SLOT3
+       int "Slot 3 slave interface id"
+       default 7
+       range 0 13
+
+config SCB0_MI3_SLOT4
+       int "Slot 4 slave interface id"
+       default 8
+       range 0 13
+
+config SCB0_MI3_SLOT5
+       int "Slot 5 slave interface id"
+       default 9
+       range 0 13
+
+config SCB0_MI3_SLOT6
+       int "Slot 6 slave interface id"
+       default 10
+       range 0 13
+
+config SCB0_MI3_SLOT7
+       int "Slot 7 slave interface id"
+       default 11
+       range 0 13
+
+config SCB0_MI3_SLOT8
+       int "Slot 8 slave interface id"
+       default 13
+       range 0 13
+
+config SCB0_MI3_SLOT9
+       int "Slot 9 slave interface id"
+       default 12
+       range 0 13
+
+config SCB0_MI3_SLOT10
+       int "Slot 10 slave interface id"
+       default 4
+       range 0 13
+
+config SCB0_MI3_SLOT11
+       int "Slot 11 slave interface id"
+       default 5
+       range 0 13
+
+config SCB0_MI3_SLOT12
+       int "Slot 12 slave interface id"
+       default 6
+       range 0 13
+
+config SCB0_MI3_SLOT13
+       int "Slot 13 slave interface id"
+       default 7
+       range 0 13
+
+config SCB0_MI3_SLOT14
+       int "Slot 14 slave interface id"
+       default 8
+       range 0 13
+
+config SCB0_MI3_SLOT15
+       int "Slot 15 slave interface id"
+       default 9
+       range 0 13
+
+config SCB0_MI3_SLOT16
+       int "Slot 16 slave interface id"
+       default 10
+       range 0 13
+
+config SCB0_MI3_SLOT17
+       int "Slot 17 slave interface id"
+       default 11
+       range 0 13
+
+config SCB0_MI3_SLOT18
+       int "Slot 18 slave interface id"
+       default 13
+       range 0 13
+
+config SCB0_MI3_SLOT19
+       int "Slot 19 slave interface id"
+       default 12
+       range 0 13
+
+config SCB0_MI3_SLOT20
+       int "Slot 20 slave interface id"
+       default 4
+       range 0 13
+
+config SCB0_MI3_SLOT21
+       int "Slot 21 slave interface id"
+       default 5
+       range 0 13
+
+config SCB0_MI3_SLOT22
+       int "Slot 22 slave interface id"
+       default 6
+       range 0 13
+
+config SCB0_MI3_SLOT23
+       int "Slot 23 slave interface id"
+       default 7
+       range 0 13
+
+config SCB0_MI3_SLOT24
+       int "Slot 24 slave interface id"
+       default 8
+       range 0 13
+
+config SCB0_MI3_SLOT25
+       int "Slot 25 slave interface id"
+       default 9
+       range 0 13
+
+config SCB0_MI3_SLOT26
+       int "Slot 26 slave interface id"
+       default 10
+       range 0 13
+
+config SCB0_MI3_SLOT27
+       int "Slot 27 slave interface id"
+       default 11
+       range 0 13
+
+config SCB0_MI3_SLOT28
+       int "Slot 28 slave interface id"
+       default 13
+       range 0 13
+
+config SCB0_MI3_SLOT29
+       int "Slot 29 slave interface id"
+       default 12
+       range 0 13
+
+config SCB0_MI3_SLOT30
+       int "Slot 30 slave interface id"
+       default 4
+       range 0 13
+
+config SCB0_MI3_SLOT31
+       int "Slot 31 slave interface id"
+       default 7
+       range 0 13
+
+endif # SCB0_MI3
+
+menuconfig     SCB0_MI4
+       bool "SCB0 Master Interface 4 (L1B)"
+       default n
+       depends on SCB_PRIORITY
+       help
+       The slave interface id of each slot should be set according to the following table.
+       Core 0  -- 0
+       Core 1  -- 2
+       SCB1    -- 9
+       SCB2    -- 10
+       SCB3    -- 11
+       SCB4    -- 12
+       SCB5    -- 5
+       SCB6    -- 6
+       SCB7    -- 8
+       SCB8    -- 7
+       SCB9    -- 4
+       USB     -- 13
+
+if SCB0_MI4
+
+config SCB0_MI4_SLOT0
+       int "Slot 0 slave interface id"
+       default 4
+       range 0 13
+
+config SCB0_MI4_SLOT1
+       int "Slot 1 slave interface id"
+       default 5
+       range 0 13
+
+config SCB0_MI4_SLOT2
+       int "Slot 2 slave interface id"
+       default 6
+       range 0 13
+
+config SCB0_MI4_SLOT3
+       int "Slot 3 slave interface id"
+       default 7
+       range 0 13
+
+config SCB0_MI4_SLOT4
+       int "Slot 4 slave interface id"
+       default 8
+       range 0 13
+
+config SCB0_MI4_SLOT5
+       int "Slot 5 slave interface id"
+       default 9
+       range 0 13
+
+config SCB0_MI4_SLOT6
+       int "Slot 6 slave interface id"
+       default 10
+       range 0 13
+
+config SCB0_MI4_SLOT7
+       int "Slot 7 slave interface id"
+       default 11
+       range 0 13
+
+config SCB0_MI4_SLOT8
+       int "Slot 8 slave interface id"
+       default 13
+       range 0 13
+
+config SCB0_MI4_SLOT9
+       int "Slot 9 slave interface id"
+       default 12
+       range 0 13
+
+config SCB0_MI4_SLOT10
+       int "Slot 10 slave interface id"
+       default 4
+       range 0 13
+
+config SCB0_MI4_SLOT11
+       int "Slot 11 slave interface id"
+       default 5
+       range 0 13
+
+config SCB0_MI4_SLOT12
+       int "Slot 12 slave interface id"
+       default 6
+       range 0 13
+
+config SCB0_MI4_SLOT13
+       int "Slot 13 slave interface id"
+       default 7
+       range 0 13
+
+config SCB0_MI4_SLOT14
+       int "Slot 14 slave interface id"
+       default 8
+       range 0 13
+
+config SCB0_MI4_SLOT15
+       int "Slot 15 slave interface id"
+       default 9
+       range 0 13
+
+config SCB0_MI4_SLOT16
+       int "Slot 16 slave interface id"
+       default 10
+       range 0 13
+
+config SCB0_MI4_SLOT17
+       int "Slot 17 slave interface id"
+       default 11
+       range 0 13
+
+config SCB0_MI4_SLOT18
+       int "Slot 18 slave interface id"
+       default 13
+       range 0 13
+
+config SCB0_MI4_SLOT19
+       int "Slot 19 slave interface id"
+       default 12
+       range 0 13
+
+config SCB0_MI4_SLOT20
+       int "Slot 20 slave interface id"
+       default 4
+       range 0 13
+
+config SCB0_MI4_SLOT21
+       int "Slot 21 slave interface id"
+       default 5
+       range 0 13
+
+config SCB0_MI4_SLOT22
+       int "Slot 22 slave interface id"
+       default 6
+       range 0 13
+
+config SCB0_MI4_SLOT23
+       int "Slot 23 slave interface id"
+       default 7
+       range 0 13
+
+config SCB0_MI4_SLOT24
+       int "Slot 24 slave interface id"
+       default 8
+       range 0 13
+
+config SCB0_MI4_SLOT25
+       int "Slot 25 slave interface id"
+       default 9
+       range 0 13
+
+config SCB0_MI4_SLOT26
+       int "Slot 26 slave interface id"
+       default 10
+       range 0 13
+
+config SCB0_MI4_SLOT27
+       int "Slot 27 slave interface id"
+       default 11
+       range 0 13
+
+config SCB0_MI4_SLOT28
+       int "Slot 28 slave interface id"
+       default 13
+       range 0 13
+
+config SCB0_MI4_SLOT29
+       int "Slot 29 slave interface id"
+       default 12
+       range 0 13
+
+config SCB0_MI4_SLOT30
+       int "Slot 30 slave interface id"
+       default 4
+       range 0 13
+
+config SCB0_MI4_SLOT31
+       int "Slot 31 slave interface id"
+       default 7
+       range 0 13
+
+endif # SCB0_MI4
+
+menuconfig     SCB0_MI5
+       bool "SCB0 Master Interface 5 (SMMR)"
+       default n
+       depends on SCB_PRIORITY
+       help
+       The slave interface id of each slot should be set according to the following table.
+       MMR0    -- 1
+       MMR1    -- 3
+       SCB2    -- 10
+       SCB4    -- 12
+
+if SCB0_MI5
+
+config SCB0_MI5_SLOT0
+       int "Slot 0 slave interface id"
+       default 1
+       range 0 13
+
+config SCB0_MI5_SLOT1
+       int "Slot 1 slave interface id"
+       default 3
+       range 0 13
+
+config SCB0_MI5_SLOT2
+       int "Slot 2 slave interface id"
+       default 10
+       range 0 13
+
+config SCB0_MI5_SLOT3
+       int "Slot 3 slave interface id"
+       default 12
+       range 0 13
+
+config SCB0_MI5_SLOT4
+       int "Slot 4 slave interface id"
+       default 1
+       range 0 13
+
+config SCB0_MI5_SLOT5
+       int "Slot 5 slave interface id"
+       default 3
+       range 0 13
+
+config SCB0_MI5_SLOT6
+       int "Slot 6 slave interface id"
+       default 10
+       range 0 13
+
+config SCB0_MI5_SLOT7
+       int "Slot 7 slave interface id"
+       default 12
+       range 0 13
+
+config SCB0_MI5_SLOT8
+       int "Slot 8 slave interface id"
+       default 1
+       range 0 13
+
+config SCB0_MI5_SLOT9
+       int "Slot 9 slave interface id"
+       default 3
+       range 0 13
+
+config SCB0_MI5_SLOT10
+       int "Slot 10 slave interface id"
+       default 10
+       range 0 13
+
+config SCB0_MI5_SLOT11
+       int "Slot 11 slave interface id"
+       default 12
+       range 0 13
+
+config SCB0_MI5_SLOT12
+       int "Slot 12 slave interface id"
+       default 1
+       range 0 13
+
+config SCB0_MI5_SLOT13
+       int "Slot 13 slave interface id"
+       default 3
+       range 0 13
+
+config SCB0_MI5_SLOT14
+       int "Slot 14 slave interface id"
+       default 10
+       range 0 13
+
+config SCB0_MI5_SLOT15
+       int "Slot 15 slave interface id"
+       default 12
+       range 0 13
+
+endif # SCB0_MI5
+
+menuconfig     SCB1_MI0
+       bool "SCB1 Master Interface 0"
+       default n
+       depends on SCB_PRIORITY
+       help
+       The slave interface id of each slot should be set according to the following table.
+       SPORT0A -- 0
+       SPORT0B -- 1
+       SPORT1A -- 2
+       SPORT1B -- 3
+       SPORT2A -- 4
+       SPORT2B -- 5
+       SPI0TX  -- 6
+       SPI0RX  -- 7
+       SPI1TX  -- 8
+       SPI1RX  -- 9
+
+if SCB1_MI0
+
+config SCB1_MI0_SLOT0
+       int "Slot 0 slave interface id"
+       default 0
+       range 0 9
+
+config SCB1_MI0_SLOT1
+       int "Slot 1 slave interface id"
+       default 1
+       range 0 9
+
+config SCB1_MI0_SLOT2
+       int "Slot 2 slave interface id"
+       default 2
+       range 0 9
+
+config SCB1_MI0_SLOT3
+       int "Slot 3 slave interface id"
+       default 3
+       range 0 9
+
+config SCB1_MI0_SLOT4
+       int "Slot 4 slave interface id"
+       default 4
+       range 0 9
+
+config SCB1_MI0_SLOT5
+       int "Slot 5 slave interface id"
+       default 5
+       range 0 9
+
+config SCB1_MI0_SLOT6
+       int "Slot 6 slave interface id"
+       default 6
+       range 0 9
+
+config SCB1_MI0_SLOT7
+       int "Slot 7 slave interface id"
+       default 7
+       range 0 9
+
+config SCB1_MI0_SLOT8
+       int "Slot 8 slave interface id"
+       default 8
+       range 0 9
+
+config SCB1_MI0_SLOT9
+       int "Slot 9 slave interface id"
+       default 9
+       range 0 9
+
+config SCB1_MI0_SLOT10
+       int "Slot 10 slave interface id"
+       default 0
+       range 0 9
+
+config SCB1_MI0_SLOT11
+       int "Slot 11 slave interface id"
+       default 1
+       range 0 9
+
+config SCB1_MI0_SLOT12
+       int "Slot 12 slave interface id"
+       default 2
+       range 0 9
+
+config SCB1_MI0_SLOT13
+       int "Slot 13 slave interface id"
+       default 3
+       range 0 9
+
+config SCB1_MI0_SLOT14
+       int "Slot 14 slave interface id"
+       default 4
+       range 0 9
+
+config SCB1_MI0_SLOT15
+       int "Slot 15 slave interface id"
+       default 5
+       range 0 9
+
+config SCB1_MI0_SLOT16
+       int "Slot 16 slave interface id"
+       default 6
+       range 0 9
+
+config SCB1_MI0_SLOT17
+       int "Slot 17 slave interface id"
+       default 7
+       range 0 9
+
+config SCB1_MI0_SLOT18
+       int "Slot 18 slave interface id"
+       default 8
+       range 0 9
+
+config SCB1_MI0_SLOT19
+       int "Slot 19 slave interface id"
+       default 9
+       range 0 9
+
+endif # SCB1_MI0
+
+menuconfig     SCB2_MI0
+       bool "SCB2 Master Interface 0"
+       default n
+       depends on SCB_PRIORITY
+       help
+       The slave interface id of each slot should be set according to the following table.
+       RSI     -- 0
+       SDU DMA -- 1
+       SDU     -- 2
+       EMAC0   -- 3
+       EMAC1   -- 4
+
+if SCB2_MI0
+
+config SCB2_MI0_SLOT0
+       int "Slot 0 slave interface id"
+       default 0
+       range 0 4
+
+config SCB2_MI0_SLOT1
+       int "Slot 1 slave interface id"
+       default 1
+       range 0 4
+
+config SCB2_MI0_SLOT2
+       int "Slot 2 slave interface id"
+       default 2
+       range 0 4
+
+config SCB2_MI0_SLOT3
+       int "Slot 3 slave interface id"
+       default 3
+       range 0 4
+
+config SCB2_MI0_SLOT4
+       int "Slot 4 slave interface id"
+       default 4
+       range 0 4
+
+config SCB2_MI0_SLOT5
+       int "Slot 5 slave interface id"
+       default 0
+       range 0 4
+
+config SCB2_MI0_SLOT6
+       int "Slot 6 slave interface id"
+       default 1
+       range 0 4
+
+config SCB2_MI0_SLOT7
+       int "Slot 7 slave interface id"
+       default 2
+       range 0 4
+
+config SCB2_MI0_SLOT8
+       int "Slot 8 slave interface id"
+       default 3
+       range 0 4
+
+config SCB2_MI0_SLOT9
+       int "Slot 9 slave interface id"
+       default 4
+       range 0 4
+
+endif # SCB2_MI0
+
+menuconfig     SCB3_MI0
+       bool "SCB3 Master Interface 0"
+       default n
+       depends on SCB_PRIORITY
+       help
+       The slave interface id of each slot should be set according to the following table.
+       LP0     -- 0
+       LP1     -- 1
+       LP2     -- 2
+       LP3     -- 3
+       UART0TX -- 4
+       UART0RX -- 5
+       UART1TX -- 6
+       UART1RX -- 7
+
+if SCB3_MI0
+
+config SCB3_MI0_SLOT0
+       int "Slot 0 slave interface id"
+       default 0
+       range 0 7
+
+config SCB3_MI0_SLOT1
+       int "Slot 1 slave interface id"
+       default 1
+       range 0 7
+
+config SCB3_MI0_SLOT2
+       int "Slot 2 slave interface id"
+       default 2
+       range 0 7
+
+config SCB3_MI0_SLOT3
+       int "Slot 3 slave interface id"
+       default 3
+       range 0 7
+
+config SCB3_MI0_SLOT4
+       int "Slot 4 slave interface id"
+       default 4
+       range 0 7
+
+config SCB3_MI0_SLOT5
+       int "Slot 5 slave interface id"
+       default 5
+       range 0 7
+
+config SCB3_MI0_SLOT6
+       int "Slot 6 slave interface id"
+       default 6
+       range 0 7
+
+config SCB3_MI0_SLOT7
+       int "Slot 7 slave interface id"
+       default 7
+       range 0 7
+
+config SCB3_MI0_SLOT8
+       int "Slot 8 slave interface id"
+       default 0
+       range 0 7
+
+config SCB3_MI0_SLOT9
+       int "Slot 9 slave interface id"
+       default 1
+       range 0 7
+
+config SCB3_MI0_SLOT10
+       int "Slot 10 slave interface id"
+       default 2
+       range 0 7
+
+config SCB3_MI0_SLOT11
+       int "Slot 11 slave interface id"
+       default 3
+       range 0 7
+
+config SCB3_MI0_SLOT12
+       int "Slot 12 slave interface id"
+       default 4
+       range 0 7
+
+config SCB3_MI0_SLOT13
+       int "Slot 13 slave interface id"
+       default 5
+       range 0 7
+
+config SCB3_MI0_SLOT14
+       int "Slot 14 slave interface id"
+       default 6
+       range 0 7
+
+config SCB3_MI0_SLOT15
+       int "Slot 15 slave interface id"
+       default 7
+       range 0 7
+
+endif # SCB3_MI0
+
+menuconfig     SCB4_MI0
+       bool "SCB4 Master Interface 0"
+       default n
+       depends on SCB_PRIORITY
+       help
+       The slave interface id of each slot should be set according to the following table.
+       MDA21   -- 0
+       MDA22   -- 1
+       MDA23   -- 2
+       MDA24   -- 3
+       MDA25   -- 4
+       MDA26   -- 5
+       MDA27   -- 6
+       MDA28   -- 7
+
+if SCB4_MI0
+
+config SCB4_MI0_SLOT0
+       int "Slot 0 slave interface id"
+       default 0
+       range 0 7
+
+config SCB4_MI0_SLOT1
+       int "Slot 1 slave interface id"
+       default 1
+       range 0 7
+
+config SCB4_MI0_SLOT2
+       int "Slot 2 slave interface id"
+       default 2
+       range 0 7
+
+config SCB4_MI0_SLOT3
+       int "Slot 3 slave interface id"
+       default 3
+       range 0 7
+
+config SCB4_MI0_SLOT4
+       int "Slot 4 slave interface id"
+       default 4
+       range 0 7
+
+config SCB4_MI0_SLOT5
+       int "Slot 5 slave interface id"
+       default 5
+       range 0 7
+
+config SCB4_MI0_SLOT6
+       int "Slot 6 slave interface id"
+       default 6
+       range 0 7
+
+config SCB4_MI0_SLOT7
+       int "Slot 7 slave interface id"
+       default 7
+       range 0 7
+
+config SCB4_MI0_SLOT8
+       int "Slot 8 slave interface id"
+       default 0
+       range 0 7
+
+config SCB4_MI0_SLOT9
+       int "Slot 9 slave interface id"
+       default 1
+       range 0 7
+
+config SCB4_MI0_SLOT10
+       int "Slot 10 slave interface id"
+       default 2
+       range 0 7
+
+config SCB4_MI0_SLOT11
+       int "Slot 11 slave interface id"
+       default 3
+       range 0 7
+
+config SCB4_MI0_SLOT12
+       int "Slot 12 slave interface id"
+       default 4
+       range 0 7
+
+config SCB4_MI0_SLOT13
+       int "Slot 13 slave interface id"
+       default 5
+       range 0 7
+
+config SCB4_MI0_SLOT14
+       int "Slot 14 slave interface id"
+       default 6
+       range 0 7
+
+config SCB4_MI0_SLOT15
+       int "Slot 15 slave interface id"
+       default 7
+       range 0 7
+
+endif # SCB4_MI0
+
+menuconfig     SCB5_MI0
+       bool "SCB5 Master Interface 0"
+       default n
+       depends on SCB_PRIORITY
+       help
+       The slave interface id of each slot should be set according to the following table.
+       PPI0 MDA29      -- 0
+       PPI0 MDA30      -- 1
+       PPI2 MDA31      -- 2
+       PPI2 MDA32      -- 3
+
+if SCB5_MI0
+
+config SCB5_MI0_SLOT0
+       int "Slot 0 slave interface id"
+       default 0
+       range 0 3
+
+config SCB5_MI0_SLOT1
+       int "Slot 1 slave interface id"
+       default 1
+       range 0 3
+
+config SCB5_MI0_SLOT2
+       int "Slot 2 slave interface id"
+       default 2
+       range 0 3
+
+config SCB5_MI0_SLOT3
+       int "Slot 3 slave interface id"
+       default 3
+       range 0 3
+
+config SCB5_MI0_SLOT4
+       int "Slot 4 slave interface id"
+       default 0
+       range 0 3
+
+config SCB5_MI0_SLOT5
+       int "Slot 5 slave interface id"
+       default 1
+       range 0 3
+
+config SCB5_MI0_SLOT6
+       int "Slot 6 slave interface id"
+       default 2
+       range 0 3
+
+config SCB5_MI0_SLOT7
+       int "Slot 7 slave interface id"
+       default 3
+       range 0 3
+
+endif # SCB5_MI0
+
+menuconfig     SCB6_MI0
+       bool "SCB6 Master Interface 0"
+       default n
+       depends on SCB_PRIORITY
+       help
+       The slave interface id of each slot should be set according to the following table.
+       PPI1 MDA33      -- 0
+       PPI1 MDA34      -- 1
+
+if SCB6_MI0
+
+config SCB6_MI0_SLOT0
+       int "Slot 0 slave interface id"
+       default 0
+       range 0 1
+
+config SCB6_MI0_SLOT1
+       int "Slot 1 slave interface id"
+       default 1
+       range 0 1
+
+config SCB6_MI0_SLOT2
+       int "Slot 2 slave interface id"
+       default 0
+       range 0 1
+
+config SCB6_MI0_SLOT3
+       int "Slot 3 slave interface id"
+       default 1
+       range 0 1
+
+endif # SCB6_MI0
+
+menuconfig     SCB7_MI0
+       bool "SCB7 Master Interface 0"
+       default n
+       depends on SCB_PRIORITY
+       help
+       The slave interface id of each slot should be set according to the following table.
+       PIXC0   -- 0
+       PIXC1   -- 1
+       PIXC2   -- 2
+
+if SCB7_MI0
+
+config SCB7_MI0_SLOT0
+       int "Slot 0 slave interface id"
+       default 0
+       range 0 2
+
+config SCB7_MI0_SLOT1
+       int "Slot 1 slave interface id"
+       default 1
+       range 0 2
+
+config SCB7_MI0_SLOT2
+       int "Slot 2 slave interface id"
+       default 2
+       range 0 2
+
+config SCB7_MI0_SLOT3
+       int "Slot 3 slave interface id"
+       default 0
+       range 0 2
+
+config SCB7_MI0_SLOT4
+       int "Slot 4 slave interface id"
+       default 1
+       range 0 2
+
+config SCB7_MI0_SLOT5
+       int "Slot 5 slave interface id"
+       default 2
+       range 0 2
+
+endif # SCB7_MI0
+
+menuconfig     SCB8_MI0
+       bool "SCB8 Master Interface 0"
+       default n
+       depends on SCB_PRIORITY
+       help
+       The slave interface id of each slot should be set according to the following table.
+       PVP CPDOB       -- 0
+       PVP CPDOC       -- 1
+       PVP CPCO        -- 2
+       PVP CPCI        -- 3
+
+if SCB8_MI0
+
+config SCB8_MI0_SLOT0
+       int "Slot 0 slave interface id"
+       default 0
+       range 0 3
+
+config SCB8_MI0_SLOT1
+       int "Slot 1 slave interface id"
+       default 1
+       range 0 3
+
+config SCB8_MI0_SLOT2
+       int "Slot 2 slave interface id"
+       default 2
+       range 0 3
+
+config SCB8_MI0_SLOT3
+       int "Slot 3 slave interface id"
+       default 3
+       range 0 3
+
+config SCB8_MI0_SLOT4
+       int "Slot 4 slave interface id"
+       default 0
+       range 0 3
+
+config SCB8_MI0_SLOT5
+       int "Slot 5 slave interface id"
+       default 1
+       range 0 3
+
+config SCB8_MI0_SLOT6
+       int "Slot 6 slave interface id"
+       default 2
+       range 0 3
+
+config SCB8_MI0_SLOT7
+       int "Slot 7 slave interface id"
+       default 3
+       range 0 3
+
+endif # SCB8_MI0
+
+menuconfig     SCB9_MI0
+       bool "SCB9 Master Interface 0"
+       default n
+       depends on SCB_PRIORITY
+       help
+       The slave interface id of each slot should be set according to the following table.
+       PVP MPDO        -- 0
+       PVP MPDI        -- 1
+       PVP MPCO        -- 2
+       PVP MPCI        -- 3
+       PVP CPDOA       -- 4
+
+if SCB9_MI0
+
+config SCB9_MI0_SLOT0
+       int "Slot 0 slave interface id"
+       default 0
+       range 0 4
+
+config SCB9_MI0_SLOT1
+       int "Slot 1 slave interface id"
+       default 1
+       range 0 4
+
+config SCB9_MI0_SLOT2
+       int "Slot 2 slave interface id"
+       default 2
+       range 0 4
+
+config SCB9_MI0_SLOT3
+       int "Slot 3 slave interface id"
+       default 3
+       range 0 4
+
+config SCB9_MI0_SLOT4
+       int "Slot 4 slave interface id"
+       default 4
+       range 0 4
+
+config SCB9_MI0_SLOT5
+       int "Slot 5 slave interface id"
+       default 0
+       range 0 4
+
+config SCB9_MI0_SLOT6
+       int "Slot 6 slave interface id"
+       default 1
+       range 0 4
+
+config SCB9_MI0_SLOT7
+       int "Slot 7 slave interface id"
+       default 2
+       range 0 4
+
+config SCB9_MI0_SLOT8
+       int "Slot 8 slave interface id"
+       default 3
+       range 0 4
+
+config SCB9_MI0_SLOT9
+       int "Slot 9 slave interface id"
+       default 4
+       range 0 4
+
+endif # SCB9_MI0
+
 endmenu
 
 endif
index 234fe1b..60ffaf8 100644 (file)
@@ -4,3 +4,4 @@
 
 obj-y := dma.o clock.o ints-priority.o
 obj-$(CONFIG_PM) += pm.o dpm.o
+obj-$(CONFIG_SCB_PRIORITY) += scb.o
index 0bc4723..d56a55a 100644 (file)
@@ -104,6 +104,7 @@ static struct platform_device bfin_rotary_device = {
 
 #if defined(CONFIG_STMMAC_ETH) || defined(CONFIG_STMMAC_ETH_MODULE)
 #include <linux/stmmac.h>
+#include <linux/phy.h>
 
 static unsigned short pins[] = P_RMII0;
 
@@ -111,11 +112,26 @@ static struct stmmac_mdio_bus_data phy_private_data = {
        .phy_mask = 1,
 };
 
+static struct stmmac_dma_cfg eth_dma_cfg = {
+       .pbl    = 2,
+};
+
+int stmmac_ptp_clk_init(struct platform_device *pdev)
+{
+       bfin_write32(PADS0_EMAC_PTP_CLKSEL, 0);
+       return 0;
+}
+
 static struct plat_stmmacenet_data eth_private_data = {
+       .has_gmac = 1,
        .bus_id   = 0,
        .enh_desc = 1,
        .phy_addr = 1,
        .mdio_bus_data = &phy_private_data,
+       .dma_cfg  = &eth_dma_cfg,
+       .force_thresh_dma_mode = 1,
+       .interface = PHY_INTERFACE_MODE_RMII,
+       .init = stmmac_ptp_clk_init,
 };
 
 static struct platform_device bfin_eth_device = {
@@ -1107,6 +1123,81 @@ static struct bfin_display_config bfin_display_data = {
 };
 #endif
 
+#if IS_ENABLED(CONFIG_VIDEO_ADV7343)
+#include <media/adv7343.h>
+
+static struct v4l2_output adv7343_outputs[] = {
+       {
+               .index = 0,
+               .name = "Composite",
+               .type = V4L2_OUTPUT_TYPE_ANALOG,
+               .std = V4L2_STD_ALL,
+               .capabilities = V4L2_OUT_CAP_STD,
+       },
+       {
+               .index = 1,
+               .name = "S-Video",
+               .type = V4L2_OUTPUT_TYPE_ANALOG,
+               .std = V4L2_STD_ALL,
+               .capabilities = V4L2_OUT_CAP_STD,
+       },
+       {
+               .index = 2,
+               .name = "Component",
+               .type = V4L2_OUTPUT_TYPE_ANALOG,
+               .std = V4L2_STD_ALL,
+               .capabilities = V4L2_OUT_CAP_STD,
+       },
+
+};
+
+static struct disp_route adv7343_routes[] = {
+       {
+               .output = ADV7343_COMPOSITE_ID,
+       },
+       {
+               .output = ADV7343_SVIDEO_ID,
+       },
+       {
+               .output = ADV7343_COMPONENT_ID,
+       },
+};
+
+static struct adv7343_platform_data adv7343_data = {
+       .mode_config = {
+               .sleep_mode = false,
+               .pll_control = false,
+               .dac_1 = true,
+               .dac_2 = true,
+               .dac_3 = true,
+               .dac_4 = true,
+               .dac_5 = true,
+               .dac_6 = true,
+       },
+       .sd_config = {
+               .sd_dac_out1 = false,
+               .sd_dac_out2 = false,
+       },
+};
+
+static struct bfin_display_config bfin_display_data = {
+       .card_name = "BF609",
+       .outputs = adv7343_outputs,
+       .num_outputs = ARRAY_SIZE(adv7343_outputs),
+       .routes = adv7343_routes,
+       .i2c_adapter_id = 0,
+       .board_info = {
+               .type = "adv7343",
+               .addr = 0x2b,
+               .platform_data = (void *)&adv7343_data,
+       },
+       .ppi_info = &ppi_info_disp,
+       .ppi_control = (PACK_EN | DLEN_8 | EPPI_CTL_FS1LO_FS2LO
+                       | EPPI_CTL_POLC3 | EPPI_CTL_BLANKGEN | EPPI_CTL_SYNC2
+                       | EPPI_CTL_NON656 | EPPI_CTL_DIR),
+};
+#endif
+
 static struct platform_device bfin_display_device = {
        .name = "bfin_display",
        .dev = {
index 437d56c..dab8849 100644 (file)
@@ -220,6 +220,12 @@ unsigned long sys_clk_get_rate(struct clk *clk)
        }
 }
 
+unsigned long dummy_get_rate(struct clk *clk)
+{
+       clk->parent->rate = clk_get_rate(clk->parent);
+       return clk->parent->rate;
+}
+
 unsigned long sys_clk_round_rate(struct clk *clk, unsigned long rate)
 {
        unsigned long max_rate;
@@ -283,6 +289,10 @@ static struct clk_ops sys_clk_ops = {
        .round_rate = sys_clk_round_rate,
 };
 
+static struct clk_ops dummy_clk_ops = {
+       .get_rate = dummy_get_rate,
+};
+
 static struct clk sys_clkin = {
        .name       = "SYS_CLKIN",
        .rate       = CONFIG_CLKIN_HZ,
@@ -364,6 +374,12 @@ static struct clk oclk = {
        .parent     = &pll_clk,
 };
 
+static struct clk ethclk = {
+       .name       = "stmmaceth",
+       .parent     = &sclk0,
+       .ops        = &dummy_clk_ops,
+};
+
 static struct clk_lookup bf609_clks[] = {
        CLK(sys_clkin, NULL, "SYS_CLKIN"),
        CLK(pll_clk, NULL, "PLLCLK"),
@@ -375,6 +391,7 @@ static struct clk_lookup bf609_clks[] = {
        CLK(sclk1, NULL, "SCLK1"),
        CLK(dclk, NULL, "DCLK"),
        CLK(oclk, NULL, "OCLK"),
+       CLK(ethclk, NULL, "stmmaceth"),
 };
 
 int __init clk_init(void)
index f1a6afa..35caa7b 100644 (file)
 #define PORTG_LOCK                  0xFFC03344         /* PORTG Port x GPIO Lock Register */
 #define PORTG_REVID                 0xFFC0337C         /* PORTG Port x GPIO Revision ID */
 
+/* ==================================================
+        Pads Controller Registers
+   ================================================== */
+
+/* =========================
+        PADS0
+   ========================= */
+#define PADS0_EMAC_PTP_CLKSEL      0xFFC03404         /* PADS0 Clock Selection for EMAC and PTP */
+#define PADS0_TWI_VSEL             0xFFC03408         /* PADS0 TWI Voltage Selection */
+#define PADS0_PORTS_HYST           0xFFC03440         /* PADS0 Hysteresis Enable Register */
 
 /* =========================
         PINT Registers
diff --git a/arch/blackfin/mach-bf609/scb.c b/arch/blackfin/mach-bf609/scb.c
new file mode 100644 (file)
index 0000000..ac1f07c
--- /dev/null
@@ -0,0 +1,363 @@
+/*
+ * arch/blackfin/mach-bf609/scb.c - reprogram system cross bar priority
+ *
+ * Copyright 2012 Analog Devices Inc.
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+#include <asm/blackfin.h>
+#include <asm/scb.h>
+
+/*
+ * Per-master-interface arbitration tables: each entry pairs an SCB
+ * read/write arbitration register with the Kconfig-selected slave
+ * interface id for each of its slots.  The list is terminated by a
+ * zero entry (scb_mi_arbr == 0), which init_scb() uses as sentinel.
+ */
+struct scb_mi_prio scb_data[] = {
+#ifdef CONFIG_SCB0_MI0
+       { REG_SCB0_ARBR0, REG_SCB0_ARBW0, 32, {
+               CONFIG_SCB0_MI0_SLOT0,
+               CONFIG_SCB0_MI0_SLOT1,
+               CONFIG_SCB0_MI0_SLOT2,
+               CONFIG_SCB0_MI0_SLOT3,
+               CONFIG_SCB0_MI0_SLOT4,
+               CONFIG_SCB0_MI0_SLOT5,
+               CONFIG_SCB0_MI0_SLOT6,
+               CONFIG_SCB0_MI0_SLOT7,
+               CONFIG_SCB0_MI0_SLOT8,
+               CONFIG_SCB0_MI0_SLOT9,
+               CONFIG_SCB0_MI0_SLOT10,
+               CONFIG_SCB0_MI0_SLOT11,
+               CONFIG_SCB0_MI0_SLOT12,
+               CONFIG_SCB0_MI0_SLOT13,
+               CONFIG_SCB0_MI0_SLOT14,
+               CONFIG_SCB0_MI0_SLOT15,
+               CONFIG_SCB0_MI0_SLOT16,
+               CONFIG_SCB0_MI0_SLOT17,
+               CONFIG_SCB0_MI0_SLOT18,
+               CONFIG_SCB0_MI0_SLOT19,
+               CONFIG_SCB0_MI0_SLOT20,
+               CONFIG_SCB0_MI0_SLOT21,
+               CONFIG_SCB0_MI0_SLOT22,
+               CONFIG_SCB0_MI0_SLOT23,
+               CONFIG_SCB0_MI0_SLOT24,
+               CONFIG_SCB0_MI0_SLOT25,
+               CONFIG_SCB0_MI0_SLOT26,
+               CONFIG_SCB0_MI0_SLOT27,
+               CONFIG_SCB0_MI0_SLOT28,
+               CONFIG_SCB0_MI0_SLOT29,
+               CONFIG_SCB0_MI0_SLOT30,
+               CONFIG_SCB0_MI0_SLOT31
+               },
+       },
+#endif
+#ifdef CONFIG_SCB0_MI1
+       { REG_SCB0_ARBR1, REG_SCB0_ARBW1, 32, {
+               CONFIG_SCB0_MI1_SLOT0,
+               CONFIG_SCB0_MI1_SLOT1,
+               CONFIG_SCB0_MI1_SLOT2,
+               CONFIG_SCB0_MI1_SLOT3,
+               CONFIG_SCB0_MI1_SLOT4,
+               CONFIG_SCB0_MI1_SLOT5,
+               CONFIG_SCB0_MI1_SLOT6,
+               CONFIG_SCB0_MI1_SLOT7,
+               CONFIG_SCB0_MI1_SLOT8,
+               CONFIG_SCB0_MI1_SLOT9,
+               CONFIG_SCB0_MI1_SLOT10,
+               CONFIG_SCB0_MI1_SLOT11,
+               CONFIG_SCB0_MI1_SLOT12,
+               CONFIG_SCB0_MI1_SLOT13,
+               CONFIG_SCB0_MI1_SLOT14,
+               CONFIG_SCB0_MI1_SLOT15,
+               CONFIG_SCB0_MI1_SLOT16,
+               CONFIG_SCB0_MI1_SLOT17,
+               CONFIG_SCB0_MI1_SLOT18,
+               CONFIG_SCB0_MI1_SLOT19,
+               CONFIG_SCB0_MI1_SLOT20,
+               CONFIG_SCB0_MI1_SLOT21,
+               CONFIG_SCB0_MI1_SLOT22,
+               CONFIG_SCB0_MI1_SLOT23,
+               CONFIG_SCB0_MI1_SLOT24,
+               CONFIG_SCB0_MI1_SLOT25,
+               CONFIG_SCB0_MI1_SLOT26,
+               CONFIG_SCB0_MI1_SLOT27,
+               CONFIG_SCB0_MI1_SLOT28,
+               CONFIG_SCB0_MI1_SLOT29,
+               CONFIG_SCB0_MI1_SLOT30,
+               CONFIG_SCB0_MI1_SLOT31
+               },
+       },
+#endif
+#ifdef CONFIG_SCB0_MI2
+       { REG_SCB0_ARBR2, REG_SCB0_ARBW2, 32, {
+               CONFIG_SCB0_MI2_SLOT0,
+               CONFIG_SCB0_MI2_SLOT1,
+               CONFIG_SCB0_MI2_SLOT2,
+               CONFIG_SCB0_MI2_SLOT3,
+               CONFIG_SCB0_MI2_SLOT4,
+               CONFIG_SCB0_MI2_SLOT5,
+               CONFIG_SCB0_MI2_SLOT6,
+               CONFIG_SCB0_MI2_SLOT7,
+               CONFIG_SCB0_MI2_SLOT8,
+               CONFIG_SCB0_MI2_SLOT9,
+               CONFIG_SCB0_MI2_SLOT10,
+               CONFIG_SCB0_MI2_SLOT11,
+               CONFIG_SCB0_MI2_SLOT12,
+               CONFIG_SCB0_MI2_SLOT13,
+               CONFIG_SCB0_MI2_SLOT14,
+               CONFIG_SCB0_MI2_SLOT15,
+               CONFIG_SCB0_MI2_SLOT16,
+               CONFIG_SCB0_MI2_SLOT17,
+               CONFIG_SCB0_MI2_SLOT18,
+               CONFIG_SCB0_MI2_SLOT19,
+               CONFIG_SCB0_MI2_SLOT20,
+               CONFIG_SCB0_MI2_SLOT21,
+               CONFIG_SCB0_MI2_SLOT22,
+               CONFIG_SCB0_MI2_SLOT23,
+               CONFIG_SCB0_MI2_SLOT24,
+               CONFIG_SCB0_MI2_SLOT25,
+               CONFIG_SCB0_MI2_SLOT26,
+               CONFIG_SCB0_MI2_SLOT27,
+               CONFIG_SCB0_MI2_SLOT28,
+               CONFIG_SCB0_MI2_SLOT29,
+               CONFIG_SCB0_MI2_SLOT30,
+               CONFIG_SCB0_MI2_SLOT31
+               },
+       },
+#endif
+#ifdef CONFIG_SCB0_MI3
+       { REG_SCB0_ARBR3, REG_SCB0_ARBW3, 32, {
+               CONFIG_SCB0_MI3_SLOT0,
+               CONFIG_SCB0_MI3_SLOT1,
+               CONFIG_SCB0_MI3_SLOT2,
+               CONFIG_SCB0_MI3_SLOT3,
+               CONFIG_SCB0_MI3_SLOT4,
+               CONFIG_SCB0_MI3_SLOT5,
+               CONFIG_SCB0_MI3_SLOT6,
+               CONFIG_SCB0_MI3_SLOT7,
+               CONFIG_SCB0_MI3_SLOT8,
+               CONFIG_SCB0_MI3_SLOT9,
+               CONFIG_SCB0_MI3_SLOT10,
+               CONFIG_SCB0_MI3_SLOT11,
+               CONFIG_SCB0_MI3_SLOT12,
+               CONFIG_SCB0_MI3_SLOT13,
+               CONFIG_SCB0_MI3_SLOT14,
+               CONFIG_SCB0_MI3_SLOT15,
+               CONFIG_SCB0_MI3_SLOT16,
+               CONFIG_SCB0_MI3_SLOT17,
+               CONFIG_SCB0_MI3_SLOT18,
+               CONFIG_SCB0_MI3_SLOT19,
+               CONFIG_SCB0_MI3_SLOT20,
+               CONFIG_SCB0_MI3_SLOT21,
+               CONFIG_SCB0_MI3_SLOT22,
+               CONFIG_SCB0_MI3_SLOT23,
+               CONFIG_SCB0_MI3_SLOT24,
+               CONFIG_SCB0_MI3_SLOT25,
+               CONFIG_SCB0_MI3_SLOT26,
+               CONFIG_SCB0_MI3_SLOT27,
+               CONFIG_SCB0_MI3_SLOT28,
+               CONFIG_SCB0_MI3_SLOT29,
+               CONFIG_SCB0_MI3_SLOT30,
+               CONFIG_SCB0_MI3_SLOT31
+               },
+       },
+#endif
+#ifdef CONFIG_SCB0_MI4
+       /* was REG_SCB4_ARBW0 (SCB4's write register, used by the SCB4_MI0
+        * entry below); SCB0 MI4 must pair ARBR4 with ARBW4 like the
+        * other SCB0 master interfaces. */
+       { REG_SCB0_ARBR4, REG_SCB0_ARBW4, 32, {
+               CONFIG_SCB0_MI4_SLOT0,
+               CONFIG_SCB0_MI4_SLOT1,
+               CONFIG_SCB0_MI4_SLOT2,
+               CONFIG_SCB0_MI4_SLOT3,
+               CONFIG_SCB0_MI4_SLOT4,
+               CONFIG_SCB0_MI4_SLOT5,
+               CONFIG_SCB0_MI4_SLOT6,
+               CONFIG_SCB0_MI4_SLOT7,
+               CONFIG_SCB0_MI4_SLOT8,
+               CONFIG_SCB0_MI4_SLOT9,
+               CONFIG_SCB0_MI4_SLOT10,
+               CONFIG_SCB0_MI4_SLOT11,
+               CONFIG_SCB0_MI4_SLOT12,
+               CONFIG_SCB0_MI4_SLOT13,
+               CONFIG_SCB0_MI4_SLOT14,
+               CONFIG_SCB0_MI4_SLOT15,
+               CONFIG_SCB0_MI4_SLOT16,
+               CONFIG_SCB0_MI4_SLOT17,
+               CONFIG_SCB0_MI4_SLOT18,
+               CONFIG_SCB0_MI4_SLOT19,
+               CONFIG_SCB0_MI4_SLOT20,
+               CONFIG_SCB0_MI4_SLOT21,
+               CONFIG_SCB0_MI4_SLOT22,
+               CONFIG_SCB0_MI4_SLOT23,
+               CONFIG_SCB0_MI4_SLOT24,
+               CONFIG_SCB0_MI4_SLOT25,
+               CONFIG_SCB0_MI4_SLOT26,
+               CONFIG_SCB0_MI4_SLOT27,
+               CONFIG_SCB0_MI4_SLOT28,
+               CONFIG_SCB0_MI4_SLOT29,
+               CONFIG_SCB0_MI4_SLOT30,
+               CONFIG_SCB0_MI4_SLOT31
+               },
+       },
+#endif
+#ifdef CONFIG_SCB0_MI5
+       { REG_SCB0_ARBR5, REG_SCB0_ARBW5, 16, {
+               CONFIG_SCB0_MI5_SLOT0,
+               CONFIG_SCB0_MI5_SLOT1,
+               CONFIG_SCB0_MI5_SLOT2,
+               CONFIG_SCB0_MI5_SLOT3,
+               CONFIG_SCB0_MI5_SLOT4,
+               CONFIG_SCB0_MI5_SLOT5,
+               CONFIG_SCB0_MI5_SLOT6,
+               CONFIG_SCB0_MI5_SLOT7,
+               CONFIG_SCB0_MI5_SLOT8,
+               CONFIG_SCB0_MI5_SLOT9,
+               CONFIG_SCB0_MI5_SLOT10,
+               CONFIG_SCB0_MI5_SLOT11,
+               CONFIG_SCB0_MI5_SLOT12,
+               CONFIG_SCB0_MI5_SLOT13,
+               CONFIG_SCB0_MI5_SLOT14,
+               CONFIG_SCB0_MI5_SLOT15
+               },
+       },
+#endif
+#ifdef CONFIG_SCB1_MI0
+       { REG_SCB1_ARBR0, REG_SCB1_ARBW0, 20, {
+               CONFIG_SCB1_MI0_SLOT0,
+               CONFIG_SCB1_MI0_SLOT1,
+               CONFIG_SCB1_MI0_SLOT2,
+               CONFIG_SCB1_MI0_SLOT3,
+               CONFIG_SCB1_MI0_SLOT4,
+               CONFIG_SCB1_MI0_SLOT5,
+               CONFIG_SCB1_MI0_SLOT6,
+               CONFIG_SCB1_MI0_SLOT7,
+               CONFIG_SCB1_MI0_SLOT8,
+               CONFIG_SCB1_MI0_SLOT9,
+               CONFIG_SCB1_MI0_SLOT10,
+               CONFIG_SCB1_MI0_SLOT11,
+               CONFIG_SCB1_MI0_SLOT12,
+               CONFIG_SCB1_MI0_SLOT13,
+               CONFIG_SCB1_MI0_SLOT14,
+               CONFIG_SCB1_MI0_SLOT15,
+               CONFIG_SCB1_MI0_SLOT16,
+               CONFIG_SCB1_MI0_SLOT17,
+               CONFIG_SCB1_MI0_SLOT18,
+               CONFIG_SCB1_MI0_SLOT19
+               },
+       },
+#endif
+#ifdef CONFIG_SCB2_MI0
+       { REG_SCB2_ARBR0, REG_SCB2_ARBW0, 10, {
+               CONFIG_SCB2_MI0_SLOT0,
+               CONFIG_SCB2_MI0_SLOT1,
+               CONFIG_SCB2_MI0_SLOT2,
+               CONFIG_SCB2_MI0_SLOT3,
+               CONFIG_SCB2_MI0_SLOT4,
+               CONFIG_SCB2_MI0_SLOT5,
+               CONFIG_SCB2_MI0_SLOT6,
+               CONFIG_SCB2_MI0_SLOT7,
+               CONFIG_SCB2_MI0_SLOT8,
+               CONFIG_SCB2_MI0_SLOT9
+               },
+       },
+#endif
+#ifdef CONFIG_SCB3_MI0
+       { REG_SCB3_ARBR0, REG_SCB3_ARBW0, 16, {
+               CONFIG_SCB3_MI0_SLOT0,
+               CONFIG_SCB3_MI0_SLOT1,
+               CONFIG_SCB3_MI0_SLOT2,
+               CONFIG_SCB3_MI0_SLOT3,
+               CONFIG_SCB3_MI0_SLOT4,
+               CONFIG_SCB3_MI0_SLOT5,
+               CONFIG_SCB3_MI0_SLOT6,
+               CONFIG_SCB3_MI0_SLOT7,
+               CONFIG_SCB3_MI0_SLOT8,
+               CONFIG_SCB3_MI0_SLOT9,
+               CONFIG_SCB3_MI0_SLOT10,
+               CONFIG_SCB3_MI0_SLOT11,
+               CONFIG_SCB3_MI0_SLOT12,
+               CONFIG_SCB3_MI0_SLOT13,
+               CONFIG_SCB3_MI0_SLOT14,
+               CONFIG_SCB3_MI0_SLOT15
+               },
+       },
+#endif
+#ifdef CONFIG_SCB4_MI0
+       { REG_SCB4_ARBR0, REG_SCB4_ARBW0, 16, {
+               CONFIG_SCB4_MI0_SLOT0,
+               CONFIG_SCB4_MI0_SLOT1,
+               CONFIG_SCB4_MI0_SLOT2,
+               CONFIG_SCB4_MI0_SLOT3,
+               CONFIG_SCB4_MI0_SLOT4,
+               CONFIG_SCB4_MI0_SLOT5,
+               CONFIG_SCB4_MI0_SLOT6,
+               CONFIG_SCB4_MI0_SLOT7,
+               CONFIG_SCB4_MI0_SLOT8,
+               CONFIG_SCB4_MI0_SLOT9,
+               CONFIG_SCB4_MI0_SLOT10,
+               CONFIG_SCB4_MI0_SLOT11,
+               CONFIG_SCB4_MI0_SLOT12,
+               CONFIG_SCB4_MI0_SLOT13,
+               CONFIG_SCB4_MI0_SLOT14,
+               CONFIG_SCB4_MI0_SLOT15
+               },
+       },
+#endif
+#ifdef CONFIG_SCB5_MI0
+       { REG_SCB5_ARBR0, REG_SCB5_ARBW0, 8, {
+               CONFIG_SCB5_MI0_SLOT0,
+               CONFIG_SCB5_MI0_SLOT1,
+               CONFIG_SCB5_MI0_SLOT2,
+               CONFIG_SCB5_MI0_SLOT3,
+               CONFIG_SCB5_MI0_SLOT4,
+               CONFIG_SCB5_MI0_SLOT5,
+               CONFIG_SCB5_MI0_SLOT6,
+               CONFIG_SCB5_MI0_SLOT7
+               },
+       },
+#endif
+#ifdef CONFIG_SCB6_MI0
+       { REG_SCB6_ARBR0, REG_SCB6_ARBW0, 4, {
+               CONFIG_SCB6_MI0_SLOT0,
+               CONFIG_SCB6_MI0_SLOT1,
+               CONFIG_SCB6_MI0_SLOT2,
+               CONFIG_SCB6_MI0_SLOT3
+               },
+       },
+#endif
+#ifdef CONFIG_SCB7_MI0
+       { REG_SCB7_ARBR0, REG_SCB7_ARBW0, 6, {
+               CONFIG_SCB7_MI0_SLOT0,
+               CONFIG_SCB7_MI0_SLOT1,
+               CONFIG_SCB7_MI0_SLOT2,
+               CONFIG_SCB7_MI0_SLOT3,
+               CONFIG_SCB7_MI0_SLOT4,
+               CONFIG_SCB7_MI0_SLOT5
+               },
+       },
+#endif
+#ifdef CONFIG_SCB8_MI0
+       { REG_SCB8_ARBR0, REG_SCB8_ARBW0, 8, {
+               CONFIG_SCB8_MI0_SLOT0,
+               CONFIG_SCB8_MI0_SLOT1,
+               CONFIG_SCB8_MI0_SLOT2,
+               CONFIG_SCB8_MI0_SLOT3,
+               CONFIG_SCB8_MI0_SLOT4,
+               CONFIG_SCB8_MI0_SLOT5,
+               CONFIG_SCB8_MI0_SLOT6,
+               CONFIG_SCB8_MI0_SLOT7
+               },
+       },
+#endif
+#ifdef CONFIG_SCB9_MI0
+       { REG_SCB9_ARBR0, REG_SCB9_ARBW0, 10, {
+               CONFIG_SCB9_MI0_SLOT0,
+               CONFIG_SCB9_MI0_SLOT1,
+               CONFIG_SCB9_MI0_SLOT2,
+               CONFIG_SCB9_MI0_SLOT3,
+               CONFIG_SCB9_MI0_SLOT4,
+               CONFIG_SCB9_MI0_SLOT5,
+               CONFIG_SCB9_MI0_SLOT6,
+               CONFIG_SCB9_MI0_SLOT7,
+               CONFIG_SCB9_MI0_SLOT8,
+               CONFIG_SCB9_MI0_SLOT9
+               },
+       },
+#endif
+       { 0, }
+};
index 675466d..f099792 100644 (file)
@@ -10,6 +10,7 @@ obj-$(CONFIG_PM)          += pm.o
 ifneq ($(CONFIG_BF60x),y)
 obj-$(CONFIG_PM)         += dpmc_modes.o
 endif
+obj-$(CONFIG_SCB_PRIORITY)     += scb-init.o
 obj-$(CONFIG_CPU_VOLTAGE) += dpmc.o
 obj-$(CONFIG_SMP)         += smp.o
 obj-$(CONFIG_BFIN_KERNEL_CLOCK) += clocks-init.o
diff --git a/arch/blackfin/mach-common/scb-init.c b/arch/blackfin/mach-common/scb-init.c
new file mode 100644 (file)
index 0000000..2cbfb0b
--- /dev/null
@@ -0,0 +1,53 @@
+/*
+ * arch/blackfin/mach-common/scb-init.c - reprogram system cross bar priority
+ *
+ * Copyright 2012 Analog Devices Inc.
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+#include <linux/init.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <asm/scb.h>
+
+__attribute__((l1_text))
+inline void scb_mi_write(unsigned long scb_mi_arbw, unsigned int slots,
+               unsigned char *scb_mi_prio)
+{
+       unsigned int i;
+
+       for (i = 0; i < slots; ++i)
+               bfin_write32(scb_mi_arbw, (i << SCB_SLOT_OFFSET) | scb_mi_prio[i]);
+}
+
+__attribute__((l1_text))
+inline void scb_mi_read(unsigned long scb_mi_arbw, unsigned int slots,
+               unsigned char *scb_mi_prio)
+{
+       unsigned int i;
+
+       for (i = 0; i < slots; ++i) {
+               bfin_write32(scb_mi_arbw, (0xFF << SCB_SLOT_OFFSET) | i);
+               scb_mi_prio[i] = bfin_read32(scb_mi_arbw);
+       }
+}
+
+__attribute__((l1_text))
+void init_scb(void)
+{
+       unsigned int i, j;
+       unsigned char scb_tmp_prio[32];
+
+       pr_info("Init System Crossbar\n");
+       for (i = 0; scb_data[i].scb_mi_arbr > 0; ++i) {
+
+               scb_mi_write(scb_data[i].scb_mi_arbw, scb_data[i].scb_mi_slots, scb_data[i].scb_mi_prio);
+
+               pr_debug("scb priority at 0x%lx:\n", scb_data[i].scb_mi_arbr);
+               scb_mi_read(scb_data[i].scb_mi_arbw, scb_data[i].scb_mi_slots, scb_tmp_prio);
+               for (j = 0; j < scb_data[i].scb_mi_slots; ++j)
+                       pr_debug("slot %d = %d\n", j, scb_tmp_prio[j]);
+       }
+
+}
index f6a3648..957dd00 100644 (file)
@@ -10,7 +10,6 @@ config C6X
        select GENERIC_IRQ_SHOW
        select HAVE_ARCH_TRACEHOOK
        select HAVE_DMA_API_DEBUG
-       select HAVE_GENERIC_HARDIRQS
        select HAVE_MEMBLOCK
        select SPARSE_IRQ
        select IRQ_DOMAIN
index c699d32..02380be 100644 (file)
@@ -41,7 +41,6 @@ config CRIS
        default y
        select HAVE_IDE
        select GENERIC_ATOMIC64
-       select HAVE_GENERIC_HARDIRQS
        select HAVE_UID16
        select VIRT_TO_BUS
        select ARCH_WANT_IPC_PARSE_VERSION
index 73312ab..1790f22 100644 (file)
@@ -58,8 +58,7 @@ do_page_fault(unsigned long address, struct pt_regs *regs,
        struct vm_area_struct * vma;
        siginfo_t info;
        int fault;
-       unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
-                               ((writeaccess & 1) ? FAULT_FLAG_WRITE : 0);
+       unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
 
        D(printk(KERN_DEBUG
                 "Page fault for %lX on %X at %lX, prot %d write %d\n",
@@ -117,6 +116,8 @@ do_page_fault(unsigned long address, struct pt_regs *regs,
        if (in_atomic() || !mm)
                goto no_context;
 
+       if (user_mode(regs))
+               flags |= FAULT_FLAG_USER;
 retry:
        down_read(&mm->mmap_sem);
        vma = find_vma(mm, address);
@@ -155,6 +156,7 @@ retry:
        } else if (writeaccess == 1) {
                if (!(vma->vm_flags & VM_WRITE))
                        goto bad_area;
+               flags |= FAULT_FLAG_WRITE;
        } else {
                if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
                        goto bad_area;
index 4b6628e..34aa193 100644 (file)
@@ -5,7 +5,6 @@ config FRV
        select HAVE_ARCH_TRACEHOOK
        select HAVE_PERF_EVENTS
        select HAVE_UID16
-       select HAVE_GENERIC_HARDIRQS
        select VIRT_TO_BUS
        select GENERIC_IRQ_SHOW
        select HAVE_DEBUG_BUGVERBOSE
index 331c1e2..9a66372 100644 (file)
@@ -34,11 +34,11 @@ asmlinkage void do_page_fault(int datammu, unsigned long esr0, unsigned long ear
        struct vm_area_struct *vma;
        struct mm_struct *mm;
        unsigned long _pme, lrai, lrad, fixup;
+       unsigned long flags = 0;
        siginfo_t info;
        pgd_t *pge;
        pud_t *pue;
        pte_t *pte;
-       int write;
        int fault;
 
 #if 0
@@ -81,6 +81,9 @@ asmlinkage void do_page_fault(int datammu, unsigned long esr0, unsigned long ear
        if (in_atomic() || !mm)
                goto no_context;
 
+       if (user_mode(__frame))
+               flags |= FAULT_FLAG_USER;
+
        down_read(&mm->mmap_sem);
 
        vma = find_vma(mm, ear0);
@@ -129,7 +132,6 @@ asmlinkage void do_page_fault(int datammu, unsigned long esr0, unsigned long ear
  */
  good_area:
        info.si_code = SEGV_ACCERR;
-       write = 0;
        switch (esr0 & ESR0_ATXC) {
        default:
                /* handle write to write protected page */
@@ -140,7 +142,7 @@ asmlinkage void do_page_fault(int datammu, unsigned long esr0, unsigned long ear
 #endif
                if (!(vma->vm_flags & VM_WRITE))
                        goto bad_area;
-               write = 1;
+               flags |= FAULT_FLAG_WRITE;
                break;
 
                 /* handle read from protected page */
@@ -162,7 +164,7 @@ asmlinkage void do_page_fault(int datammu, unsigned long esr0, unsigned long ear
         * make sure we exit gracefully rather than endlessly redo
         * the fault.
         */
-       fault = handle_mm_fault(mm, vma, ear0, write ? FAULT_FLAG_WRITE : 0);
+       fault = handle_mm_fault(mm, vma, ear0, flags);
        if (unlikely(fault & VM_FAULT_ERROR)) {
                if (fault & VM_FAULT_OOM)
                        goto out_of_memory;
index 3d6759e..24b1dc2 100644 (file)
@@ -2,7 +2,6 @@ config H8300
        bool
        default y
        select HAVE_IDE
-       select HAVE_GENERIC_HARDIRQS
        select GENERIC_ATOMIC64
        select HAVE_UID16
        select VIRT_TO_BUS
index 77d442a..99041b0 100644 (file)
@@ -15,7 +15,6 @@ config HEXAGON
        # select GENERIC_PENDING_IRQ if SMP
        select GENERIC_ATOMIC64
        select HAVE_PERF_EVENTS
-       select HAVE_GENERIC_HARDIRQS
        # GENERIC_ALLOCATOR is used by dma_alloc_coherent()
        select GENERIC_ALLOCATOR
        select GENERIC_IRQ_SHOW
index 1bd276d..8704c93 100644 (file)
@@ -53,8 +53,7 @@ void do_page_fault(unsigned long address, long cause, struct pt_regs *regs)
        int si_code = SEGV_MAPERR;
        int fault;
        const struct exception_table_entry *fixup;
-       unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
-                                (cause > 0 ? FAULT_FLAG_WRITE : 0);
+       unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
 
        /*
         * If we're in an interrupt or have no user context,
@@ -65,6 +64,8 @@ void do_page_fault(unsigned long address, long cause, struct pt_regs *regs)
 
        local_irq_enable();
 
+       if (user_mode(regs))
+               flags |= FAULT_FLAG_USER;
 retry:
        down_read(&mm->mmap_sem);
        vma = find_vma(mm, address);
@@ -96,6 +97,7 @@ good_area:
        case FLT_STORE:
                if (!(vma->vm_flags & VM_WRITE))
                        goto bad_area;
+               flags |= FAULT_FLAG_WRITE;
                break;
        }
 
index a86a56d..7740ab1 100644 (file)
@@ -21,7 +21,6 @@ config IA64
        select HAVE_KVM
        select HAVE_ARCH_TRACEHOOK
        select HAVE_DMA_API_DEBUG
-       select HAVE_GENERIC_HARDIRQS
        select HAVE_MEMBLOCK
        select HAVE_MEMBLOCK_NODE_MAP
        select HAVE_VIRT_CPU_ACCOUNTING
index 6cf0341..7225dad 100644 (file)
@@ -90,8 +90,6 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re
        mask = ((((isr >> IA64_ISR_X_BIT) & 1UL) << VM_EXEC_BIT)
                | (((isr >> IA64_ISR_W_BIT) & 1UL) << VM_WRITE_BIT));
 
-       flags |= ((mask & VM_WRITE) ? FAULT_FLAG_WRITE : 0);
-
        /* mmap_sem is performance critical.... */
        prefetchw(&mm->mmap_sem);
 
@@ -119,6 +117,10 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re
        if (notify_page_fault(regs, TRAP_BRKPT))
                return;
 
+       if (user_mode(regs))
+               flags |= FAULT_FLAG_USER;
+       if (mask & VM_WRITE)
+               flags |= FAULT_FLAG_WRITE;
 retry:
        down_read(&mm->mmap_sem);
 
index 76069c1..68232db 100644 (file)
@@ -114,6 +114,11 @@ int pud_huge(pud_t pud)
        return 0;
 }
 
+int pmd_huge_support(void)
+{
+       return 0;
+}
+
 struct page *
 follow_huge_pmd(struct mm_struct *mm, unsigned long address, pmd_t *pmd, int write)
 {
index 29a7ef4..75661fb 100644 (file)
@@ -9,7 +9,6 @@ config M32R
        select HAVE_KERNEL_LZMA
        select ARCH_WANT_IPC_PARSE_VERSION
        select HAVE_DEBUG_BUGVERBOSE
-       select HAVE_GENERIC_HARDIRQS
        select VIRT_TO_BUS
        select GENERIC_IRQ_PROBE
        select GENERIC_IRQ_SHOW
index 3cdfa9c..e9c6a80 100644 (file)
@@ -78,7 +78,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code,
        struct mm_struct *mm;
        struct vm_area_struct * vma;
        unsigned long page, addr;
-       int write;
+       unsigned long flags = 0;
        int fault;
        siginfo_t info;
 
@@ -117,6 +117,9 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code,
        if (in_atomic() || !mm)
                goto bad_area_nosemaphore;
 
+       if (error_code & ACE_USERMODE)
+               flags |= FAULT_FLAG_USER;
+
        /* When running in the kernel we expect faults to occur only to
         * addresses in user space.  All other faults represent errors in the
         * kernel and should generate an OOPS.  Unfortunately, in the case of an
@@ -166,14 +169,13 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long error_code,
  */
 good_area:
        info.si_code = SEGV_ACCERR;
-       write = 0;
        switch (error_code & (ACE_WRITE|ACE_PROTECTION)) {
                default:        /* 3: write, present */
                        /* fall through */
                case ACE_WRITE: /* write, not present */
                        if (!(vma->vm_flags & VM_WRITE))
                                goto bad_area;
-                       write++;
+                       flags |= FAULT_FLAG_WRITE;
                        break;
                case ACE_PROTECTION:    /* read, present */
                case 0:         /* read, not present */
@@ -194,7 +196,7 @@ good_area:
         */
        addr = (address & PAGE_MASK);
        set_thread_fault_code(error_code);
-       fault = handle_mm_fault(mm, vma, addr, write ? FAULT_FLAG_WRITE : 0);
+       fault = handle_mm_fault(mm, vma, addr, flags);
        if (unlikely(fault & VM_FAULT_ERROR)) {
                if (fault & VM_FAULT_OOM)
                        goto out_of_memory;
index c3cda41..311a300 100644 (file)
@@ -4,7 +4,6 @@ config M68K
        select HAVE_IDE
        select HAVE_AOUT if MMU
        select HAVE_DEBUG_BUGVERBOSE
-       select HAVE_GENERIC_HARDIRQS
        select GENERIC_IRQ_SHOW
        select GENERIC_ATOMIC64
        select HAVE_UID16
index a563727..eb1d61f 100644 (file)
@@ -88,6 +88,8 @@ int do_page_fault(struct pt_regs *regs, unsigned long address,
        if (in_atomic() || !mm)
                goto no_context;
 
+       if (user_mode(regs))
+               flags |= FAULT_FLAG_USER;
 retry:
        down_read(&mm->mmap_sem);
 
index cfd831c..36368eb 100644 (file)
@@ -13,7 +13,6 @@ config METAG
        select HAVE_FTRACE_MCOUNT_RECORD
        select HAVE_FUNCTION_TRACER
        select HAVE_FUNCTION_TRACE_MCOUNT_TEST
-       select HAVE_GENERIC_HARDIRQS
        select HAVE_KERNEL_BZIP2
        select HAVE_KERNEL_GZIP
        select HAVE_KERNEL_LZO
index 8fddf46..332680e 100644 (file)
@@ -53,8 +53,7 @@ int do_page_fault(struct pt_regs *regs, unsigned long address,
        struct vm_area_struct *vma, *prev_vma;
        siginfo_t info;
        int fault;
-       unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
-                               (write_access ? FAULT_FLAG_WRITE : 0);
+       unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
 
        tsk = current;
 
@@ -109,6 +108,8 @@ int do_page_fault(struct pt_regs *regs, unsigned long address,
        if (in_atomic() || !mm)
                goto no_context;
 
+       if (user_mode(regs))
+               flags |= FAULT_FLAG_USER;
 retry:
        down_read(&mm->mmap_sem);
 
@@ -121,6 +122,7 @@ good_area:
        if (write_access) {
                if (!(vma->vm_flags & VM_WRITE))
                        goto bad_area;
+               flags |= FAULT_FLAG_WRITE;
        } else {
                if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
                        goto bad_area;
index 3c52fa6..0424315 100644 (file)
@@ -110,6 +110,11 @@ int pud_huge(pud_t pud)
        return 0;
 }
 
+int pmd_huge_support(void)
+{
+       return 1;
+}
+
 struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
                             pmd_t *pmd, int write)
 {
index 3f6659c..b82f82b 100644 (file)
@@ -18,7 +18,6 @@ config MICROBLAZE
        select ARCH_WANT_IPC_PARSE_VERSION
        select HAVE_DEBUG_KMEMLEAK
        select IRQ_DOMAIN
-       select HAVE_GENERIC_HARDIRQS
        select VIRT_TO_BUS
        select GENERIC_IRQ_PROBE
        select GENERIC_IRQ_SHOW
index 731f739..fa4cf52 100644 (file)
@@ -92,8 +92,7 @@ void do_page_fault(struct pt_regs *regs, unsigned long address,
        int code = SEGV_MAPERR;
        int is_write = error_code & ESR_S;
        int fault;
-       unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
-                                        (is_write ? FAULT_FLAG_WRITE : 0);
+       unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
 
        regs->ear = address;
        regs->esr = error_code;
@@ -121,6 +120,9 @@ void do_page_fault(struct pt_regs *regs, unsigned long address,
                die("Weird page fault", regs, SIGSEGV);
        }
 
+       if (user_mode(regs))
+               flags |= FAULT_FLAG_USER;
+
        /* When running in the kernel we expect faults to occur only to
         * addresses in user space.  All other faults represent errors in the
         * kernel and should generate an OOPS.  Unfortunately, in the case of an
@@ -199,6 +201,7 @@ good_area:
        if (unlikely(is_write)) {
                if (unlikely(!(vma->vm_flags & VM_WRITE)))
                        goto bad_area;
+               flags |= FAULT_FLAG_WRITE;
        /* a read */
        } else {
                /* protection fault */
index 71f15e7..f75ab4a 100644 (file)
@@ -25,7 +25,6 @@ config MIPS
        select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
        select HAVE_DMA_ATTRS
        select HAVE_DMA_API_DEBUG
-       select HAVE_GENERIC_HARDIRQS
        select GENERIC_IRQ_PROBE
        select GENERIC_IRQ_SHOW
        select GENERIC_PCI_IOMAP
@@ -95,6 +94,7 @@ config ATH79
        select CSRC_R4K
        select DMA_NONCOHERENT
        select HAVE_CLK
+       select CLKDEV_LOOKUP
        select IRQ_CPU
        select MIPS_MACHINE
        select SYS_HAS_CPU_MIPS32_R2
@@ -131,7 +131,6 @@ config BCM63XX
        select IRQ_CPU
        select SYS_HAS_CPU_MIPS32_R1
        select SYS_HAS_CPU_BMIPS4350 if !BCM63XX_CPU_6338 && !BCM63XX_CPU_6345 && !BCM63XX_CPU_6348
-       select NR_CPUS_DEFAULT_2
        select SYS_SUPPORTS_32BIT_KERNEL
        select SYS_SUPPORTS_BIG_ENDIAN
        select SYS_HAS_EARLY_PRINTK
@@ -445,6 +444,8 @@ config RALINK
        select SYS_HAS_EARLY_PRINTK
        select HAVE_MACH_CLKDEV
        select CLKDEV_LOOKUP
+       select ARCH_HAS_RESET_CONTROLLER
+       select RESET_CONTROLLER
 
 config SGI_IP22
        bool "SGI IP22 (Indy/Indigo2)"
@@ -609,7 +610,6 @@ config SIBYTE_SWARM
        select BOOT_ELF32
        select DMA_COHERENT
        select HAVE_PATA_PLATFORM
-       select NR_CPUS_DEFAULT_2
        select SIBYTE_SB1250
        select SWAP_IO_SPACE
        select SYS_HAS_CPU_SB1
@@ -623,7 +623,6 @@ config SIBYTE_LITTLESUR
        select BOOT_ELF32
        select DMA_COHERENT
        select HAVE_PATA_PLATFORM
-       select NR_CPUS_DEFAULT_2
        select SIBYTE_SB1250
        select SWAP_IO_SPACE
        select SYS_HAS_CPU_SB1
@@ -635,7 +634,6 @@ config SIBYTE_SENTOSA
        bool "Sibyte BCM91250E-Sentosa"
        select BOOT_ELF32
        select DMA_COHERENT
-       select NR_CPUS_DEFAULT_2
        select SIBYTE_SB1250
        select SWAP_IO_SPACE
        select SYS_HAS_CPU_SB1
@@ -731,6 +729,7 @@ config CAVIUM_OCTEON_SOC
        select USB_ARCH_HAS_OHCI
        select USB_ARCH_HAS_EHCI
        select HOLES_IN_ZONE
+       select ARCH_REQUIRE_GPIOLIB
        help
          This option supports all of the Octeon reference boards from Cavium
          Networks. It builds a kernel that dynamically determines the Octeon
@@ -1860,7 +1859,6 @@ config MIPS_MT_SMP
        select CPU_MIPSR2_IRQ_VI
        select CPU_MIPSR2_IRQ_EI
        select MIPS_MT
-       select NR_CPUS_DEFAULT_2
        select SMP
        select SYS_SUPPORTS_SCHED_SMT if SMP
        select SYS_SUPPORTS_SMP
@@ -2171,12 +2169,6 @@ config SYS_SUPPORTS_MIPS_CMP
 config SYS_SUPPORTS_SMP
        bool
 
-config NR_CPUS_DEFAULT_1
-       bool
-
-config NR_CPUS_DEFAULT_2
-       bool
-
 config NR_CPUS_DEFAULT_4
        bool
 
@@ -2194,10 +2186,8 @@ config NR_CPUS_DEFAULT_64
 
 config NR_CPUS
        int "Maximum number of CPUs (2-64)"
-       range 1 64 if NR_CPUS_DEFAULT_1
+       range 2 64
        depends on SMP
-       default "1" if NR_CPUS_DEFAULT_1
-       default "2" if NR_CPUS_DEFAULT_2
        default "4" if NR_CPUS_DEFAULT_4
        default "8" if NR_CPUS_DEFAULT_8
        default "16" if NR_CPUS_DEFAULT_16
index 37f9ef3..75a36ad 100644 (file)
@@ -194,6 +194,8 @@ include $(srctree)/arch/mips/Kbuild.platforms
 ifdef CONFIG_PHYSICAL_START
 load-y                                 = $(CONFIG_PHYSICAL_START)
 endif
+entry-y                                = 0x$(shell $(NM) vmlinux 2>/dev/null \
+                                       | grep "\bkernel_entry\b" | cut -f1 -d \ )
 
 cflags-y                       += -I$(srctree)/arch/mips/include/asm/mach-generic
 drivers-$(CONFIG_PCI)          += arch/mips/pci/
@@ -225,6 +227,9 @@ KBUILD_CFLAGS       += $(cflags-y)
 KBUILD_CPPFLAGS += -DVMLINUX_LOAD_ADDRESS=$(load-y)
 KBUILD_CPPFLAGS += -DDATAOFFSET=$(if $(dataoffset-y),$(dataoffset-y),0)
 
+bootvars-y     = VMLINUX_LOAD_ADDRESS=$(load-y) \
+                 VMLINUX_ENTRY_ADDRESS=$(entry-y)
+
 LDFLAGS                        += -m $(ld-emul)
 
 ifdef CONFIG_CC_STACKPROTECTOR
@@ -254,9 +259,25 @@ drivers-$(CONFIG_OPROFILE) += arch/mips/oprofile/
 # suspend and hibernation support
 drivers-$(CONFIG_PM)   += arch/mips/power/
 
+# boot image targets (arch/mips/boot/)
+boot-y                 := vmlinux.bin
+boot-y                 += vmlinux.ecoff
+boot-y                 += vmlinux.srec
+ifeq ($(shell expr $(load-y) \< 0xffffffff80000000 2> /dev/null), 0)
+boot-y                 += uImage
+boot-y                 += uImage.gz
+endif
+
+# compressed boot image targets (arch/mips/boot/compressed/)
+bootz-y                        := vmlinuz
+bootz-y                        += vmlinuz.bin
+bootz-y                        += vmlinuz.ecoff
+bootz-y                        += vmlinuz.srec
+
 ifdef CONFIG_LASAT
 rom.bin rom.sw: vmlinux
-       $(Q)$(MAKE) $(build)=arch/mips/lasat/image $@
+       $(Q)$(MAKE) $(build)=arch/mips/lasat/image \
+               $(bootvars-y) $@
 endif
 
 #
@@ -280,13 +301,14 @@ vmlinux.64: vmlinux
 all:   $(all-y)
 
 # boot
-vmlinux.bin vmlinux.ecoff vmlinux.srec: $(vmlinux-32) FORCE
-       $(Q)$(MAKE) $(build)=arch/mips/boot VMLINUX=$(vmlinux-32) arch/mips/boot/$@
+$(boot-y): $(vmlinux-32) FORCE
+       $(Q)$(MAKE) $(build)=arch/mips/boot VMLINUX=$(vmlinux-32) \
+               $(bootvars-y) arch/mips/boot/$@
 
 # boot/compressed
-vmlinuz vmlinuz.bin vmlinuz.ecoff vmlinuz.srec: $(vmlinux-32) FORCE
+$(bootz-y): $(vmlinux-32) FORCE
        $(Q)$(MAKE) $(build)=arch/mips/boot/compressed \
-          VMLINUX_LOAD_ADDRESS=$(load-y) 32bit-bfd=$(32bit-bfd) $@
+               $(bootvars-y) 32bit-bfd=$(32bit-bfd) $@
 
 
 CLEAN_FILES += vmlinux.32 vmlinux.64
@@ -323,6 +345,8 @@ define archhelp
        echo '  vmlinuz.ecoff        - ECOFF zboot image'
        echo '  vmlinuz.bin          - Raw binary zboot image'
        echo '  vmlinuz.srec         - SREC zboot image'
+       echo '  uImage               - U-Boot image'
+       echo '  uImage.gz            - U-Boot image (gzip)'
        echo
        echo '  These will be default as appropriate for a configured platform.'
 endef
index 765ef30..26479f4 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/init.h>
 #include <linux/err.h>
 #include <linux/clk.h>
+#include <linux/clkdev.h>
 
 #include <asm/div64.h>
 
@@ -31,92 +32,132 @@ struct clk {
        unsigned long rate;
 };
 
-static struct clk ath79_ref_clk;
-static struct clk ath79_cpu_clk;
-static struct clk ath79_ddr_clk;
-static struct clk ath79_ahb_clk;
-static struct clk ath79_wdt_clk;
-static struct clk ath79_uart_clk;
+static void __init ath79_add_sys_clkdev(const char *id, unsigned long rate)
+{
+       struct clk *clk;
+       int err;
+
+       clk = kzalloc(sizeof(*clk), GFP_KERNEL);
+       if (!clk)
+               panic("failed to allocate %s clock structure", id);
+
+       clk->rate = rate;
+
+       err = clk_register_clkdev(clk, id, NULL);
+       if (err)
+               panic("unable to register %s clock device", id);
+}
 
 static void __init ar71xx_clocks_init(void)
 {
+       unsigned long ref_rate;
+       unsigned long cpu_rate;
+       unsigned long ddr_rate;
+       unsigned long ahb_rate;
        u32 pll;
        u32 freq;
        u32 div;
 
-       ath79_ref_clk.rate = AR71XX_BASE_FREQ;
+       ref_rate = AR71XX_BASE_FREQ;
 
        pll = ath79_pll_rr(AR71XX_PLL_REG_CPU_CONFIG);
 
        div = ((pll >> AR71XX_PLL_DIV_SHIFT) & AR71XX_PLL_DIV_MASK) + 1;
-       freq = div * ath79_ref_clk.rate;
+       freq = div * ref_rate;
 
        div = ((pll >> AR71XX_CPU_DIV_SHIFT) & AR71XX_CPU_DIV_MASK) + 1;
-       ath79_cpu_clk.rate = freq / div;
+       cpu_rate = freq / div;
 
        div = ((pll >> AR71XX_DDR_DIV_SHIFT) & AR71XX_DDR_DIV_MASK) + 1;
-       ath79_ddr_clk.rate = freq / div;
+       ddr_rate = freq / div;
 
        div = (((pll >> AR71XX_AHB_DIV_SHIFT) & AR71XX_AHB_DIV_MASK) + 1) * 2;
-       ath79_ahb_clk.rate = ath79_cpu_clk.rate / div;
+       ahb_rate = cpu_rate / div;
+
+       ath79_add_sys_clkdev("ref", ref_rate);
+       ath79_add_sys_clkdev("cpu", cpu_rate);
+       ath79_add_sys_clkdev("ddr", ddr_rate);
+       ath79_add_sys_clkdev("ahb", ahb_rate);
 
-       ath79_wdt_clk.rate = ath79_ahb_clk.rate;
-       ath79_uart_clk.rate = ath79_ahb_clk.rate;
+       clk_add_alias("wdt", NULL, "ahb", NULL);
+       clk_add_alias("uart", NULL, "ahb", NULL);
 }
 
 static void __init ar724x_clocks_init(void)
 {
+       unsigned long ref_rate;
+       unsigned long cpu_rate;
+       unsigned long ddr_rate;
+       unsigned long ahb_rate;
        u32 pll;
        u32 freq;
        u32 div;
 
-       ath79_ref_clk.rate = AR724X_BASE_FREQ;
+       ref_rate = AR724X_BASE_FREQ;
        pll = ath79_pll_rr(AR724X_PLL_REG_CPU_CONFIG);
 
        div = ((pll >> AR724X_PLL_DIV_SHIFT) & AR724X_PLL_DIV_MASK);
-       freq = div * ath79_ref_clk.rate;
+       freq = div * ref_rate;
 
        div = ((pll >> AR724X_PLL_REF_DIV_SHIFT) & AR724X_PLL_REF_DIV_MASK);
        freq *= div;
 
-       ath79_cpu_clk.rate = freq;
+       cpu_rate = freq;
 
        div = ((pll >> AR724X_DDR_DIV_SHIFT) & AR724X_DDR_DIV_MASK) + 1;
-       ath79_ddr_clk.rate = freq / div;
+       ddr_rate = freq / div;
 
        div = (((pll >> AR724X_AHB_DIV_SHIFT) & AR724X_AHB_DIV_MASK) + 1) * 2;
-       ath79_ahb_clk.rate = ath79_cpu_clk.rate / div;
+       ahb_rate = cpu_rate / div;
+
+       ath79_add_sys_clkdev("ref", ref_rate);
+       ath79_add_sys_clkdev("cpu", cpu_rate);
+       ath79_add_sys_clkdev("ddr", ddr_rate);
+       ath79_add_sys_clkdev("ahb", ahb_rate);
 
-       ath79_wdt_clk.rate = ath79_ahb_clk.rate;
-       ath79_uart_clk.rate = ath79_ahb_clk.rate;
+       clk_add_alias("wdt", NULL, "ahb", NULL);
+       clk_add_alias("uart", NULL, "ahb", NULL);
 }
 
 static void __init ar913x_clocks_init(void)
 {
+       unsigned long ref_rate;
+       unsigned long cpu_rate;
+       unsigned long ddr_rate;
+       unsigned long ahb_rate;
        u32 pll;
        u32 freq;
        u32 div;
 
-       ath79_ref_clk.rate = AR913X_BASE_FREQ;
+       ref_rate = AR913X_BASE_FREQ;
        pll = ath79_pll_rr(AR913X_PLL_REG_CPU_CONFIG);
 
        div = ((pll >> AR913X_PLL_DIV_SHIFT) & AR913X_PLL_DIV_MASK);
-       freq = div * ath79_ref_clk.rate;
+       freq = div * ref_rate;
 
-       ath79_cpu_clk.rate = freq;
+       cpu_rate = freq;
 
        div = ((pll >> AR913X_DDR_DIV_SHIFT) & AR913X_DDR_DIV_MASK) + 1;
-       ath79_ddr_clk.rate = freq / div;
+       ddr_rate = freq / div;
 
        div = (((pll >> AR913X_AHB_DIV_SHIFT) & AR913X_AHB_DIV_MASK) + 1) * 2;
-       ath79_ahb_clk.rate = ath79_cpu_clk.rate / div;
+       ahb_rate = cpu_rate / div;
 
-       ath79_wdt_clk.rate = ath79_ahb_clk.rate;
-       ath79_uart_clk.rate = ath79_ahb_clk.rate;
+       ath79_add_sys_clkdev("ref", ref_rate);
+       ath79_add_sys_clkdev("cpu", cpu_rate);
+       ath79_add_sys_clkdev("ddr", ddr_rate);
+       ath79_add_sys_clkdev("ahb", ahb_rate);
+
+       clk_add_alias("wdt", NULL, "ahb", NULL);
+       clk_add_alias("uart", NULL, "ahb", NULL);
 }
 
 static void __init ar933x_clocks_init(void)
 {
+       unsigned long ref_rate;
+       unsigned long cpu_rate;
+       unsigned long ddr_rate;
+       unsigned long ahb_rate;
        u32 clock_ctrl;
        u32 cpu_config;
        u32 freq;
@@ -124,21 +165,21 @@ static void __init ar933x_clocks_init(void)
 
        t = ath79_reset_rr(AR933X_RESET_REG_BOOTSTRAP);
        if (t & AR933X_BOOTSTRAP_REF_CLK_40)
-               ath79_ref_clk.rate = (40 * 1000 * 1000);
+               ref_rate = (40 * 1000 * 1000);
        else
-               ath79_ref_clk.rate = (25 * 1000 * 1000);
+               ref_rate = (25 * 1000 * 1000);
 
        clock_ctrl = ath79_pll_rr(AR933X_PLL_CLOCK_CTRL_REG);
        if (clock_ctrl & AR933X_PLL_CLOCK_CTRL_BYPASS) {
-               ath79_cpu_clk.rate = ath79_ref_clk.rate;
-               ath79_ahb_clk.rate = ath79_ref_clk.rate;
-               ath79_ddr_clk.rate = ath79_ref_clk.rate;
+               cpu_rate = ref_rate;
+               ahb_rate = ref_rate;
+               ddr_rate = ref_rate;
        } else {
                cpu_config = ath79_pll_rr(AR933X_PLL_CPU_CONFIG_REG);
 
                t = (cpu_config >> AR933X_PLL_CPU_CONFIG_REFDIV_SHIFT) &
                    AR933X_PLL_CPU_CONFIG_REFDIV_MASK;
-               freq = ath79_ref_clk.rate / t;
+               freq = ref_rate / t;
 
                t = (cpu_config >> AR933X_PLL_CPU_CONFIG_NINT_SHIFT) &
                    AR933X_PLL_CPU_CONFIG_NINT_MASK;
@@ -153,19 +194,24 @@ static void __init ar933x_clocks_init(void)
 
                t = ((clock_ctrl >> AR933X_PLL_CLOCK_CTRL_CPU_DIV_SHIFT) &
                     AR933X_PLL_CLOCK_CTRL_CPU_DIV_MASK) + 1;
-               ath79_cpu_clk.rate = freq / t;
+               cpu_rate = freq / t;
 
                t = ((clock_ctrl >> AR933X_PLL_CLOCK_CTRL_DDR_DIV_SHIFT) &
                      AR933X_PLL_CLOCK_CTRL_DDR_DIV_MASK) + 1;
-               ath79_ddr_clk.rate = freq / t;
+               ddr_rate = freq / t;
 
                t = ((clock_ctrl >> AR933X_PLL_CLOCK_CTRL_AHB_DIV_SHIFT) &
                     AR933X_PLL_CLOCK_CTRL_AHB_DIV_MASK) + 1;
-               ath79_ahb_clk.rate = freq / t;
+               ahb_rate = freq / t;
        }
 
-       ath79_wdt_clk.rate = ath79_ref_clk.rate;
-       ath79_uart_clk.rate = ath79_ref_clk.rate;
+       ath79_add_sys_clkdev("ref", ref_rate);
+       ath79_add_sys_clkdev("cpu", cpu_rate);
+       ath79_add_sys_clkdev("ddr", ddr_rate);
+       ath79_add_sys_clkdev("ahb", ahb_rate);
+
+       clk_add_alias("wdt", NULL, "ahb", NULL);
+       clk_add_alias("uart", NULL, "ref", NULL);
 }
 
 static u32 __init ar934x_get_pll_freq(u32 ref, u32 ref_div, u32 nint, u32 nfrac,
@@ -174,12 +220,12 @@ static u32 __init ar934x_get_pll_freq(u32 ref, u32 ref_div, u32 nint, u32 nfrac,
        u64 t;
        u32 ret;
 
-       t = ath79_ref_clk.rate;
+       t = ref;
        t *= nint;
        do_div(t, ref_div);
        ret = t;
 
-       t = ath79_ref_clk.rate;
+       t = ref;
        t *= nfrac;
        do_div(t, ref_div * frac);
        ret += t;
@@ -190,6 +236,10 @@ static u32 __init ar934x_get_pll_freq(u32 ref, u32 ref_div, u32 nint, u32 nfrac,
 
 static void __init ar934x_clocks_init(void)
 {
+       unsigned long ref_rate;
+       unsigned long cpu_rate;
+       unsigned long ddr_rate;
+       unsigned long ahb_rate;
        u32 pll, out_div, ref_div, nint, nfrac, frac, clk_ctrl, postdiv;
        u32 cpu_pll, ddr_pll;
        u32 bootstrap;
@@ -199,9 +249,9 @@ static void __init ar934x_clocks_init(void)
 
        bootstrap = ath79_reset_rr(AR934X_RESET_REG_BOOTSTRAP);
        if (bootstrap & AR934X_BOOTSTRAP_REF_CLK_40)
-               ath79_ref_clk.rate = 40 * 1000 * 1000;
+               ref_rate = 40 * 1000 * 1000;
        else
-               ath79_ref_clk.rate = 25 * 1000 * 1000;
+               ref_rate = 25 * 1000 * 1000;
 
        pll = __raw_readl(dpll_base + AR934X_SRIF_CPU_DPLL2_REG);
        if (pll & AR934X_SRIF_DPLL2_LOCAL_PLL) {
@@ -227,7 +277,7 @@ static void __init ar934x_clocks_init(void)
                frac = 1 << 6;
        }
 
-       cpu_pll = ar934x_get_pll_freq(ath79_ref_clk.rate, ref_div, nint,
+       cpu_pll = ar934x_get_pll_freq(ref_rate, ref_div, nint,
                                      nfrac, frac, out_div);
 
        pll = __raw_readl(dpll_base + AR934X_SRIF_DDR_DPLL2_REG);
@@ -254,7 +304,7 @@ static void __init ar934x_clocks_init(void)
                frac = 1 << 10;
        }
 
-       ddr_pll = ar934x_get_pll_freq(ath79_ref_clk.rate, ref_div, nint,
+       ddr_pll = ar934x_get_pll_freq(ref_rate, ref_div, nint,
                                      nfrac, frac, out_div);
 
        clk_ctrl = ath79_pll_rr(AR934X_PLL_CPU_DDR_CLK_CTRL_REG);
@@ -263,49 +313,58 @@ static void __init ar934x_clocks_init(void)
                  AR934X_PLL_CPU_DDR_CLK_CTRL_CPU_POST_DIV_MASK;
 
        if (clk_ctrl & AR934X_PLL_CPU_DDR_CLK_CTRL_CPU_PLL_BYPASS)
-               ath79_cpu_clk.rate = ath79_ref_clk.rate;
+               cpu_rate = ref_rate;
        else if (clk_ctrl & AR934X_PLL_CPU_DDR_CLK_CTRL_CPUCLK_FROM_CPUPLL)
-               ath79_cpu_clk.rate = cpu_pll / (postdiv + 1);
+               cpu_rate = cpu_pll / (postdiv + 1);
        else
-               ath79_cpu_clk.rate = ddr_pll / (postdiv + 1);
+               cpu_rate = ddr_pll / (postdiv + 1);
 
        postdiv = (clk_ctrl >> AR934X_PLL_CPU_DDR_CLK_CTRL_DDR_POST_DIV_SHIFT) &
                  AR934X_PLL_CPU_DDR_CLK_CTRL_DDR_POST_DIV_MASK;
 
        if (clk_ctrl & AR934X_PLL_CPU_DDR_CLK_CTRL_DDR_PLL_BYPASS)
-               ath79_ddr_clk.rate = ath79_ref_clk.rate;
+               ddr_rate = ref_rate;
        else if (clk_ctrl & AR934X_PLL_CPU_DDR_CLK_CTRL_DDRCLK_FROM_DDRPLL)
-               ath79_ddr_clk.rate = ddr_pll / (postdiv + 1);
+               ddr_rate = ddr_pll / (postdiv + 1);
        else
-               ath79_ddr_clk.rate = cpu_pll / (postdiv + 1);
+               ddr_rate = cpu_pll / (postdiv + 1);
 
        postdiv = (clk_ctrl >> AR934X_PLL_CPU_DDR_CLK_CTRL_AHB_POST_DIV_SHIFT) &
                  AR934X_PLL_CPU_DDR_CLK_CTRL_AHB_POST_DIV_MASK;
 
        if (clk_ctrl & AR934X_PLL_CPU_DDR_CLK_CTRL_AHB_PLL_BYPASS)
-               ath79_ahb_clk.rate = ath79_ref_clk.rate;
+               ahb_rate = ref_rate;
        else if (clk_ctrl & AR934X_PLL_CPU_DDR_CLK_CTRL_AHBCLK_FROM_DDRPLL)
-               ath79_ahb_clk.rate = ddr_pll / (postdiv + 1);
+               ahb_rate = ddr_pll / (postdiv + 1);
        else
-               ath79_ahb_clk.rate = cpu_pll / (postdiv + 1);
+               ahb_rate = cpu_pll / (postdiv + 1);
+
+       ath79_add_sys_clkdev("ref", ref_rate);
+       ath79_add_sys_clkdev("cpu", cpu_rate);
+       ath79_add_sys_clkdev("ddr", ddr_rate);
+       ath79_add_sys_clkdev("ahb", ahb_rate);
 
-       ath79_wdt_clk.rate = ath79_ref_clk.rate;
-       ath79_uart_clk.rate = ath79_ref_clk.rate;
+       clk_add_alias("wdt", NULL, "ref", NULL);
+       clk_add_alias("uart", NULL, "ref", NULL);
 
        iounmap(dpll_base);
 }
 
 static void __init qca955x_clocks_init(void)
 {
+       unsigned long ref_rate;
+       unsigned long cpu_rate;
+       unsigned long ddr_rate;
+       unsigned long ahb_rate;
        u32 pll, out_div, ref_div, nint, frac, clk_ctrl, postdiv;
        u32 cpu_pll, ddr_pll;
        u32 bootstrap;
 
        bootstrap = ath79_reset_rr(QCA955X_RESET_REG_BOOTSTRAP);
        if (bootstrap & QCA955X_BOOTSTRAP_REF_CLK_40)
-               ath79_ref_clk.rate = 40 * 1000 * 1000;
+               ref_rate = 40 * 1000 * 1000;
        else
-               ath79_ref_clk.rate = 25 * 1000 * 1000;
+               ref_rate = 25 * 1000 * 1000;
 
        pll = ath79_pll_rr(QCA955X_PLL_CPU_CONFIG_REG);
        out_div = (pll >> QCA955X_PLL_CPU_CONFIG_OUTDIV_SHIFT) &
@@ -317,8 +376,8 @@ static void __init qca955x_clocks_init(void)
        frac = (pll >> QCA955X_PLL_CPU_CONFIG_NFRAC_SHIFT) &
               QCA955X_PLL_CPU_CONFIG_NFRAC_MASK;
 
-       cpu_pll = nint * ath79_ref_clk.rate / ref_div;
-       cpu_pll += frac * ath79_ref_clk.rate / (ref_div * (1 << 6));
+       cpu_pll = nint * ref_rate / ref_div;
+       cpu_pll += frac * ref_rate / (ref_div * (1 << 6));
        cpu_pll /= (1 << out_div);
 
        pll = ath79_pll_rr(QCA955X_PLL_DDR_CONFIG_REG);
@@ -331,8 +390,8 @@ static void __init qca955x_clocks_init(void)
        frac = (pll >> QCA955X_PLL_DDR_CONFIG_NFRAC_SHIFT) &
               QCA955X_PLL_DDR_CONFIG_NFRAC_MASK;
 
-       ddr_pll = nint * ath79_ref_clk.rate / ref_div;
-       ddr_pll += frac * ath79_ref_clk.rate / (ref_div * (1 << 10));
+       ddr_pll = nint * ref_rate / ref_div;
+       ddr_pll += frac * ref_rate / (ref_div * (1 << 10));
        ddr_pll /= (1 << out_div);
 
        clk_ctrl = ath79_pll_rr(QCA955X_PLL_CLK_CTRL_REG);
@@ -341,34 +400,39 @@ static void __init qca955x_clocks_init(void)
                  QCA955X_PLL_CLK_CTRL_CPU_POST_DIV_MASK;
 
        if (clk_ctrl & QCA955X_PLL_CLK_CTRL_CPU_PLL_BYPASS)
-               ath79_cpu_clk.rate = ath79_ref_clk.rate;
+               cpu_rate = ref_rate;
        else if (clk_ctrl & QCA955X_PLL_CLK_CTRL_CPUCLK_FROM_CPUPLL)
-               ath79_cpu_clk.rate = ddr_pll / (postdiv + 1);
+               cpu_rate = ddr_pll / (postdiv + 1);
        else
-               ath79_cpu_clk.rate = cpu_pll / (postdiv + 1);
+               cpu_rate = cpu_pll / (postdiv + 1);
 
        postdiv = (clk_ctrl >> QCA955X_PLL_CLK_CTRL_DDR_POST_DIV_SHIFT) &
                  QCA955X_PLL_CLK_CTRL_DDR_POST_DIV_MASK;
 
        if (clk_ctrl & QCA955X_PLL_CLK_CTRL_DDR_PLL_BYPASS)
-               ath79_ddr_clk.rate = ath79_ref_clk.rate;
+               ddr_rate = ref_rate;
        else if (clk_ctrl & QCA955X_PLL_CLK_CTRL_DDRCLK_FROM_DDRPLL)
-               ath79_ddr_clk.rate = cpu_pll / (postdiv + 1);
+               ddr_rate = cpu_pll / (postdiv + 1);
        else
-               ath79_ddr_clk.rate = ddr_pll / (postdiv + 1);
+               ddr_rate = ddr_pll / (postdiv + 1);
 
        postdiv = (clk_ctrl >> QCA955X_PLL_CLK_CTRL_AHB_POST_DIV_SHIFT) &
                  QCA955X_PLL_CLK_CTRL_AHB_POST_DIV_MASK;
 
        if (clk_ctrl & QCA955X_PLL_CLK_CTRL_AHB_PLL_BYPASS)
-               ath79_ahb_clk.rate = ath79_ref_clk.rate;
+               ahb_rate = ref_rate;
        else if (clk_ctrl & QCA955X_PLL_CLK_CTRL_AHBCLK_FROM_DDRPLL)
-               ath79_ahb_clk.rate = ddr_pll / (postdiv + 1);
+               ahb_rate = ddr_pll / (postdiv + 1);
        else
-               ath79_ahb_clk.rate = cpu_pll / (postdiv + 1);
+               ahb_rate = cpu_pll / (postdiv + 1);
 
-       ath79_wdt_clk.rate = ath79_ref_clk.rate;
-       ath79_uart_clk.rate = ath79_ref_clk.rate;
+       ath79_add_sys_clkdev("ref", ref_rate);
+       ath79_add_sys_clkdev("cpu", cpu_rate);
+       ath79_add_sys_clkdev("ddr", ddr_rate);
+       ath79_add_sys_clkdev("ahb", ahb_rate);
+
+       clk_add_alias("wdt", NULL, "ref", NULL);
+       clk_add_alias("uart", NULL, "ref", NULL);
 }
 
 void __init ath79_clocks_init(void)
@@ -387,46 +451,27 @@ void __init ath79_clocks_init(void)
                qca955x_clocks_init();
        else
                BUG();
-
-       pr_info("Clocks: CPU:%lu.%03luMHz, DDR:%lu.%03luMHz, AHB:%lu.%03luMHz, "
-               "Ref:%lu.%03luMHz",
-               ath79_cpu_clk.rate / 1000000,
-               (ath79_cpu_clk.rate / 1000) % 1000,
-               ath79_ddr_clk.rate / 1000000,
-               (ath79_ddr_clk.rate / 1000) % 1000,
-               ath79_ahb_clk.rate / 1000000,
-               (ath79_ahb_clk.rate / 1000) % 1000,
-               ath79_ref_clk.rate / 1000000,
-               (ath79_ref_clk.rate / 1000) % 1000);
 }
 
-/*
- * Linux clock API
- */
-struct clk *clk_get(struct device *dev, const char *id)
+unsigned long __init
+ath79_get_sys_clk_rate(const char *id)
 {
-       if (!strcmp(id, "ref"))
-               return &ath79_ref_clk;
-
-       if (!strcmp(id, "cpu"))
-               return &ath79_cpu_clk;
-
-       if (!strcmp(id, "ddr"))
-               return &ath79_ddr_clk;
-
-       if (!strcmp(id, "ahb"))
-               return &ath79_ahb_clk;
+       struct clk *clk;
+       unsigned long rate;
 
-       if (!strcmp(id, "wdt"))
-               return &ath79_wdt_clk;
+       clk = clk_get(NULL, id);
+       if (IS_ERR(clk))
+               panic("unable to get %s clock, err=%d", id, (int) PTR_ERR(clk));
 
-       if (!strcmp(id, "uart"))
-               return &ath79_uart_clk;
+       rate = clk_get_rate(clk);
+       clk_put(clk);
 
-       return ERR_PTR(-ENOENT);
+       return rate;
 }
-EXPORT_SYMBOL(clk_get);
 
+/*
+ * Linux clock API
+ */
 int clk_enable(struct clk *clk)
 {
        return 0;
@@ -443,8 +488,3 @@ unsigned long clk_get_rate(struct clk *clk)
        return clk->rate;
 }
 EXPORT_SYMBOL(clk_get_rate);
-
-void clk_put(struct clk *clk)
-{
-}
-EXPORT_SYMBOL(clk_put);
index 561906c..648d2da 100644 (file)
@@ -21,6 +21,8 @@
 #define ATH79_MEM_SIZE_MAX     (128 * 1024 * 1024)
 
 void ath79_clocks_init(void);
+unsigned long ath79_get_sys_clk_rate(const char *id);
+
 void ath79_ddr_wb_flush(unsigned int reg);
 
 void ath79_gpio_function_enable(u32 mask);
index a3a2741..c3b04c9 100644 (file)
@@ -81,21 +81,19 @@ static struct platform_device ar933x_uart_device = {
 
 void __init ath79_register_uart(void)
 {
-       struct clk *clk;
+       unsigned long uart_clk_rate;
 
-       clk = clk_get(NULL, "uart");
-       if (IS_ERR(clk))
-               panic("unable to get UART clock, err=%ld", PTR_ERR(clk));
+       uart_clk_rate = ath79_get_sys_clk_rate("uart");
 
        if (soc_is_ar71xx() ||
            soc_is_ar724x() ||
            soc_is_ar913x() ||
            soc_is_ar934x() ||
            soc_is_qca955x()) {
-               ath79_uart_data[0].uartclk = clk_get_rate(clk);
+               ath79_uart_data[0].uartclk = uart_clk_rate;
                platform_device_register(&ath79_uart_device);
        } else if (soc_is_ar933x()) {
-               ar933x_uart_data.uartclk = clk_get_rate(clk);
+               ar933x_uart_data.uartclk = uart_clk_rate;
                platform_device_register(&ar933x_uart_device);
        } else {
                BUG();
index 80f4ecd..64807a4 100644 (file)
@@ -200,7 +200,6 @@ void __init plat_mem_setup(void)
 
        ath79_detect_sys_type();
        detect_memory_region(0, ATH79_MEM_SIZE_MIN, ATH79_MEM_SIZE_MAX);
-       ath79_clocks_init();
 
        _machine_restart = ath79_restart;
        _machine_halt = ath79_halt;
@@ -209,13 +208,25 @@ void __init plat_mem_setup(void)
 
 void __init plat_time_init(void)
 {
-       struct clk *clk;
+       unsigned long cpu_clk_rate;
+       unsigned long ahb_clk_rate;
+       unsigned long ddr_clk_rate;
+       unsigned long ref_clk_rate;
+
+       ath79_clocks_init();
+
+       cpu_clk_rate = ath79_get_sys_clk_rate("cpu");
+       ahb_clk_rate = ath79_get_sys_clk_rate("ahb");
+       ddr_clk_rate = ath79_get_sys_clk_rate("ddr");
+       ref_clk_rate = ath79_get_sys_clk_rate("ref");
 
-       clk = clk_get(NULL, "cpu");
-       if (IS_ERR(clk))
-               panic("unable to get CPU clock, err=%ld", PTR_ERR(clk));
+       pr_info("Clocks: CPU:%lu.%03luMHz, DDR:%lu.%03luMHz, AHB:%lu.%03luMHz, Ref:%lu.%03luMHz",
+               cpu_clk_rate / 1000000, (cpu_clk_rate / 1000) % 1000,
+               ddr_clk_rate / 1000000, (ddr_clk_rate / 1000) % 1000,
+               ahb_clk_rate / 1000000, (ahb_clk_rate / 1000) % 1000,
+               ref_clk_rate / 1000000, (ref_clk_rate / 1000) % 1000);
 
-       mips_hpt_frequency = clk_get_rate(clk) / 2;
+       mips_hpt_frequency = cpu_clk_rate / 2;
 }
 
 static int __init ath79_setup(void)
index f210b09..a73d6e2 100644 (file)
@@ -4,3 +4,4 @@ vmlinux.*
 zImage
 zImage.tmp
 calc_vmlinuz_load_addr
+uImage
index 851261e..1466c00 100644 (file)
@@ -40,3 +40,18 @@ quiet_cmd_srec = OBJCOPY $@
       cmd_srec = $(OBJCOPY) -S -O srec $(strip-flags) $(VMLINUX) $@
 $(obj)/vmlinux.srec: $(VMLINUX) FORCE
        $(call if_changed,srec)
+
+UIMAGE_LOADADDR  = $(VMLINUX_LOAD_ADDRESS)
+UIMAGE_ENTRYADDR = $(VMLINUX_ENTRY_ADDRESS)
+
+$(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE
+       $(call if_changed,gzip)
+
+targets += uImage.gz
+$(obj)/uImage.gz: $(obj)/vmlinux.bin.gz FORCE
+       $(call if_changed,uimage,gzip)
+
+targets += uImage
+$(obj)/uImage: $(obj)/uImage.gz FORCE
+       @ln -sf $(notdir $<) $@
+       @echo '  Image $@ is ready'
index bb1dbf4..0048c08 100644 (file)
@@ -25,7 +25,7 @@ KBUILD_CFLAGS := $(LINUXINCLUDE) $(KBUILD_CFLAGS) -D__KERNEL__ \
 
 KBUILD_AFLAGS := $(LINUXINCLUDE) $(KBUILD_AFLAGS) -D__ASSEMBLY__ \
        -DBOOT_HEAP_SIZE=$(BOOT_HEAP_SIZE) \
-       -DKERNEL_ENTRY=0x$(shell $(NM) $(objtree)/$(KBUILD_IMAGE) 2>/dev/null | grep " kernel_entry" | cut -f1 -d \ )
+       -DKERNEL_ENTRY=$(VMLINUX_ENTRY_ADDRESS)
 
 targets := head.o decompress.o dbg.o uart-16550.o uart-alchemy.o
 
diff --git a/arch/mips/boot/dts/include/dt-bindings b/arch/mips/boot/dts/include/dt-bindings
new file mode 120000 (symlink)
index 0000000..68ae388
--- /dev/null
@@ -0,0 +1 @@
+../../../../../include/dt-bindings
index 9d36774..25fbfae 100644 (file)
@@ -1776,7 +1776,7 @@ asmlinkage void plat_irq_dispatch(void)
 
 #ifdef CONFIG_HOTPLUG_CPU
 
-void fixup_irqs(void)
+void octeon_fixup_irqs(void)
 {
        irq_cpu_offline();
 }
index 48b08eb..b212ae1 100644 (file)
@@ -8,6 +8,7 @@
  *   written by Ralf Baechle <ralf@linux-mips.org>
  */
 #include <linux/compiler.h>
+#include <linux/vmalloc.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
 #include <linux/console.h>
@@ -1139,3 +1140,30 @@ static int __init edac_devinit(void)
        return err;
 }
 device_initcall(edac_devinit);
+
+static void __initdata *octeon_dummy_iospace;
+
+static int __init octeon_no_pci_init(void)
+{
+       /*
+        * Initially assume there is no PCI. The PCI/PCIe platform code will
+        * later re-initialize these to correct values if they are present.
+        */
+       octeon_dummy_iospace = vzalloc(IO_SPACE_LIMIT);
+       set_io_port_base((unsigned long)octeon_dummy_iospace);
+       ioport_resource.start = MAX_RESOURCE;
+       ioport_resource.end = 0;
+       return 0;
+}
+core_initcall(octeon_no_pci_init);
+
+static int __init octeon_no_pci_release(void)
+{
+       /*
+        * Release the allocated memory if a real IO space is there.
+        */
+       if ((unsigned long)octeon_dummy_iospace != mips_io_port_base)
+               vfree(octeon_dummy_iospace);
+       return 0;
+}
+late_initcall(octeon_no_pci_release);
index 138cc80..24a2167 100644 (file)
@@ -255,8 +255,6 @@ static void octeon_cpus_done(void)
 /* State of each CPU. */
 DEFINE_PER_CPU(int, cpu_state);
 
-extern void fixup_irqs(void);
-
 static int octeon_cpu_disable(void)
 {
        unsigned int cpu = smp_processor_id();
@@ -267,7 +265,7 @@ static int octeon_cpu_disable(void)
        set_cpu_online(cpu, false);
        cpu_clear(cpu, cpu_callin_map);
        local_irq_disable();
-       fixup_irqs();
+       octeon_fixup_irqs();
        local_irq_enable();
 
        flush_cache_all();
diff --git a/arch/mips/configs/xway_defconfig b/arch/mips/configs/xway_defconfig
new file mode 100644 (file)
index 0000000..8987846
--- /dev/null
@@ -0,0 +1,159 @@
+CONFIG_LANTIQ=y
+CONFIG_XRX200_PHY_FW=y
+CONFIG_CPU_MIPS32_R2=y
+# CONFIG_COMPACTION is not set
+# CONFIG_CROSS_MEMORY_ATTACH is not set
+CONFIG_HZ_100=y
+# CONFIG_SECCOMP is not set
+# CONFIG_LOCALVERSION_AUTO is not set
+CONFIG_SYSVIPC=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_BLK_DEV_INITRD=y
+# CONFIG_RD_GZIP is not set
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+CONFIG_KALLSYMS_ALL=y
+# CONFIG_AIO is not set
+CONFIG_EMBEDDED=y
+# CONFIG_VM_EVENT_COUNTERS is not set
+# CONFIG_SLUB_DEBUG is not set
+# CONFIG_COMPAT_BRK is not set
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+# CONFIG_IOSCHED_CFQ is not set
+# CONFIG_COREDUMP is not set
+# CONFIG_SUSPEND is not set
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_ROUTE_MULTIPATH=y
+CONFIG_IP_ROUTE_VERBOSE=y
+CONFIG_IP_MROUTE=y
+CONFIG_IP_MROUTE_MULTIPLE_TABLES=y
+CONFIG_ARPD=y
+CONFIG_SYN_COOKIES=y
+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
+# CONFIG_INET_XFRM_MODE_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_BEET is not set
+# CONFIG_INET_LRO is not set
+# CONFIG_INET_DIAG is not set
+CONFIG_TCP_CONG_ADVANCED=y
+# CONFIG_TCP_CONG_BIC is not set
+# CONFIG_TCP_CONG_WESTWOOD is not set
+# CONFIG_TCP_CONG_HTCP is not set
+# CONFIG_IPV6 is not set
+CONFIG_NETFILTER=y
+# CONFIG_BRIDGE_NETFILTER is not set
+CONFIG_NF_CONNTRACK=m
+CONFIG_NF_CONNTRACK_FTP=m
+CONFIG_NF_CONNTRACK_IRC=m
+CONFIG_NETFILTER_XT_TARGET_CT=m
+CONFIG_NETFILTER_XT_TARGET_LOG=m
+CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
+CONFIG_NETFILTER_XT_MATCH_COMMENT=m
+CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m
+CONFIG_NETFILTER_XT_MATCH_LIMIT=m
+CONFIG_NETFILTER_XT_MATCH_MAC=m
+CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m
+CONFIG_NETFILTER_XT_MATCH_STATE=m
+CONFIG_NF_CONNTRACK_IPV4=m
+# CONFIG_NF_CONNTRACK_PROC_COMPAT is not set
+CONFIG_IP_NF_IPTABLES=m
+CONFIG_IP_NF_FILTER=m
+CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP_NF_MANGLE=m
+CONFIG_IP_NF_RAW=m
+CONFIG_BRIDGE=y
+# CONFIG_BRIDGE_IGMP_SNOOPING is not set
+CONFIG_VLAN_8021Q=y
+CONFIG_NET_SCHED=y
+CONFIG_HAMRADIO=y
+CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
+# CONFIG_FIRMWARE_IN_KERNEL is not set
+CONFIG_MTD=y
+CONFIG_MTD_CMDLINE_PARTS=y
+CONFIG_MTD_BLOCK=y
+CONFIG_MTD_CFI=y
+CONFIG_MTD_CFI_AMDSTD=y
+CONFIG_MTD_COMPLEX_MAPPINGS=y
+CONFIG_MTD_PHYSMAP=y
+CONFIG_MTD_PHYSMAP_OF=y
+CONFIG_MTD_LANTIQ=y
+CONFIG_EEPROM_93CX6=m
+CONFIG_SCSI=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_NETDEVICES=y
+CONFIG_LANTIQ_ETOP=y
+# CONFIG_NET_VENDOR_WIZNET is not set
+CONFIG_PHYLIB=y
+CONFIG_PPP=m
+CONFIG_PPP_FILTER=y
+CONFIG_PPP_MULTILINK=y
+CONFIG_PPPOE=m
+CONFIG_PPP_ASYNC=m
+CONFIG_ISDN=y
+CONFIG_INPUT=m
+CONFIG_INPUT_POLLDEV=m
+# CONFIG_INPUT_MOUSEDEV is not set
+# CONFIG_KEYBOARD_ATKBD is not set
+# CONFIG_INPUT_MOUSE is not set
+CONFIG_INPUT_MISC=y
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_DEVKMEM is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_RUNTIME_UARTS=2
+CONFIG_SERIAL_OF_PLATFORM=y
+CONFIG_SPI=y
+CONFIG_GPIO_MM_LANTIQ=y
+CONFIG_GPIO_STP_XWAY=y
+# CONFIG_HWMON is not set
+CONFIG_WATCHDOG=y
+# CONFIG_HID is not set
+# CONFIG_USB_HID is not set
+CONFIG_USB=y
+CONFIG_USB_ANNOUNCE_NEW_DEVICES=y
+CONFIG_USB_STORAGE=y
+CONFIG_USB_STORAGE_DEBUG=y
+CONFIG_NEW_LEDS=y
+CONFIG_LEDS_CLASS=y
+CONFIG_LEDS_TRIGGERS=y
+CONFIG_LEDS_TRIGGER_TIMER=y
+CONFIG_LEDS_TRIGGER_DEFAULT_ON=y
+CONFIG_STAGING=y
+# CONFIG_IOMMU_SUPPORT is not set
+# CONFIG_DNOTIFY is not set
+# CONFIG_PROC_PAGE_MONITOR is not set
+CONFIG_TMPFS=y
+CONFIG_TMPFS_XATTR=y
+CONFIG_JFFS2_FS=y
+CONFIG_JFFS2_SUMMARY=y
+CONFIG_JFFS2_FS_XATTR=y
+# CONFIG_JFFS2_FS_POSIX_ACL is not set
+# CONFIG_JFFS2_FS_SECURITY is not set
+CONFIG_JFFS2_COMPRESSION_OPTIONS=y
+# CONFIG_JFFS2_ZLIB is not set
+CONFIG_SQUASHFS=y
+# CONFIG_SQUASHFS_ZLIB is not set
+CONFIG_SQUASHFS_XZ=y
+CONFIG_PRINTK_TIME=y
+# CONFIG_ENABLE_MUST_CHECK is not set
+CONFIG_STRIP_ASM_SYMS=y
+CONFIG_DEBUG_FS=y
+CONFIG_MAGIC_SYSRQ=y
+# CONFIG_SCHED_DEBUG is not set
+# CONFIG_FTRACE is not set
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CRYPTO_MANAGER=m
+CONFIG_CRYPTO_ARC4=m
+# CONFIG_CRYPTO_ANSI_CPRNG is not set
+CONFIG_CRC_ITU_T=m
+CONFIG_CRC32_SARWATE=y
+CONFIG_AVERAGE=y
index 824e08c..4b3e3a4 100644 (file)
@@ -51,6 +51,14 @@ static struct irq_chip ioasic_irq_type = {
        .irq_unmask = unmask_ioasic_irq,
 };
 
+void clear_ioasic_dma_irq(unsigned int irq)
+{
+       u32 sir;
+
+       sir = ~(1 << (irq - ioasic_irq_base));
+       ioasic_write(IO_REG_SIR, sir);
+}
+
 static struct irq_chip ioasic_dma_irq_type = {
        .name = "IO-ASIC-DMA",
        .irq_ack = ack_ioasic_irq,
index ea57f39..1914e56 100644 (file)
@@ -125,13 +125,18 @@ int rtc_mips_set_mmss(unsigned long nowtime)
 
 void __init plat_time_init(void)
 {
+       int ioasic_clock = 0;
        u32 start, end;
-       int i = HZ / 10;
+       int i = HZ / 8;
 
        /* Set up the rate of periodic DS1287 interrupts. */
        ds1287_set_base_clock(HZ);
 
+       /* On some I/O ASIC systems we have the I/O ASIC's counter.  */
+       if (IOASIC)
+               ioasic_clock = dec_ioasic_clocksource_init() == 0;
        if (cpu_has_counter) {
+               ds1287_timer_state();
                while (!ds1287_timer_state())
                        ;
 
@@ -143,12 +148,24 @@ void __init plat_time_init(void)
 
                end = read_c0_count();
 
-               mips_hpt_frequency = (end - start) * 10;
+               mips_hpt_frequency = (end - start) * 8;
                printk(KERN_INFO "MIPS counter frequency %dHz\n",
                        mips_hpt_frequency);
-       } else if (IOASIC)
-               /* For pre-R4k systems we use the I/O ASIC's counter.  */
-               dec_ioasic_clocksource_init();
+
+               /*
+                * All R4k DECstations suffer from the CP0 Count erratum,
+                * so we can't use the timer as a clock source, and a clock
+                * event both at a time.  An accurate wall clock is more
+                * important than a high-precision interval timer so only
+                * use the timer as a clock source, and not a clock event
+                * if there's no I/O ASIC counter available to serve as a
+                * clock source.
+                */
+               if (!ioasic_clock) {
+                       init_r4k_clocksource();
+                       mips_hpt_frequency = 0;
+               }
+       }
 
        ds1287_clockevent_init(dec_interrupt[DEC_IRQ_RTC]);
 }
index 9b54b7a..454ddf9 100644 (file)
@@ -1,2 +1,15 @@
 # MIPS headers
+generic-y += cputime.h
+generic-y += current.h
+generic-y += emergency-restart.h
+generic-y += local64.h
+generic-y += mutex.h
+generic-y += parport.h
+generic-y += percpu.h
+generic-y += scatterlist.h
+generic-y += sections.h
+generic-y += segment.h
+generic-y += serial.h
 generic-y += trace_clock.h
+generic-y += ucontext.h
+generic-y += xor.h
index 552a65a..27bd060 100644 (file)
@@ -65,44 +65,33 @@ static inline unsigned long bmips_read_zscm_reg(unsigned int offset)
 {
        unsigned long ret;
 
-       __asm__ __volatile__(
-               ".set push\n"
-               ".set noreorder\n"
-               "cache %1, 0(%2)\n"
-               "sync\n"
-               "_ssnop\n"
-               "_ssnop\n"
-               "_ssnop\n"
-               "_ssnop\n"
-               "_ssnop\n"
-               "_ssnop\n"
-               "_ssnop\n"
-               "mfc0 %0, $28, 3\n"
-               "_ssnop\n"
-               ".set pop\n"
-               : "=&r" (ret)
-               : "i" (Index_Load_Tag_S), "r" (ZSCM_REG_BASE + offset)
-               : "memory");
+       barrier();
+       cache_op(Index_Load_Tag_S, ZSCM_REG_BASE + offset);
+       __sync();
+       _ssnop();
+       _ssnop();
+       _ssnop();
+       _ssnop();
+       _ssnop();
+       _ssnop();
+       _ssnop();
+       ret = read_c0_ddatalo();
+       _ssnop();
+
        return ret;
 }
 
 static inline void bmips_write_zscm_reg(unsigned int offset, unsigned long data)
 {
-       __asm__ __volatile__(
-               ".set push\n"
-               ".set noreorder\n"
-               "mtc0 %0, $28, 3\n"
-               "_ssnop\n"
-               "_ssnop\n"
-               "_ssnop\n"
-               "cache %1, 0(%2)\n"
-               "_ssnop\n"
-               "_ssnop\n"
-               "_ssnop\n"
-               : /* no outputs */
-               : "r" (data),
-                 "i" (Index_Store_Tag_S), "r" (ZSCM_REG_BASE + offset)
-               : "memory");
+       write_c0_ddatalo(data);
+       _ssnop();
+       _ssnop();
+       _ssnop();
+       cache_op(Index_Store_Tag_S, ZSCM_REG_BASE + offset);
+       _ssnop();
+       _ssnop();
+       _ssnop();
+       barrier();
 }
 
 #endif /* !defined(__ASSEMBLY__) */
index 632bbe5..71b9f19 100644 (file)
 #define PRID_IMP_CAVIUM_CN68XX 0x9100
 #define PRID_IMP_CAVIUM_CN66XX 0x9200
 #define PRID_IMP_CAVIUM_CN61XX 0x9300
+#define PRID_IMP_CAVIUM_CNF71XX 0x9400
+#define PRID_IMP_CAVIUM_CN78XX 0x9500
+#define PRID_IMP_CAVIUM_CN70XX 0x9600
 
 /*
  * These are the PRID's for when 23:16 == PRID_COMP_INGENIC
 
 #define PRID_IMP_NETLOGIC_XLP8XX       0x1000
 #define PRID_IMP_NETLOGIC_XLP3XX       0x1100
+#define PRID_IMP_NETLOGIC_XLP2XX       0x1200
 
 /*
  * Definitions for 7:0 on legacy processors
@@ -272,7 +276,7 @@ enum cpu_type_enum {
         */
        CPU_5KC, CPU_5KE, CPU_20KC, CPU_25KF, CPU_SB1, CPU_SB1A, CPU_LOONGSON2,
        CPU_CAVIUM_OCTEON, CPU_CAVIUM_OCTEON_PLUS, CPU_CAVIUM_OCTEON2,
-       CPU_XLR, CPU_XLP,
+       CPU_CAVIUM_OCTEON3, CPU_XLR, CPU_XLP,
 
        CPU_LAST
 };
diff --git a/arch/mips/include/asm/cputime.h b/arch/mips/include/asm/cputime.h
deleted file mode 100644 (file)
index c00eacb..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __MIPS_CPUTIME_H
-#define __MIPS_CPUTIME_H
-
-#include <asm-generic/cputime.h>
-
-#endif /* __MIPS_CPUTIME_H */
diff --git a/arch/mips/include/asm/current.h b/arch/mips/include/asm/current.h
deleted file mode 100644 (file)
index 4c51401..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/current.h>
index 98badd6..a6e505a 100644 (file)
@@ -31,8 +31,10 @@ static inline u32 ioasic_read(unsigned int reg)
        return ioasic_base[reg / 4];
 }
 
+extern void clear_ioasic_dma_irq(unsigned int irq);
+
 extern void init_ioasic_irqs(int base);
 
-extern void dec_ioasic_clocksource_init(void);
+extern int dec_ioasic_clocksource_init(void);
 
 #endif /* __ASM_DEC_IOASIC_H */
diff --git a/arch/mips/include/asm/emergency-restart.h b/arch/mips/include/asm/emergency-restart.h
deleted file mode 100644 (file)
index 108d8c4..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_EMERGENCY_RESTART_H
-#define _ASM_EMERGENCY_RESTART_H
-
-#include <asm-generic/emergency-restart.h>
-
-#endif /* _ASM_EMERGENCY_RESTART_H */
diff --git a/arch/mips/include/asm/local64.h b/arch/mips/include/asm/local64.h
deleted file mode 100644 (file)
index 36c93b5..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/local64.h>
index ddb947e..0089a74 100644 (file)
@@ -42,8 +42,6 @@
 #define cpu_has_mips64r1       0
 #define cpu_has_mips64r2       0
 
-#define cpu_has_dsp            0
-#define cpu_has_dsp2           0
 #define cpu_has_mipsmt         0
 
 #define cpu_has_64bits         0
diff --git a/arch/mips/include/asm/mach-cavium-octeon/gpio.h b/arch/mips/include/asm/mach-cavium-octeon/gpio.h
new file mode 100644 (file)
index 0000000..34e9f7a
--- /dev/null
@@ -0,0 +1,21 @@
+#ifndef __ASM_MACH_CAVIUM_OCTEON_GPIO_H
+#define __ASM_MACH_CAVIUM_OCTEON_GPIO_H
+
+#ifdef CONFIG_GPIOLIB
+#define gpio_get_value __gpio_get_value
+#define gpio_set_value __gpio_set_value
+#define gpio_cansleep  __gpio_cansleep
+#else
+int gpio_request(unsigned gpio, const char *label);
+void gpio_free(unsigned gpio);
+int gpio_direction_input(unsigned gpio);
+int gpio_direction_output(unsigned gpio, int value);
+int gpio_get_value(unsigned gpio);
+void gpio_set_value(unsigned gpio, int value);
+#endif
+
+#include <asm-generic/gpio.h>
+
+#define gpio_to_irq    __gpio_to_irq
+
+#endif /* __ASM_MACH_GENERIC_GPIO_H */
diff --git a/arch/mips/include/asm/mach-lantiq/falcon/cpu-feature-overrides.h b/arch/mips/include/asm/mach-lantiq/falcon/cpu-feature-overrides.h
new file mode 100644 (file)
index 0000000..096a100
--- /dev/null
@@ -0,0 +1,58 @@
+/*
+ *  Lantiq FALCON specific CPU feature overrides
+ *
+ *  Copyright (C) 2013 Thomas Langer, Lantiq Deutschland
+ *
+ *  This file was derived from: include/asm-mips/cpu-features.h
+ *     Copyright (C) 2003, 2004 Ralf Baechle
+ *     Copyright (C) 2004 Maciej W. Rozycki
+ *
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ */
+#ifndef __ASM_MACH_FALCON_CPU_FEATURE_OVERRIDES_H
+#define __ASM_MACH_FALCON_CPU_FEATURE_OVERRIDES_H
+
+#define cpu_has_tlb            1
+#define cpu_has_4kex           1
+#define cpu_has_3k_cache       0
+#define cpu_has_4k_cache       1
+#define cpu_has_tx39_cache     0
+#define cpu_has_sb1_cache      0
+#define cpu_has_fpu            0
+#define cpu_has_32fpr          0
+#define cpu_has_counter                1
+#define cpu_has_watch          1
+#define cpu_has_divec          1
+
+#define cpu_has_prefetch       1
+#define cpu_has_ejtag          1
+#define cpu_has_llsc           1
+
+#define cpu_has_mips16         1
+#define cpu_has_mdmx           0
+#define cpu_has_mips3d         0
+#define cpu_has_smartmips      0
+
+#define cpu_has_mips32r1       1
+#define cpu_has_mips32r2       1
+#define cpu_has_mips64r1       0
+#define cpu_has_mips64r2       0
+
+#define cpu_has_dsp            1
+#define cpu_has_mipsmt         1
+
+#define cpu_has_vint           1
+#define cpu_has_veic           1
+
+#define cpu_has_64bits         0
+#define cpu_has_64bit_zero_reg 0
+#define cpu_has_64bit_gp_regs  0
+#define cpu_has_64bit_addresses        0
+
+#define cpu_dcache_line_size() 32
+#define cpu_icache_line_size() 32
+
+#endif /* __ASM_MACH_FALCON_CPU_FEATURE_OVERRIDES_H */
index 9809972..6f9b24f 100644 (file)
@@ -20,6 +20,8 @@
 #define SYSC_REG_CHIP_REV              0x0c
 #define SYSC_REG_SYSTEM_CONFIG0                0x10
 #define SYSC_REG_SYSTEM_CONFIG1                0x14
+#define SYSC_REG_CLKCFG0               0x2c
+#define SYSC_REG_CPU_SYS_CLKCFG                0x3c
 #define SYSC_REG_CPLL_CONFIG0          0x54
 #define SYSC_REG_CPLL_CONFIG1          0x58
 
 #define MT7620A_CHIP_NAME0             0x3637544d
 #define MT7620A_CHIP_NAME1             0x20203032
 
+#define SYSCFG0_XTAL_FREQ_SEL          BIT(6)
+
 #define CHIP_REV_PKG_MASK              0x1
 #define CHIP_REV_PKG_SHIFT             16
 #define CHIP_REV_VER_MASK              0xf
 #define CHIP_REV_VER_SHIFT             8
 #define CHIP_REV_ECO_MASK              0xf
 
-#define CPLL_SW_CONFIG_SHIFT           31
-#define CPLL_SW_CONFIG_MASK            0x1
-#define CPLL_CPU_CLK_SHIFT             24
-#define CPLL_CPU_CLK_MASK              0x1
-#define CPLL_MULT_RATIO_SHIFT           16
-#define CPLL_MULT_RATIO                 0x7
-#define CPLL_DIV_RATIO_SHIFT            10
-#define CPLL_DIV_RATIO                  0x3
+#define CLKCFG0_PERI_CLK_SEL           BIT(4)
+
+#define CPU_SYS_CLKCFG_OCP_RATIO_SHIFT 16
+#define CPU_SYS_CLKCFG_OCP_RATIO_MASK  0xf
+#define CPU_SYS_CLKCFG_OCP_RATIO_1     0       /* 1:1   (Reserved) */
+#define CPU_SYS_CLKCFG_OCP_RATIO_1_5   1       /* 1:1.5 (Reserved) */
+#define CPU_SYS_CLKCFG_OCP_RATIO_2     2       /* 1:2   */
+#define CPU_SYS_CLKCFG_OCP_RATIO_2_5   3       /* 1:2.5 (Reserved) */
+#define CPU_SYS_CLKCFG_OCP_RATIO_3     4       /* 1:3   */
+#define CPU_SYS_CLKCFG_OCP_RATIO_3_5   5       /* 1:3.5 (Reserved) */
+#define CPU_SYS_CLKCFG_OCP_RATIO_4     6       /* 1:4   */
+#define CPU_SYS_CLKCFG_OCP_RATIO_5     7       /* 1:5   */
+#define CPU_SYS_CLKCFG_OCP_RATIO_10    8       /* 1:10  */
+#define CPU_SYS_CLKCFG_CPU_FDIV_SHIFT  8
+#define CPU_SYS_CLKCFG_CPU_FDIV_MASK   0x1f
+#define CPU_SYS_CLKCFG_CPU_FFRAC_SHIFT 0
+#define CPU_SYS_CLKCFG_CPU_FFRAC_MASK  0x1f
+
+#define CPLL_CFG0_SW_CFG               BIT(31)
+#define CPLL_CFG0_PLL_MULT_RATIO_SHIFT 16
+#define CPLL_CFG0_PLL_MULT_RATIO_MASK   0x7
+#define CPLL_CFG0_LC_CURFCK            BIT(15)
+#define CPLL_CFG0_BYPASS_REF_CLK       BIT(14)
+#define CPLL_CFG0_PLL_DIV_RATIO_SHIFT  10
+#define CPLL_CFG0_PLL_DIV_RATIO_MASK   0x3
+
+#define CPLL_CFG1_CPU_AUX1             BIT(25)
+#define CPLL_CFG1_CPU_AUX0             BIT(24)
 
 #define SYSCFG0_DRAM_TYPE_MASK         0x3
 #define SYSCFG0_DRAM_TYPE_SHIFT                4
diff --git a/arch/mips/include/asm/mach-ralink/mt7620/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ralink/mt7620/cpu-feature-overrides.h
new file mode 100644 (file)
index 0000000..f7bb8cf
--- /dev/null
@@ -0,0 +1,57 @@
+/*
+ * Ralink MT7620 specific CPU feature overrides
+ *
+ * Copyright (C) 2008-2009 Gabor Juhos <juhosg@openwrt.org>
+ * Copyright (C) 2008 Imre Kaloz <kaloz@openwrt.org>
+ *
+ * This file was derived from: include/asm-mips/cpu-features.h
+ *     Copyright (C) 2003, 2004 Ralf Baechle
+ *     Copyright (C) 2004 Maciej W. Rozycki
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ */
+#ifndef _MT7620_CPU_FEATURE_OVERRIDES_H
+#define _MT7620_CPU_FEATURE_OVERRIDES_H
+
+#define cpu_has_tlb            1
+#define cpu_has_4kex           1
+#define cpu_has_3k_cache       0
+#define cpu_has_4k_cache       1
+#define cpu_has_tx39_cache     0
+#define cpu_has_sb1_cache      0
+#define cpu_has_fpu            0
+#define cpu_has_32fpr          0
+#define cpu_has_counter                1
+#define cpu_has_watch          1
+#define cpu_has_divec          1
+
+#define cpu_has_prefetch       1
+#define cpu_has_ejtag          1
+#define cpu_has_llsc           1
+
+#define cpu_has_mips16         1
+#define cpu_has_mdmx           0
+#define cpu_has_mips3d         0
+#define cpu_has_smartmips      0
+
+#define cpu_has_mips32r1       1
+#define cpu_has_mips32r2       1
+#define cpu_has_mips64r1       0
+#define cpu_has_mips64r2       0
+
+#define cpu_has_dsp            1
+#define cpu_has_dsp2           0
+#define cpu_has_mipsmt         0
+
+#define cpu_has_64bits         0
+#define cpu_has_64bit_zero_reg 0
+#define cpu_has_64bit_gp_regs  0
+#define cpu_has_64bit_addresses        0
+
+#define cpu_dcache_line_size() 32
+#define cpu_icache_line_size() 32
+
+#endif /* _MT7620_CPU_FEATURE_OVERRIDES_H */
diff --git a/arch/mips/include/asm/mutex.h b/arch/mips/include/asm/mutex.h
deleted file mode 100644 (file)
index 458c1f7..0000000
+++ /dev/null
@@ -1,9 +0,0 @@
-/*
- * Pull in the generic implementation for the mutex fastpath.
- *
- * TODO: implement optimized primitives instead, or leave the generic
- * implementation in place, or pick the atomic_xchg() based generic
- * implementation. (see asm-generic/mutex-xchg.h for details)
- */
-
-#include <asm-generic/mutex-dec.h>
index 790f0f1..4e8eacb 100644 (file)
@@ -88,6 +88,7 @@
 #define BRIDGE_DRAM_LIMIT6             0x22
 #define BRIDGE_DRAM_LIMIT7             0x23
 
+#define BRIDGE_DRAM_NODE_TRANSLN(i)    (0x24 + (i))
 #define BRIDGE_DRAM_NODE_TRANSLN0      0x24
 #define BRIDGE_DRAM_NODE_TRANSLN1      0x25
 #define BRIDGE_DRAM_NODE_TRANSLN2      0x26
@@ -96,6 +97,8 @@
 #define BRIDGE_DRAM_NODE_TRANSLN5      0x29
 #define BRIDGE_DRAM_NODE_TRANSLN6      0x2a
 #define BRIDGE_DRAM_NODE_TRANSLN7      0x2b
+
+#define BRIDGE_DRAM_CHNL_TRANSLN(i)    (0x2c + (i))
 #define BRIDGE_DRAM_CHNL_TRANSLN0      0x2c
 #define BRIDGE_DRAM_CHNL_TRANSLN1      0x2d
 #define BRIDGE_DRAM_CHNL_TRANSLN2      0x2e
 #define BRIDGE_DRAM_CHNL_TRANSLN5      0x31
 #define BRIDGE_DRAM_CHNL_TRANSLN6      0x32
 #define BRIDGE_DRAM_CHNL_TRANSLN7      0x33
+
 #define BRIDGE_PCIEMEM_BASE0           0x34
 #define BRIDGE_PCIEMEM_BASE1           0x35
 #define BRIDGE_PCIEMEM_BASE2           0x36
index 9fac46f..55eee77 100644 (file)
 #define XLP_IO_USB_OHCI2_OFFSET(node)  XLP_HDR_OFFSET(node, 0, 2, 4)
 #define XLP_IO_USB_OHCI3_OFFSET(node)  XLP_HDR_OFFSET(node, 0, 2, 5)
 
+/* XLP2xx has an updated USB block */
+#define XLP2XX_IO_USB_OFFSET(node, i)  XLP_HDR_OFFSET(node, 0, 4, i)
+#define XLP2XX_IO_USB_XHCI0_OFFSET(node)       XLP_HDR_OFFSET(node, 0, 4, 1)
+#define XLP2XX_IO_USB_XHCI1_OFFSET(node)       XLP_HDR_OFFSET(node, 0, 4, 2)
+#define XLP2XX_IO_USB_XHCI2_OFFSET(node)       XLP_HDR_OFFSET(node, 0, 4, 3)
+
 #define XLP_IO_NAE_OFFSET(node)                XLP_HDR_OFFSET(node, 0, 3, 0)
 #define XLP_IO_POE_OFFSET(node)                XLP_HDR_OFFSET(node, 0, 3, 1)
 
@@ -88,6 +94,9 @@
 #define XLP_IO_I2C0_OFFSET(node)       XLP_HDR_OFFSET(node, 0, 6, 2)
 #define XLP_IO_I2C1_OFFSET(node)       XLP_HDR_OFFSET(node, 0, 6, 3)
 #define XLP_IO_GPIO_OFFSET(node)       XLP_HDR_OFFSET(node, 0, 6, 4)
+/* on 2XX, all I2C busses are on the same block */
+#define XLP2XX_IO_I2C_OFFSET(node)     XLP_HDR_OFFSET(node, 0, 6, 7)
+
 /* system management */
 #define XLP_IO_SYS_OFFSET(node)                XLP_HDR_OFFSET(node, 0, 6, 5)
 #define XLP_IO_JTAG_OFFSET(node)       XLP_HDR_OFFSET(node, 0, 6, 6)
 #define PCI_DEVICE_ID_NLM_NOR          0x1015
 #define PCI_DEVICE_ID_NLM_NAND         0x1016
 #define PCI_DEVICE_ID_NLM_MMC          0x1018
+#define PCI_DEVICE_ID_NLM_XHCI         0x101d
 
 #ifndef __ASSEMBLY__
 
index 4b5108d..105389b 100644 (file)
 #define PIC_LOCAL_SCHEDULING           1
 #define PIC_GLOBAL_SCHEDULING          0
 
-#define PIC_CLK_HZ                     133333333
-
 #define nlm_read_pic_reg(b, r) nlm_read_reg64(b, r)
 #define nlm_write_pic_reg(b, r, v) nlm_write_reg64(b, r, v)
 #define nlm_get_pic_pcibase(node) nlm_pcicfg_base(XLP_IO_PIC_OFFSET(node))
 #define nlm_get_pic_regbase(node) (nlm_get_pic_pcibase(node) + XLP_IO_PCI_HDRSZ)
 
+/* We use PIC on node 0 as a timer */
+#define pic_timer_freq()               nlm_get_pic_frequency(0)
+
 /* IRT and h/w interrupt routines */
 static inline int
 nlm_pic_read_irt(uint64_t base, int irt_index)
index 470e52b..fcf2833 100644 (file)
 #define SYS_SCRTCH2                            0x4b
 #define SYS_SCRTCH3                            0x4c
 
+/* PLL registers XLP2XX */
+#define SYS_PLL_CTRL0                          0x240
+#define SYS_PLL_CTRL1                          0x241
+#define SYS_PLL_CTRL2                          0x242
+#define SYS_PLL_CTRL3                          0x243
+#define SYS_DMC_PLL_CTRL0                      0x244
+#define SYS_DMC_PLL_CTRL1                      0x245
+#define SYS_DMC_PLL_CTRL2                      0x246
+#define SYS_DMC_PLL_CTRL3                      0x247
+
+#define SYS_PLL_CTRL0_DEVX(x)                  (0x248 + (x) * 4)
+#define SYS_PLL_CTRL1_DEVX(x)                  (0x249 + (x) * 4)
+#define SYS_PLL_CTRL2_DEVX(x)                  (0x24a + (x) * 4)
+#define SYS_PLL_CTRL3_DEVX(x)                  (0x24b + (x) * 4)
+
+#define SYS_CPU_PLL_CHG_CTRL                   0x288
+#define SYS_PLL_CHG_CTRL                       0x289
+#define SYS_CLK_DEV_DIS                                0x28a
+#define SYS_CLK_DEV_SEL                                0x28b
+#define SYS_CLK_DEV_DIV                                0x28c
+#define SYS_CLK_DEV_CHG                                0x28d
+#define SYS_CLK_DEV_SEL_REG                    0x28e
+#define SYS_CLK_DEV_DIV_REG                    0x28f
+#define SYS_CPU_PLL_LOCK                       0x29f
+#define SYS_SYS_PLL_LOCK                       0x2a0
+#define SYS_PLL_MEM_CMD                                0x2a1
+#define SYS_CPU_PLL_MEM_REQ                    0x2a2
+#define SYS_SYS_PLL_MEM_REQ                    0x2a3
+#define SYS_PLL_MEM_STAT                       0x2a4
+
 #ifndef __ASSEMBLY__
 
 #define nlm_read_sys_reg(b, r)         nlm_read_reg(b, r)
 #define nlm_get_sys_pcibase(node) nlm_pcicfg_base(XLP_IO_SYS_OFFSET(node))
 #define nlm_get_sys_regbase(node) (nlm_get_sys_pcibase(node) + XLP_IO_PCI_HDRSZ)
 
+unsigned int nlm_get_pic_frequency(int node);
 #endif
 #endif
index f4ea0f7..17daffb 100644 (file)
 #define PIC_PCIE_LINK_1_IRQ            20
 #define PIC_PCIE_LINK_2_IRQ            21
 #define PIC_PCIE_LINK_3_IRQ            22
+
 #define PIC_EHCI_0_IRQ                 23
 #define PIC_EHCI_1_IRQ                 24
 #define PIC_OHCI_0_IRQ                 25
 #define PIC_OHCI_1_IRQ                 26
 #define PIC_OHCI_2_IRQ                 27
 #define PIC_OHCI_3_IRQ                 28
+#define PIC_2XX_XHCI_0_IRQ             23
+#define PIC_2XX_XHCI_1_IRQ             24
+#define PIC_2XX_XHCI_2_IRQ             25
+
 #define PIC_MMC_IRQ                    29
 #define PIC_I2C_0_IRQ                  30
 #define PIC_I2C_1_IRQ                  31
+#define PIC_I2C_2_IRQ                  32
+#define PIC_I2C_3_IRQ                  33
 
 #ifndef __ASSEMBLY__
 
@@ -59,7 +66,17 @@ void xlp_wakeup_secondary_cpus(void);
 
 void xlp_mmu_init(void);
 void nlm_hal_init(void);
+int xlp_get_dram_map(int n, uint64_t *dram_map);
+
+/* Device tree related */
 void *xlp_dt_init(void *fdtp);
 
+static inline int cpu_is_xlpii(void)
+{
+       int chip = read_c0_prid() & 0xff00;
+
+       return chip == PRID_IMP_NETLOGIC_XLP2XX;
+}
+
 #endif /* !__ASSEMBLY__ */
 #endif /* _ASM_NLM_XLP_H */
index 63c9917..3c80a75 100644 (file)
@@ -36,6 +36,8 @@
 #define _ASM_NLM_XLR_PIC_H
 
 #define PIC_CLK_HZ                     66666666
+#define pic_timer_freq()               PIC_CLK_HZ
+
 /* PIC hardware interrupt numbers */
 #define PIC_IRT_WD_INDEX               0
 #define PIC_IRT_TIMER_0_INDEX          1
index a2eed23..f5d77b9 100644 (file)
@@ -251,4 +251,6 @@ extern void (*octeon_irq_setup_secondary)(void);
 typedef void (*octeon_irq_ip4_handler_t)(void);
 void octeon_irq_set_ip4_handler(octeon_irq_ip4_handler_t);
 
+extern void octeon_fixup_irqs(void);
+
 #endif /* __ASM_OCTEON_OCTEON_H */
diff --git a/arch/mips/include/asm/parport.h b/arch/mips/include/asm/parport.h
deleted file mode 100644 (file)
index cf252af..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/parport.h>
diff --git a/arch/mips/include/asm/percpu.h b/arch/mips/include/asm/percpu.h
deleted file mode 100644 (file)
index 844e763..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __ASM_PERCPU_H
-#define __ASM_PERCPU_H
-
-#include <asm-generic/percpu.h>
-
-#endif /* __ASM_PERCPU_H */
diff --git a/arch/mips/include/asm/scatterlist.h b/arch/mips/include/asm/scatterlist.h
deleted file mode 100644 (file)
index 7ee0e64..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef __ASM_SCATTERLIST_H
-#define __ASM_SCATTERLIST_H
-
-#include <asm-generic/scatterlist.h>
-
-#endif /* __ASM_SCATTERLIST_H */
diff --git a/arch/mips/include/asm/sections.h b/arch/mips/include/asm/sections.h
deleted file mode 100644 (file)
index b7e3726..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_SECTIONS_H
-#define _ASM_SECTIONS_H
-
-#include <asm-generic/sections.h>
-
-#endif /* _ASM_SECTIONS_H */
diff --git a/arch/mips/include/asm/segment.h b/arch/mips/include/asm/segment.h
deleted file mode 100644 (file)
index 92ac001..0000000
+++ /dev/null
@@ -1,6 +0,0 @@
-#ifndef _ASM_SEGMENT_H
-#define _ASM_SEGMENT_H
-
-/* Only here because we have some old header files that expect it.. */
-
-#endif /* _ASM_SEGMENT_H */
diff --git a/arch/mips/include/asm/serial.h b/arch/mips/include/asm/serial.h
deleted file mode 100644 (file)
index a0cb0ca..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/serial.h>
diff --git a/arch/mips/include/asm/ucontext.h b/arch/mips/include/asm/ucontext.h
deleted file mode 100644 (file)
index 9bc07b9..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/ucontext.h>
diff --git a/arch/mips/include/asm/xor.h b/arch/mips/include/asm/xor.h
deleted file mode 100644 (file)
index c82eb12..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/xor.h>
index 350cccc..be7196e 100644 (file)
@@ -1,7 +1,9 @@
 # UAPI Header export list
 include include/uapi/asm-generic/Kbuild.asm
 
-header-y += auxvec.h
+generic-y += auxvec.h
+generic-y += ipcbuf.h
+
 header-y += bitsperlong.h
 header-y += break.h
 header-y += byteorder.h
@@ -11,7 +13,6 @@ header-y += fcntl.h
 header-y += inst.h
 header-y += ioctl.h
 header-y += ioctls.h
-header-y += ipcbuf.h
 header-y += kvm_para.h
 header-y += mman.h
 header-y += msgbuf.h
diff --git a/arch/mips/include/uapi/asm/auxvec.h b/arch/mips/include/uapi/asm/auxvec.h
deleted file mode 100644 (file)
index 7cf7f2d..0000000
+++ /dev/null
@@ -1,4 +0,0 @@
-#ifndef _ASM_AUXVEC_H
-#define _ASM_AUXVEC_H
-
-#endif /* _ASM_AUXVEC_H */
diff --git a/arch/mips/include/uapi/asm/ipcbuf.h b/arch/mips/include/uapi/asm/ipcbuf.h
deleted file mode 100644 (file)
index 84c7e51..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/ipcbuf.h>
index 4c6167a..37663c7 100644 (file)
@@ -852,10 +852,17 @@ platform:
        case PRID_IMP_CAVIUM_CN63XX:
        case PRID_IMP_CAVIUM_CN66XX:
        case PRID_IMP_CAVIUM_CN68XX:
+       case PRID_IMP_CAVIUM_CNF71XX:
                c->cputype = CPU_CAVIUM_OCTEON2;
                __cpu_name[cpu] = "Cavium Octeon II";
                set_elf_platform(cpu, "octeon2");
                break;
+       case PRID_IMP_CAVIUM_CN70XX:
+       case PRID_IMP_CAVIUM_CN78XX:
+               c->cputype = CPU_CAVIUM_OCTEON3;
+               __cpu_name[cpu] = "Cavium Octeon III";
+               set_elf_platform(cpu, "octeon3");
+               break;
        default:
                printk(KERN_INFO "Unknown Octeon chip!\n");
                c->cputype = CPU_UNKNOWN;
@@ -899,6 +906,11 @@ static inline void cpu_probe_netlogic(struct cpuinfo_mips *c, int cpu)
                        MIPS_CPU_LLSC);
 
        switch (c->processor_id & 0xff00) {
+       case PRID_IMP_NETLOGIC_XLP2XX:
+               c->cputype = CPU_XLP;
+               __cpu_name[cpu] = "Broadcom XLPII";
+               break;
+
        case PRID_IMP_NETLOGIC_XLP8XX:
        case PRID_IMP_NETLOGIC_XLP3XX:
                c->cputype = CPU_XLP;
index 0654bff..6cbbf6e 100644 (file)
@@ -37,13 +37,13 @@ static struct clocksource clocksource_dec = {
        .flags          = CLOCK_SOURCE_IS_CONTINUOUS,
 };
 
-void __init dec_ioasic_clocksource_init(void)
+int __init dec_ioasic_clocksource_init(void)
 {
        unsigned int freq;
        u32 start, end;
-       int i = HZ / 10;
-
+       int i = HZ / 8;
 
+       ds1287_timer_state();
        while (!ds1287_timer_state())
                ;
 
@@ -55,9 +55,15 @@ void __init dec_ioasic_clocksource_init(void)
 
        end = dec_ioasic_hpt_read(&clocksource_dec);
 
-       freq = (end - start) * 10;
+       freq = (end - start) * 8;
+
+       /* An early revision of the I/O ASIC didn't have the counter.  */
+       if (!freq)
+               return -ENXIO;
+
        printk(KERN_INFO "I/O ASIC clock frequency %dHz\n", freq);
 
        clocksource_dec.rating = 200 + freq / 10000000;
        clocksource_register_hz(&clocksource_dec, freq);
+       return 0;
 }
index 0c655de..42f8875 100644 (file)
@@ -166,6 +166,7 @@ void __init check_wait(void)
        case CPU_CAVIUM_OCTEON:
        case CPU_CAVIUM_OCTEON_PLUS:
        case CPU_CAVIUM_OCTEON2:
+       case CPU_CAVIUM_OCTEON3:
        case CPU_JZRISC:
        case CPU_LOONGSON1:
        case CPU_XLR:
index a03e93c..539b629 100644 (file)
@@ -83,7 +83,7 @@ _mcount:
        PTR_S   MCOUNT_RA_ADDRESS_REG, PT_R12(sp)
 #endif
 
-       move    a0, ra          /* arg1: self return address */
+       PTR_SUBU a0, ra, 8      /* arg1: self address */
        .globl ftrace_call
 ftrace_call:
        nop     /* a placeholder for the call to a real tracing function */
index 43d2d78..74bab9d 100644 (file)
@@ -26,6 +26,12 @@ process_entry:
        PTR_L           s2, (s0)
        PTR_ADD         s0, s0, SZREG
 
+       /*
+        * In case of a kdump/crash kernel, the indirection page is not
+        * populated as the kernel is directly copied to a reserved location
+        */
+       beqz            s2, done
+
        /* destination page */
        and             s3, s2, 0x1
        beq             s3, zero, 1f
index c7f9051..c538d6e 100644 (file)
@@ -552,6 +552,52 @@ static void __init arch_mem_addpart(phys_t mem, phys_t end, int type)
        add_memory_region(mem, size, type);
 }
 
+#ifdef CONFIG_KEXEC
+static inline unsigned long long get_total_mem(void)
+{
+       unsigned long long total;
+
+       total = max_pfn - min_low_pfn;
+       return total << PAGE_SHIFT;
+}
+
+static void __init mips_parse_crashkernel(void)
+{
+       unsigned long long total_mem;
+       unsigned long long crash_size, crash_base;
+       int ret;
+
+       total_mem = get_total_mem();
+       ret = parse_crashkernel(boot_command_line, total_mem,
+                               &crash_size, &crash_base);
+       if (ret != 0 || crash_size <= 0)
+               return;
+
+       crashk_res.start = crash_base;
+       crashk_res.end   = crash_base + crash_size - 1;
+}
+
+static void __init request_crashkernel(struct resource *res)
+{
+       int ret;
+
+       ret = request_resource(res, &crashk_res);
+       if (!ret)
+               pr_info("Reserving %ldMB of memory at %ldMB for crashkernel\n",
+                       (unsigned long)((crashk_res.end -
+                                        crashk_res.start + 1) >> 20),
+                       (unsigned long)(crashk_res.start  >> 20));
+}
+#else /* !defined(CONFIG_KEXEC)                */
+static void __init mips_parse_crashkernel(void)
+{
+}
+
+static void __init request_crashkernel(struct resource *res)
+{
+}
+#endif /* !defined(CONFIG_KEXEC)  */
+
 static void __init arch_mem_init(char **cmdline_p)
 {
        extern void plat_mem_setup(void);
@@ -608,6 +654,8 @@ static void __init arch_mem_init(char **cmdline_p)
                                BOOTMEM_DEFAULT);
        }
 #endif
+
+       mips_parse_crashkernel();
 #ifdef CONFIG_KEXEC
        if (crashk_res.start != crashk_res.end)
                reserve_bootmem(crashk_res.start,
@@ -620,52 +668,6 @@ static void __init arch_mem_init(char **cmdline_p)
        paging_init();
 }
 
-#ifdef CONFIG_KEXEC
-static inline unsigned long long get_total_mem(void)
-{
-       unsigned long long total;
-
-       total = max_pfn - min_low_pfn;
-       return total << PAGE_SHIFT;
-}
-
-static void __init mips_parse_crashkernel(void)
-{
-       unsigned long long total_mem;
-       unsigned long long crash_size, crash_base;
-       int ret;
-
-       total_mem = get_total_mem();
-       ret = parse_crashkernel(boot_command_line, total_mem,
-                               &crash_size, &crash_base);
-       if (ret != 0 || crash_size <= 0)
-               return;
-
-       crashk_res.start = crash_base;
-       crashk_res.end   = crash_base + crash_size - 1;
-}
-
-static void __init request_crashkernel(struct resource *res)
-{
-       int ret;
-
-       ret = request_resource(res, &crashk_res);
-       if (!ret)
-               pr_info("Reserving %ldMB of memory at %ldMB for crashkernel\n",
-                       (unsigned long)((crashk_res.end -
-                               crashk_res.start + 1) >> 20),
-                       (unsigned long)(crashk_res.start  >> 20));
-}
-#else /* !defined(CONFIG_KEXEC)         */
-static void __init mips_parse_crashkernel(void)
-{
-}
-
-static void __init request_crashkernel(struct resource *res)
-{
-}
-#endif /* !defined(CONFIG_KEXEC)  */
-
 static void __init resource_init(void)
 {
        int i;
@@ -678,11 +680,6 @@ static void __init resource_init(void)
        data_resource.start = __pa_symbol(&_etext);
        data_resource.end = __pa_symbol(&_edata) - 1;
 
-       /*
-        * Request address space for all standard RAM.
-        */
-       mips_parse_crashkernel();
-
        for (i = 0; i < boot_mem_map.nr_map; i++) {
                struct resource *res;
                unsigned long start, end;
index c2e5d74..5969f1e 100644 (file)
@@ -99,7 +99,9 @@ static void cmp_init_secondary(void)
 
        c->core = (read_c0_ebase() >> 1) & 0x1ff;
 #if defined(CONFIG_MIPS_MT_SMP) || defined(CONFIG_MIPS_MT_SMTC)
-       c->vpe_id = (read_c0_tcbind() >> TCBIND_CURVPE_SHIFT) & TCBIND_CURVPE;
+       if (cpu_has_mipsmt)
+               c->vpe_id = (read_c0_tcbind() >> TCBIND_CURVPE_SHIFT) &
+                       TCBIND_CURVPE;
 #endif
 #ifdef CONFIG_MIPS_MT_SMTC
        c->tc_id  = (read_c0_tcbind() & TCBIND_CURTC) >> TCBIND_CURTC_SHIFT;
@@ -177,9 +179,16 @@ void __init cmp_smp_setup(void)
        }
 
        if (cpu_has_mipsmt) {
-               unsigned int nvpe, mvpconf0 = read_c0_mvpconf0();
+               unsigned int nvpe = 1;
+#ifdef CONFIG_MIPS_MT_SMP
+               unsigned int mvpconf0 = read_c0_mvpconf0();
+
+               nvpe = ((mvpconf0 & MVPCONF0_PVPE) >> MVPCONF0_PVPE_SHIFT) + 1;
+#elif defined(CONFIG_MIPS_MT_SMTC)
+               unsigned int mvpconf0 = read_c0_mvpconf0();
 
                nvpe = ((mvpconf0 & MVPCONF0_PTC) >> MVPCONF0_PTC_SHIFT) + 1;
+#endif
                smp_num_siblings = nvpe;
        }
        pr_info("Detected %i available secondary CPU(s)\n", ncpu);
index 9d686bf..364d26a 100644 (file)
@@ -121,6 +121,14 @@ void __init time_init(void)
 {
        plat_time_init();
 
-       if (!mips_clockevent_init() || !cpu_has_mfc0_count_bug())
+       /*
+        * The use of the R4k timer as a clock event takes precedence;
+        * if reading the Count register might interfere with the timer
+        * interrupt, then we don't use the timer as a clock source.
+        * We may still use the timer as a clock source though if the
+        * timer interrupt isn't reliable; the interference doesn't
+        * matter then, because we don't use the interrupt.
+        */
+       if (mips_clockevent_init() != 0 || !cpu_has_mfc0_count_bug())
                init_mips_clocksource();
 }
index 05826d2..3b46f7c 100644 (file)
@@ -179,5 +179,6 @@ SECTIONS
                *(.options)
                *(.pdr)
                *(.reginfo)
+               *(.eh_frame)
        }
 }
index faf84c5..59b2b3c 100644 (file)
@@ -1368,7 +1368,7 @@ out_einval:
 }
 static DEVICE_ATTR_RW(ntcs);
 
-static struct attribute vpe_attrs[] = {
+static struct attribute *vpe_attrs[] = {
        &dev_attr_kill.attr,
        &dev_attr_ntcs.attr,
        NULL,
index ff4894a..8f1866d 100644 (file)
@@ -48,6 +48,7 @@
 #define CPU0CC_CPUDIV          0x0001
 
 /* Activation Status Register */
+#define ACTS_ASC0_ACT  0x00001000
 #define ACTS_ASC1_ACT  0x00000800
 #define ACTS_I2C_ACT   0x00004000
 #define ACTS_P0                0x00010000
@@ -108,6 +109,7 @@ static void sysctl_deactivate(struct clk *clk)
 static int sysctl_clken(struct clk *clk)
 {
        sysctl_w32(clk->module, clk->bits, SYSCTL_CLKEN);
+       sysctl_w32(clk->module, clk->bits, SYSCTL_ACT);
        sysctl_wait(clk, clk->bits, SYSCTL_CLKS);
        return 0;
 }
@@ -256,6 +258,7 @@ void __init ltq_soc_init(void)
        clkdev_add_sys("1e800400.pad", SYSCTL_SYS1, ACTS_PADCTRL1);
        clkdev_add_sys("1e800500.pad", SYSCTL_SYS1, ACTS_PADCTRL3);
        clkdev_add_sys("1e800600.pad", SYSCTL_SYS1, ACTS_PADCTRL4);
-       clkdev_add_sys("1e100C00.serial", SYSCTL_SYS1, ACTS_ASC1_ACT);
+       clkdev_add_sys("1e100b00.serial", SYSCTL_SYS1, ACTS_ASC1_ACT);
+       clkdev_add_sys("1e100c00.serial", SYSCTL_SYS1, ACTS_ASC0_ACT);
        clkdev_add_sys("1e200000.i2c", SYSCTL_SYS1, ACTS_I2C_ACT);
 }
index 7a13660..087497d 100644 (file)
@@ -1,3 +1,3 @@
-obj-y := prom.o sysctrl.o clk.o reset.o dma.o gptu.o
+obj-y := prom.o sysctrl.o clk.o reset.o dma.o gptu.o dcdc.o
 
 obj-$(CONFIG_XRX200_PHY_FW) += xrx200_phy_fw.o
diff --git a/arch/mips/lantiq/xway/dcdc.c b/arch/mips/lantiq/xway/dcdc.c
new file mode 100644 (file)
index 0000000..7688ac0
--- /dev/null
@@ -0,0 +1,63 @@
+/*
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ *
+ *  Copyright (C) 2012 John Crispin <blogic@openwrt.org>
+ *  Copyright (C) 2010 Sameer Ahmad, Lantiq GmbH
+ */
+
+#include <linux/ioport.h>
+#include <linux/of_platform.h>
+
+#include <lantiq_soc.h>
+
+/* Bias and regulator Setup Register */
+#define DCDC_BIAS_VREG0        0xa
+/* Bias and regulator Setup Register */
+#define DCDC_BIAS_VREG1        0xb
+
+#define dcdc_w8(x, y)  ltq_w8((x), dcdc_membase + (y))
+#define dcdc_r8(x)     ltq_r8(dcdc_membase + (x))
+
+static void __iomem *dcdc_membase;
+
+static int dcdc_probe(struct platform_device *pdev)
+{
+       struct resource *res;
+
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       dcdc_membase = devm_ioremap_resource(&pdev->dev, res);
+       if (IS_ERR(dcdc_membase))
+               return PTR_ERR(dcdc_membase);
+
+       dev_info(&pdev->dev, "Core Voltage : %d mV\n",
+               dcdc_r8(DCDC_BIAS_VREG1) * 8);
+
+       return 0;
+}
+
+static const struct of_device_id dcdc_match[] = {
+       { .compatible = "lantiq,dcdc-xrx200" },
+       {},
+};
+
+static struct platform_driver dcdc_driver = {
+       .probe = dcdc_probe,
+       .driver = {
+               .name = "dcdc-xrx200",
+               .owner = THIS_MODULE,
+               .of_match_table = dcdc_match,
+       },
+};
+
+int __init dcdc_init(void)
+{
+       int ret = platform_driver_register(&dcdc_driver);
+
+       if (ret)
+               pr_info("dcdc: Error registering platform driver\n");
+       return ret;
+}
+
+arch_initcall(dcdc_init);
index dfb509d..fd32075 100644 (file)
@@ -13,13 +13,11 @@ endif
 MKLASATIMG = mklasatimg
 MKLASATIMG_ARCH = mq2,mqpro,sp100,sp200
 KERNEL_IMAGE = vmlinux
-KERNEL_START = $(shell $(NM) $(KERNEL_IMAGE) | grep " _text" | cut -f1 -d\ )
-KERNEL_ENTRY = $(shell $(NM) $(KERNEL_IMAGE) | grep kernel_entry | cut -f1 -d\ )
 
 LDSCRIPT= -L$(srctree)/$(src) -Tromscript.normal
 
-HEAD_DEFINES := -D_kernel_start=0x$(KERNEL_START) \
-               -D_kernel_entry=0x$(KERNEL_ENTRY) \
+HEAD_DEFINES := -D_kernel_start=$(VMLINUX_LOAD_ADDRESS) \
+               -D_kernel_entry=$(VMLINUX_ENTRY_ADDRESS) \
                -D VERSION="\"$(Version)\"" \
                -D TIMESTAMP=$(shell date +%s)
 
index 4c57b3e..9e4484c 100644 (file)
@@ -3,8 +3,9 @@
 #
 
 obj-y += setup.o init.o cmdline.o env.o time.o reset.o irq.o \
-    pci.o bonito-irq.o mem.o machtype.o platform.o
+    bonito-irq.o mem.o machtype.o platform.o
 obj-$(CONFIG_GPIOLIB) += gpio.o
+obj-$(CONFIG_PCI) += pci.o
 
 #
 # Serial port support
index 46048d2..efe0088 100644 (file)
@@ -436,7 +436,6 @@ static int microMIPS32_to_MIPS32(union mips_instruction *insn_ptr)
                                break;
                        default:
                                return SIGILL;
-                               break;
                        }
                        break;
                case mm_32f_74_op:      /* c.cond.fmt */
@@ -451,12 +450,10 @@ static int microMIPS32_to_MIPS32(union mips_instruction *insn_ptr)
                        break;
                default:
                        return SIGILL;
-                       break;
                }
                break;
        default:
                return SIGILL;
-               break;
        }
 
        *insn_ptr = mips32_insn;
@@ -491,7 +488,6 @@ int mm_isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
                                                dec_insn.next_pc_inc;
                                *contpc = regs->regs[insn.mm_i_format.rs];
                                return 1;
-                               break;
                        }
                }
                break;
@@ -513,7 +509,6 @@ int mm_isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
                                        dec_insn.pc_inc +
                                        dec_insn.next_pc_inc;
                        return 1;
-                       break;
                case mm_bgezals_op:
                case mm_bgezal_op:
                        regs->regs[31] = regs->cp0_epc +
@@ -530,7 +525,6 @@ int mm_isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
                                        dec_insn.pc_inc +
                                        dec_insn.next_pc_inc;
                        return 1;
-                       break;
                case mm_blez_op:
                        if ((long)regs->regs[insn.mm_i_format.rs] <= 0)
                                *contpc = regs->cp0_epc +
@@ -541,7 +535,6 @@ int mm_isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
                                        dec_insn.pc_inc +
                                        dec_insn.next_pc_inc;
                        return 1;
-                       break;
                case mm_bgtz_op:
                        if ((long)regs->regs[insn.mm_i_format.rs] <= 0)
                                *contpc = regs->cp0_epc +
@@ -552,7 +545,6 @@ int mm_isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
                                        dec_insn.pc_inc +
                                        dec_insn.next_pc_inc;
                        return 1;
-                       break;
                case mm_bc2f_op:
                case mm_bc1f_op:
                        bc_false = 1;
@@ -580,7 +572,6 @@ int mm_isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
                                *contpc = regs->cp0_epc +
                                        dec_insn.pc_inc + dec_insn.next_pc_inc;
                        return 1;
-                       break;
                }
                break;
        case mm_pool16c_op:
@@ -593,7 +584,6 @@ int mm_isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
                case mm_jr16_op:
                        *contpc = regs->regs[insn.mm_i_format.rs];
                        return 1;
-                       break;
                }
                break;
        case mm_beqz16_op:
@@ -605,7 +595,6 @@ int mm_isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
                        *contpc = regs->cp0_epc +
                                dec_insn.pc_inc + dec_insn.next_pc_inc;
                return 1;
-               break;
        case mm_bnez16_op:
                if ((long)regs->regs[reg16to32map[insn.mm_b1_format.rs]] != 0)
                        *contpc = regs->cp0_epc +
@@ -615,12 +604,10 @@ int mm_isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
                        *contpc = regs->cp0_epc +
                                dec_insn.pc_inc + dec_insn.next_pc_inc;
                return 1;
-               break;
        case mm_b16_op:
                *contpc = regs->cp0_epc + dec_insn.pc_inc +
                         (insn.mm_b0_format.simmediate << 1);
                return 1;
-               break;
        case mm_beq32_op:
                if (regs->regs[insn.mm_i_format.rs] ==
                    regs->regs[insn.mm_i_format.rt])
@@ -632,7 +619,6 @@ int mm_isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
                                dec_insn.pc_inc +
                                dec_insn.next_pc_inc;
                return 1;
-               break;
        case mm_bne32_op:
                if (regs->regs[insn.mm_i_format.rs] !=
                    regs->regs[insn.mm_i_format.rt])
@@ -643,7 +629,6 @@ int mm_isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
                        *contpc = regs->cp0_epc +
                                dec_insn.pc_inc + dec_insn.next_pc_inc;
                return 1;
-               break;
        case mm_jalx32_op:
                regs->regs[31] = regs->cp0_epc +
                        dec_insn.pc_inc + dec_insn.next_pc_inc;
@@ -652,7 +637,6 @@ int mm_isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
                *contpc <<= 28;
                *contpc |= (insn.j_format.target << 2);
                return 1;
-               break;
        case mm_jals32_op:
        case mm_jal32_op:
                regs->regs[31] = regs->cp0_epc +
@@ -665,7 +649,6 @@ int mm_isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
                *contpc |= (insn.j_format.target << 1);
                set_isa16_mode(*contpc);
                return 1;
-               break;
        }
        return 0;
 }
@@ -694,7 +677,6 @@ static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
                case jr_op:
                        *contpc = regs->regs[insn.r_format.rs];
                        return 1;
-                       break;
                }
                break;
        case bcond_op:
@@ -716,7 +698,6 @@ static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
                                        dec_insn.pc_inc +
                                        dec_insn.next_pc_inc;
                        return 1;
-                       break;
                case bgezal_op:
                case bgezall_op:
                        regs->regs[31] = regs->cp0_epc +
@@ -734,7 +715,6 @@ static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
                                        dec_insn.pc_inc +
                                        dec_insn.next_pc_inc;
                        return 1;
-                       break;
                }
                break;
        case jalx_op:
@@ -752,7 +732,6 @@ static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
                /* Set microMIPS mode bit: XOR for jalx. */
                *contpc ^= bit;
                return 1;
-               break;
        case beq_op:
        case beql_op:
                if (regs->regs[insn.i_format.rs] ==
@@ -765,7 +744,6 @@ static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
                                dec_insn.pc_inc +
                                dec_insn.next_pc_inc;
                return 1;
-               break;
        case bne_op:
        case bnel_op:
                if (regs->regs[insn.i_format.rs] !=
@@ -778,7 +756,6 @@ static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
                                dec_insn.pc_inc +
                                dec_insn.next_pc_inc;
                return 1;
-               break;
        case blez_op:
        case blezl_op:
                if ((long)regs->regs[insn.i_format.rs] <= 0)
@@ -790,7 +767,6 @@ static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
                                dec_insn.pc_inc +
                                dec_insn.next_pc_inc;
                return 1;
-               break;
        case bgtz_op:
        case bgtzl_op:
                if ((long)regs->regs[insn.i_format.rs] > 0)
@@ -802,7 +778,6 @@ static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
                                dec_insn.pc_inc +
                                dec_insn.next_pc_inc;
                return 1;
-               break;
 #ifdef CONFIG_CPU_CAVIUM_OCTEON
        case lwc2_op: /* This is bbit0 on Octeon */
                if ((regs->regs[insn.i_format.rs] & (1ull<<insn.i_format.rt)) == 0)
@@ -856,7 +831,6 @@ static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
                                                dec_insn.pc_inc +
                                                dec_insn.next_pc_inc;
                                return 1;
-                               break;
                        case 1: /* bc1t */
                        case 3: /* bc1tl */
                                if (fcr31 & (1 << bit))
@@ -868,7 +842,6 @@ static int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn,
                                                dec_insn.pc_inc +
                                                dec_insn.next_pc_inc;
                                return 1;
-                               break;
                        }
                }
                break;
index a0bcdbb..729e770 100644 (file)
@@ -224,6 +224,20 @@ static void probe_octeon(void)
                c->options |= MIPS_CPU_PREFETCH;
                break;
 
+       case CPU_CAVIUM_OCTEON3:
+               c->icache.linesz = 128;
+               c->icache.sets = 16;
+               c->icache.ways = 39;
+               c->icache.flags |= MIPS_CACHE_VTAG;
+               icache_size = c->icache.sets * c->icache.ways * c->icache.linesz;
+
+               c->dcache.linesz = 128;
+               c->dcache.ways = 32;
+               c->dcache.sets = 8;
+               dcache_size = c->dcache.sets * c->dcache.ways * c->dcache.linesz;
+               c->options |= MIPS_CPU_PREFETCH;
+               break;
+
        default:
                panic("Unsupported Cavium Networks CPU type");
                break;
index aaccf1c..664e523 100644 (file)
@@ -50,16 +50,20 @@ static inline struct page *dma_addr_to_page(struct device *dev,
 }
 
 /*
+ * The affected CPUs below in 'cpu_needs_post_dma_flush()' can
+ * speculatively fill random cachelines with stale data at any time,
+ * requiring an extra flush post-DMA.
+ *
  * Warning on the terminology - Linux calls an uncached area coherent;
  * MIPS terminology calls memory areas with hardware maintained coherency
  * coherent.
  */
-
-static inline int cpu_is_noncoherent_r10000(struct device *dev)
+static inline int cpu_needs_post_dma_flush(struct device *dev)
 {
        return !plat_device_is_coherent(dev) &&
-              (current_cpu_type() == CPU_R10000 ||
-              current_cpu_type() == CPU_R12000);
+              (boot_cpu_type() == CPU_R10000 ||
+               boot_cpu_type() == CPU_R12000 ||
+               boot_cpu_type() == CPU_BMIPS5000);
 }
 
 static gfp_t massage_gfp_flags(const struct device *dev, gfp_t gfp)
@@ -230,7 +234,7 @@ static inline void __dma_sync(struct page *page,
 static void mips_dma_unmap_page(struct device *dev, dma_addr_t dma_addr,
        size_t size, enum dma_data_direction direction, struct dma_attrs *attrs)
 {
-       if (cpu_is_noncoherent_r10000(dev))
+       if (cpu_needs_post_dma_flush(dev))
                __dma_sync(dma_addr_to_page(dev, dma_addr),
                           dma_addr & ~PAGE_MASK, size, direction);
 
@@ -284,7 +288,7 @@ static void mips_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
 static void mips_dma_sync_single_for_cpu(struct device *dev,
        dma_addr_t dma_handle, size_t size, enum dma_data_direction direction)
 {
-       if (cpu_is_noncoherent_r10000(dev))
+       if (cpu_needs_post_dma_flush(dev))
                __dma_sync(dma_addr_to_page(dev, dma_handle),
                           dma_handle & ~PAGE_MASK, size, direction);
 }
@@ -305,7 +309,7 @@ static void mips_dma_sync_sg_for_cpu(struct device *dev,
 
        /* Make sure that gcc doesn't leave the empty loop body.  */
        for (i = 0; i < nelems; i++, sg++) {
-               if (cpu_is_noncoherent_r10000(dev))
+               if (cpu_needs_post_dma_flush(dev))
                        __dma_sync(sg_page(sg), sg->offset, sg->length,
                                   direction);
        }
index 85df1cd..becc42b 100644 (file)
@@ -42,8 +42,7 @@ static void __kprobes __do_page_fault(struct pt_regs *regs, unsigned long write,
        const int field = sizeof(unsigned long) * 2;
        siginfo_t info;
        int fault;
-       unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
-                                                (write ? FAULT_FLAG_WRITE : 0);
+       unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
 
 #if 0
        printk("Cpu%d[%s:%d:%0*lx:%ld:%0*lx]\n", raw_smp_processor_id(),
@@ -93,6 +92,8 @@ static void __kprobes __do_page_fault(struct pt_regs *regs, unsigned long write,
        if (in_atomic() || !mm)
                goto bad_area_nosemaphore;
 
+       if (user_mode(regs))
+               flags |= FAULT_FLAG_USER;
 retry:
        down_read(&mm->mmap_sem);
        vma = find_vma(mm, address);
@@ -114,6 +115,7 @@ good_area:
        if (write) {
                if (!(vma->vm_flags & VM_WRITE))
                        goto bad_area;
+               flags |= FAULT_FLAG_WRITE;
        } else {
                if (cpu_has_rixi) {
                        if (address == regs->cp0_epc && !(vma->vm_flags & VM_EXEC)) {
@@ -241,6 +243,8 @@ out_of_memory:
         * (which will retry the fault, or kill us if we got oom-killed).
         */
        up_read(&mm->mmap_sem);
+       if (!user_mode(regs))
+               goto no_context;
        pagefault_out_of_memory();
        return;
 
index d4ea5c9..06ce17c 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/swap.h>
 #include <linux/hugetlb.h>
 
+#include <asm/cpu-features.h>
 #include <asm/pgtable.h>
 
 static inline pte_t gup_get_pte(pte_t *ptep)
@@ -273,7 +274,7 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write,
        len = (unsigned long) nr_pages << PAGE_SHIFT;
 
        end = start + len;
-       if (end < start)
+       if (end < start || cpu_has_dc_aliases)
                goto slow_irqon;
 
        /* XXX: batch / limit 'nr' */
index a7fee0d..01fda44 100644 (file)
@@ -85,6 +85,11 @@ int pud_huge(pud_t pud)
        return (pud_val(pud) & _PAGE_HUGE) != 0;
 }
 
+int pmd_huge_support(void)
+{
+       return 1;
+}
+
 struct page *
 follow_huge_pmd(struct mm_struct *mm, unsigned long address,
                pmd_t *pmd, int write)
index 4e73f10..e205ef5 100644 (file)
@@ -254,6 +254,7 @@ void copy_from_user_page(struct vm_area_struct *vma,
                        SetPageDcacheDirty(page);
        }
 }
+EXPORT_SYMBOL_GPL(copy_from_user_page);
 
 void __init fixrange_init(unsigned long start, unsigned long end,
        pgd_t *pgd_base)
index 30a494d..79bca31 100644 (file)
 
 #define FASTPATH_SIZE  128
 
+#ifdef CONFIG_MIPS_PGD_C0_CONTEXT
 LEAF(tlbmiss_handler_setup_pgd)
        .space          16 * 4
 END(tlbmiss_handler_setup_pgd)
 EXPORT(tlbmiss_handler_setup_pgd_end)
+#endif
 
 LEAF(handle_tlbm)
        .space          FASTPATH_SIZE * 4
index 556cb48..821b451 100644 (file)
@@ -85,6 +85,7 @@ static int use_bbit_insns(void)
        case CPU_CAVIUM_OCTEON:
        case CPU_CAVIUM_OCTEON_PLUS:
        case CPU_CAVIUM_OCTEON2:
+       case CPU_CAVIUM_OCTEON3:
                return 1;
        default:
                return 0;
@@ -95,6 +96,7 @@ static int use_lwx_insns(void)
 {
        switch (current_cpu_type()) {
        case CPU_CAVIUM_OCTEON2:
+       case CPU_CAVIUM_OCTEON3:
                return 1;
        default:
                return 0;
index 2447bf9..852a4ee 100644 (file)
@@ -19,6 +19,15 @@ config DT_XLP_SVP
          pointer to the kernel.  The corresponding DTS file is at
          arch/mips/netlogic/dts/xlp_svp.dts
 
+config DT_XLP_FVP
+       bool "Built-in device tree for XLP FVP boards"
+       default y
+       help
+         Add an FDT blob for XLP FVP board into the kernel.
+         This DTB will be used if the firmware does not pass in a DTB
+         pointer to the kernel.  The corresponding DTS file is at
+         arch/mips/netlogic/dts/xlp_fvp.dts
+
 config NLM_MULTINODE
        bool "Support for multi-chip boards"
        depends on NLM_XLP_BOARD
index 4e35d9c..6f8feb9 100644 (file)
@@ -106,9 +106,7 @@ void nlm_early_init_secondary(int cpu)
 {
        change_c0_config(CONF_CM_CMASK, 0x3);
 #ifdef CONFIG_CPU_XLP
-       /* mmu init, once per core */
-       if (cpu % NLM_THREADS_PER_CORE == 0)
-               xlp_mmu_init();
+       xlp_mmu_init();
 #endif
        write_c0_ebase(nlm_current_node()->ebase);
 }
index 045a396..13391b8 100644 (file)
@@ -45,6 +45,7 @@
 #if defined(CONFIG_CPU_XLP)
 #include <asm/netlogic/xlp-hal/iomap.h>
 #include <asm/netlogic/xlp-hal/xlp.h>
+#include <asm/netlogic/xlp-hal/sys.h>
 #include <asm/netlogic/xlp-hal/pic.h>
 #elif defined(CONFIG_CPU_XLR)
 #include <asm/netlogic/xlr/iomap.h>
@@ -91,7 +92,7 @@ static void nlm_init_pic_timer(void)
                csrc_pic.read   = nlm_get_pic_timer;
        }
        csrc_pic.rating = 1000;
-       clocksource_register_hz(&csrc_pic, PIC_CLK_HZ);
+       clocksource_register_hz(&csrc_pic, pic_timer_freq());
 }
 
 void __init plat_time_init(void)
index aecb6fa..0b9be5f 100644 (file)
@@ -1,2 +1,3 @@
 obj-$(CONFIG_DT_XLP_EVP) := xlp_evp.dtb.o
 obj-$(CONFIG_DT_XLP_SVP) += xlp_svp.dtb.o
+obj-$(CONFIG_DT_XLP_FVP) += xlp_fvp.dtb.o
index 0640703..89ad048 100644 (file)
@@ -9,19 +9,12 @@
        #address-cells = <2>;
        #size-cells = <2>;
 
-       memory {
-               device_type = "memory";
-               reg =  <0 0x00100000 0 0x0FF00000       // 255M at 1M
-                       0 0x20000000 0 0xa0000000       // 2560M at 512M
-                       0 0xe0000000 1 0x00000000>;
-       };
-
        soc {
                #address-cells = <2>;
                #size-cells = <1>;
                compatible = "simple-bus";
                ranges = <0 0  0 0x18000000  0x04000000   // PCIe CFG
-                         1 0  0 0x16000000  0x01000000>; // GBU chipselects
+                         1 0  0 0x16000000  0x02000000>; // GBU chipselects
 
                serial0: serial@30000 {
                        device_type = "serial";
diff --git a/arch/mips/netlogic/dts/xlp_fvp.dts b/arch/mips/netlogic/dts/xlp_fvp.dts
new file mode 100644 (file)
index 0000000..63e62b7
--- /dev/null
@@ -0,0 +1,118 @@
+/*
+ * XLP2XX Device Tree Source for FVP boards
+ */
+
+/dts-v1/;
+/ {
+       model = "netlogic,XLP-FVP";
+       compatible = "netlogic,xlp";
+       #address-cells = <2>;
+       #size-cells = <2>;
+
+       soc {
+               #address-cells = <2>;
+               #size-cells = <1>;
+               compatible = "simple-bus";
+               ranges = <0 0  0 0x18000000  0x04000000   // PCIe CFG
+                         1 0  0 0x16000000  0x02000000>; // GBU chipselects
+
+               serial0: serial@30000 {
+                       device_type = "serial";
+                       compatible = "ns16550";
+                       reg = <0 0x30100 0xa00>;
+                       reg-shift = <2>;
+                       reg-io-width = <4>;
+                       clock-frequency = <133333333>;
+                       interrupt-parent = <&pic>;
+                       interrupts = <17>;
+               };
+               serial1: serial@31000 {
+                       device_type = "serial";
+                       compatible = "ns16550";
+                       reg = <0 0x31100 0xa00>;
+                       reg-shift = <2>;
+                       reg-io-width = <4>;
+                       clock-frequency = <133333333>;
+                       interrupt-parent = <&pic>;
+                       interrupts = <18>;
+               };
+               i2c0: ocores@37100 {
+                       compatible = "opencores,i2c-ocores";
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+                       reg = <0 0x37100 0x20>;
+                       reg-shift = <2>;
+                       reg-io-width = <4>;
+                       clock-frequency = <32000000>;
+                       interrupt-parent = <&pic>;
+                       interrupts = <30>;
+               };
+               i2c1: ocores@37120 {
+                       compatible = "opencores,i2c-ocores";
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+                       reg = <0 0x37120 0x20>;
+                       reg-shift = <2>;
+                       reg-io-width = <4>;
+                       clock-frequency = <32000000>;
+                       interrupt-parent = <&pic>;
+                       interrupts = <31>;
+
+                       rtc@68 {
+                               compatible = "dallas,ds1374";
+                               reg = <0x68>;
+                       };
+
+                       dtt@4c {
+                               compatible = "national,lm90";
+                               reg = <0x4c>;
+                       };
+               };
+               pic: pic@4000 {
+                       compatible = "netlogic,xlp-pic";
+                       #address-cells = <0>;
+                       #interrupt-cells = <1>;
+                       reg = <0 0x4000 0x200>;
+                       interrupt-controller;
+               };
+
+               nor_flash@1,0 {
+                       compatible = "cfi-flash";
+                       #address-cells = <1>;
+                       #size-cells = <1>;
+                       bank-width = <2>;
+                       reg = <1 0 0x1000000>;
+
+                       partition@0 {
+                               label = "x-loader";
+                               reg = <0x0 0x100000>; /* 1M */
+                               read-only;
+                       };
+
+                       partition@100000 {
+                               label = "u-boot";
+                               reg = <0x100000 0x100000>; /* 1M */
+                       };
+
+                       partition@200000 {
+                               label = "kernel";
+                               reg = <0x200000 0x500000>; /* 5M */
+                       };
+
+                       partition@700000 {
+                               label = "rootfs";
+                               reg = <0x700000 0x800000>; /* 8M */
+                       };
+
+                       partition@f00000 {
+                               label = "env";
+                               reg = <0xf00000 0x100000>; /* 1M */
+                               read-only;
+                       };
+               };
+       };
+
+       chosen {
+               bootargs = "console=ttyS0,115200 rdinit=/sbin/init";
+       };
+};
index 9c5db10..1ebd00e 100644 (file)
@@ -9,19 +9,12 @@
        #address-cells = <2>;
        #size-cells = <2>;
 
-       memory {
-               device_type = "memory";
-               reg =  <0 0x00100000 0 0x0FF00000       // 255M at 1M
-                       0 0x20000000 0 0xa0000000       // 2560M at 512M
-                       0 0xe0000000 0 0x40000000>;
-       };
-
        soc {
                #address-cells = <2>;
                #size-cells = <1>;
                compatible = "simple-bus";
                ranges = <0 0  0 0x18000000  0x04000000   // PCIe CFG
-                         1 0  0 0x16000000  0x01000000>; // GBU chipselects
+                         1 0  0 0x16000000  0x02000000>; // GBU chipselects
 
                serial0: serial@30000 {
                        device_type = "serial";
index 85ac4a8..ed9a93c 100644 (file)
@@ -1,3 +1,4 @@
 obj-y                          += setup.o nlm_hal.o cop2-ex.o dt.o
 obj-$(CONFIG_SMP)              += wakeup.o
 obj-$(CONFIG_USB)              += usb-init.o
+obj-$(CONFIG_USB)              += usb-init-xlp2.o
index a15cdbb..88df445 100644 (file)
 #include <linux/of_platform.h>
 #include <linux/of_device.h>
 
-extern u32 __dtb_xlp_evp_begin[], __dtb_xlp_svp_begin[], __dtb_start[];
+extern u32 __dtb_xlp_evp_begin[], __dtb_xlp_svp_begin[],
+       __dtb_xlp_fvp_begin[], __dtb_start[];
 
 void __init *xlp_dt_init(void *fdtp)
 {
        if (!fdtp) {
                switch (current_cpu_data.processor_id & 0xff00) {
+#ifdef CONFIG_DT_XLP_FVP
+               case PRID_IMP_NETLOGIC_XLP2XX:
+                       fdtp = __dtb_xlp_fvp_begin;
+                       break;
+#endif
 #ifdef CONFIG_DT_XLP_SVP
                case PRID_IMP_NETLOGIC_XLP3XX:
                        fdtp = __dtb_xlp_svp_begin;
index 87560e4..56c50ba 100644 (file)
@@ -44,6 +44,7 @@
 #include <asm/netlogic/haldefs.h>
 #include <asm/netlogic/xlp-hal/iomap.h>
 #include <asm/netlogic/xlp-hal/xlp.h>
+#include <asm/netlogic/xlp-hal/bridge.h>
 #include <asm/netlogic/xlp-hal/pic.h>
 #include <asm/netlogic/xlp-hal/sys.h>
 
@@ -64,6 +65,7 @@ int nlm_irq_to_irt(int irq)
        uint64_t pcibase;
        int devoff, irt;
 
+       devoff = 0;
        switch (irq) {
        case PIC_UART_0_IRQ:
                devoff = XLP_IO_UART0_OFFSET(0);
@@ -71,44 +73,68 @@ int nlm_irq_to_irt(int irq)
        case PIC_UART_1_IRQ:
                devoff = XLP_IO_UART1_OFFSET(0);
                break;
-       case PIC_EHCI_0_IRQ:
-               devoff = XLP_IO_USB_EHCI0_OFFSET(0);
-               break;
-       case PIC_EHCI_1_IRQ:
-               devoff = XLP_IO_USB_EHCI1_OFFSET(0);
-               break;
-       case PIC_OHCI_0_IRQ:
-               devoff = XLP_IO_USB_OHCI0_OFFSET(0);
-               break;
-       case PIC_OHCI_1_IRQ:
-               devoff = XLP_IO_USB_OHCI1_OFFSET(0);
-               break;
-       case PIC_OHCI_2_IRQ:
-               devoff = XLP_IO_USB_OHCI2_OFFSET(0);
-               break;
-       case PIC_OHCI_3_IRQ:
-               devoff = XLP_IO_USB_OHCI3_OFFSET(0);
-               break;
        case PIC_MMC_IRQ:
                devoff = XLP_IO_SD_OFFSET(0);
                break;
-       case PIC_I2C_0_IRQ:
-               devoff = XLP_IO_I2C0_OFFSET(0);
-               break;
+       case PIC_I2C_0_IRQ:     /* I2C will be fixed up */
        case PIC_I2C_1_IRQ:
-               devoff = XLP_IO_I2C1_OFFSET(0);
+       case PIC_I2C_2_IRQ:
+       case PIC_I2C_3_IRQ:
+               if (cpu_is_xlpii())
+                       devoff = XLP2XX_IO_I2C_OFFSET(0);
+               else
+                       devoff = XLP_IO_I2C0_OFFSET(0);
                break;
        default:
-               devoff = 0;
-               break;
+               if (cpu_is_xlpii()) {
+                       switch (irq) {
+                               /* XLP2XX has three XHCI USB controllers */
+                       case PIC_2XX_XHCI_0_IRQ:
+                               devoff = XLP2XX_IO_USB_XHCI0_OFFSET(0);
+                               break;
+                       case PIC_2XX_XHCI_1_IRQ:
+                               devoff = XLP2XX_IO_USB_XHCI1_OFFSET(0);
+                               break;
+                       case PIC_2XX_XHCI_2_IRQ:
+                               devoff = XLP2XX_IO_USB_XHCI2_OFFSET(0);
+                               break;
+                       }
+               } else {
+                       switch (irq) {
+                       case PIC_EHCI_0_IRQ:
+                               devoff = XLP_IO_USB_EHCI0_OFFSET(0);
+                               break;
+                       case PIC_EHCI_1_IRQ:
+                               devoff = XLP_IO_USB_EHCI1_OFFSET(0);
+                               break;
+                       case PIC_OHCI_0_IRQ:
+                               devoff = XLP_IO_USB_OHCI0_OFFSET(0);
+                               break;
+                       case PIC_OHCI_1_IRQ:
+                               devoff = XLP_IO_USB_OHCI1_OFFSET(0);
+                               break;
+                       case PIC_OHCI_2_IRQ:
+                               devoff = XLP_IO_USB_OHCI2_OFFSET(0);
+                               break;
+                       case PIC_OHCI_3_IRQ:
+                               devoff = XLP_IO_USB_OHCI3_OFFSET(0);
+                               break;
+                       }
+               }
        }
 
        if (devoff != 0) {
                pcibase = nlm_pcicfg_base(devoff);
                irt = nlm_read_reg(pcibase, XLP_PCI_IRTINFO_REG) & 0xffff;
-               /* HW bug, I2C 1 irt entry is off by one */
-               if (irq == PIC_I2C_1_IRQ)
-                       irt = irt + 1;
+               /* HW weirdness, I2C IRT entry has to be fixed up */
+               switch (irq) {
+               case PIC_I2C_1_IRQ:
+                       irt = irt + 1; break;
+               case PIC_I2C_2_IRQ:
+                       irt = irt + 2; break;
+               case PIC_I2C_3_IRQ:
+                       irt = irt + 3; break;
+               }
        } else if (irq >= PIC_PCIE_LINK_0_IRQ && irq <= PIC_PCIE_LINK_3_IRQ) {
                /* HW bug, PCI IRT entries are bad on early silicon, fix */
                irt = PIC_IRT_PCIE_LINK_INDEX(irq - PIC_PCIE_LINK_0_IRQ);
@@ -126,19 +152,160 @@ unsigned int nlm_get_core_frequency(int node, int core)
 
        sysbase = nlm_get_node(node)->sysbase;
        rstval = nlm_read_sys_reg(sysbase, SYS_POWER_ON_RESET_CFG);
-       dfsval = nlm_read_sys_reg(sysbase, SYS_CORE_DFS_DIV_VALUE);
-       pll_divf = ((rstval >> 10) & 0x7f) + 1;
-       pll_divr = ((rstval >> 8)  & 0x3) + 1;
-       ext_div  = ((rstval >> 30) & 0x3) + 1;
-       dfs_div  = ((dfsval >> (core * 4)) & 0xf) + 1;
-
-       num = 800000000ULL * pll_divf;
-       denom = 3 * pll_divr * ext_div * dfs_div;
+       if (cpu_is_xlpii()) {
+               num = 1000000ULL * (400 * 3 + 100 * (rstval >> 26));
+               denom = 3;
+       } else {
+               dfsval = nlm_read_sys_reg(sysbase, SYS_CORE_DFS_DIV_VALUE);
+               pll_divf = ((rstval >> 10) & 0x7f) + 1;
+               pll_divr = ((rstval >> 8)  & 0x3) + 1;
+               ext_div  = ((rstval >> 30) & 0x3) + 1;
+               dfs_div  = ((dfsval >> (core * 4)) & 0xf) + 1;
+
+               num = 800000000ULL * pll_divf;
+               denom = 3 * pll_divr * ext_div * dfs_div;
+       }
        do_div(num, denom);
        return (unsigned int)num;
 }
 
+/* Calculate Frequency to the PIC from PLL.
+ * freq_out = ( ref_freq/2 * (6 + ctrl2[7:0]) + ctrl2[20:8]/2^13 ) /
+ * ((2^ctrl0[7:5]) * Table(ctrl0[26:24]))
+ */
+static unsigned int nlm_2xx_get_pic_frequency(int node)
+{
+       u32 ctrl_val0, ctrl_val2, vco_post_div, pll_post_div;
+       u32 mdiv, fdiv, pll_out_freq_den, reg_select, ref_div, pic_div;
+       u64 ref_clk, sysbase, pll_out_freq_num, ref_clk_select;
+
+       sysbase = nlm_get_node(node)->sysbase;
+
+       /* Find ref_clk_base */
+       ref_clk_select =
+               (nlm_read_sys_reg(sysbase, SYS_POWER_ON_RESET_CFG) >> 18) & 0x3;
+       switch (ref_clk_select) {
+       case 0:
+               ref_clk = 200000000ULL;
+               ref_div = 3;
+               break;
+       case 1:
+               ref_clk = 100000000ULL;
+               ref_div = 1;
+               break;
+       case 2:
+               ref_clk = 125000000ULL;
+               ref_div = 1;
+               break;
+       case 3:
+               ref_clk = 400000000ULL;
+               ref_div = 3;
+               break;
+       }
+
+       /* Find the clock source PLL device for PIC */
+       reg_select = (nlm_read_sys_reg(sysbase, SYS_CLK_DEV_SEL) >> 22) & 0x3;
+       switch (reg_select) {
+       case 0:
+               ctrl_val0 = nlm_read_sys_reg(sysbase, SYS_PLL_CTRL0);
+               ctrl_val2 = nlm_read_sys_reg(sysbase, SYS_PLL_CTRL2);
+               break;
+       case 1:
+               ctrl_val0 = nlm_read_sys_reg(sysbase, SYS_PLL_CTRL0_DEVX(0));
+               ctrl_val2 = nlm_read_sys_reg(sysbase, SYS_PLL_CTRL2_DEVX(0));
+               break;
+       case 2:
+               ctrl_val0 = nlm_read_sys_reg(sysbase, SYS_PLL_CTRL0_DEVX(1));
+               ctrl_val2 = nlm_read_sys_reg(sysbase, SYS_PLL_CTRL2_DEVX(1));
+               break;
+       case 3:
+               ctrl_val0 = nlm_read_sys_reg(sysbase, SYS_PLL_CTRL0_DEVX(2));
+               ctrl_val2 = nlm_read_sys_reg(sysbase, SYS_PLL_CTRL2_DEVX(2));
+               break;
+       }
+
+       vco_post_div = (ctrl_val0 >> 5) & 0x7;
+       pll_post_div = (ctrl_val0 >> 24) & 0x7;
+       mdiv = ctrl_val2 & 0xff;
+       fdiv = (ctrl_val2 >> 8) & 0xfff;
+
+       /* Find PLL post divider value */
+       switch (pll_post_div) {
+       case 1:
+               pll_post_div = 2;
+               break;
+       case 3:
+               pll_post_div = 4;
+               break;
+       case 7:
+               pll_post_div = 8;
+               break;
+       case 6:
+               pll_post_div = 16;
+               break;
+       case 0:
+       default:
+               pll_post_div = 1;
+               break;
+       }
+
+       fdiv = fdiv/(1 << 13);
+       pll_out_freq_num = ((ref_clk >> 1) * (6 + mdiv)) + fdiv;
+       pll_out_freq_den = (1 << vco_post_div) * pll_post_div * 3;
+
+       if (pll_out_freq_den > 0)
+               do_div(pll_out_freq_num, pll_out_freq_den);
+
+       /* PIC post divider, which happens after PLL */
+       pic_div = (nlm_read_sys_reg(sysbase, SYS_CLK_DEV_DIV) >> 22) & 0x3;
+       do_div(pll_out_freq_num, 1 << pic_div);
+
+       return pll_out_freq_num;
+}
+
+unsigned int nlm_get_pic_frequency(int node)
+{
+       if (cpu_is_xlpii())
+               return nlm_2xx_get_pic_frequency(node);
+       else
+               return 133333333;
+}
+
 unsigned int nlm_get_cpu_frequency(void)
 {
        return nlm_get_core_frequency(0, 0);
 }
+
+/*
+ * Fills up to 8 pairs of entries containing the DRAM map of a node
+ * if n < 0, get dram map for all nodes
+ */
+int xlp_get_dram_map(int n, uint64_t *dram_map)
+{
+       uint64_t bridgebase, base, lim;
+       uint32_t val;
+       int i, node, rv;
+
+       /* Look only at mapping on Node 0, we don't handle crazy configs */
+       bridgebase = nlm_get_bridge_regbase(0);
+       rv = 0;
+       for (i = 0; i < 8; i++) {
+               val = nlm_read_bridge_reg(bridgebase,
+                                       BRIDGE_DRAM_NODE_TRANSLN(i));
+               node = (val >> 1) & 0x3;
+               if (n >= 0 && n != node)
+                       continue;
+               val = nlm_read_bridge_reg(bridgebase, BRIDGE_DRAM_BAR(i));
+               val = (val >>  12) & 0xfffff;
+               base = (uint64_t) val << 20;
+               val = nlm_read_bridge_reg(bridgebase, BRIDGE_DRAM_LIMIT(i));
+               val = (val >>  12) & 0xfffff;
+               if (val == 0)   /* BAR not used */
+                       continue;
+               lim = ((uint64_t)val + 1) << 20;
+               dram_map[rv] = base;
+               dram_map[rv + 1] = lim;
+               rv += 2;
+       }
+       return rv;
+}
index 7b638f7..76a7131 100644 (file)
@@ -73,6 +73,23 @@ static void nlm_fixup_mem(void)
        }
 }
 
+static void __init xlp_init_mem_from_bars(void)
+{
+       uint64_t map[16];
+       int i, n;
+
+       n = xlp_get_dram_map(-1, map);  /* -1: info for all nodes */
+       for (i = 0; i < n; i += 2) {
+               /* exclude 0x1000_0000-0x2000_0000, u-boot device */
+               if (map[i] <= 0x10000000 && map[i+1] > 0x10000000)
+                       map[i+1] = 0x10000000;
+               if (map[i] > 0x10000000 && map[i] < 0x20000000)
+                       map[i] = 0x20000000;
+
+               add_memory_region(map[i], map[i+1] - map[i], BOOT_MEM_RAM);
+       }
+}
+
 void __init plat_mem_setup(void)
 {
        panic_timeout   = 5;
@@ -82,12 +99,23 @@ void __init plat_mem_setup(void)
 
        /* memory and bootargs from DT */
        early_init_devtree(initial_boot_params);
+
+       if (boot_mem_map.nr_map == 0) {
+               pr_info("Using DRAM BARs for memory map.\n");
+               xlp_init_mem_from_bars();
+       }
+       /* Calculate and setup wired entries for mapped kernel */
        nlm_fixup_mem();
 }
 
 const char *get_system_type(void)
 {
-       return "Netlogic XLP Series";
+       switch (read_c0_prid() & 0xff00) {
+       case PRID_IMP_NETLOGIC_XLP2XX:
+               return "Broadcom XLPII Series";
+       default:
+               return "Netlogic XLP Series";
+       }
 }
 
 void __init prom_free_prom_memory(void)
@@ -97,12 +125,20 @@ void __init prom_free_prom_memory(void)
 
 void xlp_mmu_init(void)
 {
-       /* enable extended TLB and Large Fixed TLB */
-       write_c0_config6(read_c0_config6() | 0x24);
-
-       /* set page mask of Fixed TLB in config7 */
-       write_c0_config7(PM_DEFAULT_MASK >>
-               (13 + (ffz(PM_DEFAULT_MASK >> 13) / 2)));
+       u32 conf4;
+
+       if (cpu_is_xlpii()) {
+               /* XLPII series has extended pagesize in config 4 */
+               conf4 = read_c0_config4() & ~0x1f00u;
+               write_c0_config4(conf4 | ((PAGE_SHIFT - 10) / 2 << 8));
+       } else {
+               /* enable extended TLB and Large Fixed TLB */
+               write_c0_config6(read_c0_config6() | 0x24);
+
+               /* set page mask of extended Fixed TLB in config7 */
+               write_c0_config7(PM_DEFAULT_MASK >>
+                       (13 + (ffz(PM_DEFAULT_MASK >> 13) / 2)));
+       }
 }
 
 void nlm_percpu_init(int hwcpuid)
diff --git a/arch/mips/netlogic/xlp/usb-init-xlp2.c b/arch/mips/netlogic/xlp/usb-init-xlp2.c
new file mode 100644 (file)
index 0000000..36e9c22
--- /dev/null
@@ -0,0 +1,218 @@
+/*
+ * Copyright (c) 2003-2013 Broadcom Corporation
+ * All Rights Reserved
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the Broadcom
+ * license below:
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY BROADCOM ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL BROADCOM OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
+ * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <linux/dma-mapping.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+#include <linux/irq.h>
+
+#include <asm/netlogic/common.h>
+#include <asm/netlogic/haldefs.h>
+#include <asm/netlogic/xlp-hal/iomap.h>
+#include <asm/netlogic/xlp-hal/xlp.h>
+
+#define XLPII_USB3_CTL_0               0xc0
+#define XLPII_VAUXRST                  BIT(0)
+#define XLPII_VCCRST                   BIT(1)
+#define XLPII_NUM2PORT                 9
+#define XLPII_NUM3PORT                 13
+#define XLPII_RTUNEREQ                 BIT(20)
+#define XLPII_MS_CSYSREQ               BIT(21)
+#define XLPII_XS_CSYSREQ               BIT(22)
+#define XLPII_RETENABLEN               BIT(23)
+#define XLPII_TX2RX                    BIT(24)
+#define XLPII_XHCIREV                  BIT(25)
+#define XLPII_ECCDIS                   BIT(26)
+
+#define XLPII_USB3_INT_REG             0xc2
+#define XLPII_USB3_INT_MASK            0xc3
+
+#define XLPII_USB_PHY_TEST             0xc6
+#define XLPII_PRESET                   BIT(0)
+#define XLPII_ATERESET                 BIT(1)
+#define XLPII_LOOPEN                   BIT(2)
+#define XLPII_TESTPDHSP                        BIT(3)
+#define XLPII_TESTPDSSP                        BIT(4)
+#define XLPII_TESTBURNIN               BIT(5)
+
+#define XLPII_USB_PHY_LOS_LV           0xc9
+#define XLPII_LOSLEV                   0
+#define XLPII_LOSBIAS                  5
+#define XLPII_SQRXTX                   8
+#define XLPII_TXBOOST                  11
+#define XLPII_RSLKSEL                  16
+#define XLPII_FSEL                     20
+
+#define XLPII_USB_RFCLK_REG            0xcc
+#define XLPII_VVLD                     30
+
+#define nlm_read_usb_reg(b, r)         nlm_read_reg(b, r)
+#define nlm_write_usb_reg(b, r, v)     nlm_write_reg(b, r, v)
+
+#define nlm_xlpii_get_usb_pcibase(node, inst)          \
+       nlm_pcicfg_base(XLP2XX_IO_USB_OFFSET(node, inst))
+#define nlm_xlpii_get_usb_regbase(node, inst)          \
+       (nlm_xlpii_get_usb_pcibase(node, inst) + XLP_IO_PCI_HDRSZ)
+
+static void xlpii_usb_ack(struct irq_data *data)
+{
+       u64 port_addr;
+
+       switch (data->irq) {
+       case PIC_2XX_XHCI_0_IRQ:
+               port_addr = nlm_xlpii_get_usb_regbase(0, 1);
+               break;
+       case PIC_2XX_XHCI_1_IRQ:
+               port_addr = nlm_xlpii_get_usb_regbase(0, 2);
+               break;
+       case PIC_2XX_XHCI_2_IRQ:
+               port_addr = nlm_xlpii_get_usb_regbase(0, 3);
+               break;
+       default:
+               pr_err("No matching USB irq!\n");
+               return;
+       }
+       nlm_write_usb_reg(port_addr, XLPII_USB3_INT_REG, 0xffffffff);
+}
+
+static void nlm_xlpii_usb_hw_reset(int node, int port)
+{
+       u64 port_addr, xhci_base, pci_base;
+       void __iomem *corebase;
+       u32 val;
+
+       port_addr = nlm_xlpii_get_usb_regbase(node, port);
+
+       /* Set frequency */
+       val = nlm_read_usb_reg(port_addr, XLPII_USB_PHY_LOS_LV);
+       val &= ~(0x3f << XLPII_FSEL);
+       val |= (0x27 << XLPII_FSEL);
+       nlm_write_usb_reg(port_addr, XLPII_USB_PHY_LOS_LV, val);
+
+       val = nlm_read_usb_reg(port_addr, XLPII_USB_RFCLK_REG);
+       val |= (1 << XLPII_VVLD);
+       nlm_write_usb_reg(port_addr, XLPII_USB_RFCLK_REG, val);
+
+       /* PHY reset */
+       val = nlm_read_usb_reg(port_addr, XLPII_USB_PHY_TEST);
+       val &= (XLPII_ATERESET | XLPII_LOOPEN | XLPII_TESTPDHSP
+               | XLPII_TESTPDSSP | XLPII_TESTBURNIN);
+       nlm_write_usb_reg(port_addr, XLPII_USB_PHY_TEST, val);
+
+       /* Setup control register */
+       val =  XLPII_VAUXRST | XLPII_VCCRST | (1 << XLPII_NUM2PORT)
+               | (1 << XLPII_NUM3PORT) | XLPII_MS_CSYSREQ | XLPII_XS_CSYSREQ
+               | XLPII_RETENABLEN | XLPII_XHCIREV;
+       nlm_write_usb_reg(port_addr, XLPII_USB3_CTL_0, val);
+
+       /* Enable interrupts */
+       nlm_write_usb_reg(port_addr, XLPII_USB3_INT_MASK, 0x00000001);
+
+       /* Clear all interrupts */
+       nlm_write_usb_reg(port_addr, XLPII_USB3_INT_REG, 0xffffffff);
+
+       udelay(2000);
+
+       /* XHCI configuration at PCI mem */
+       pci_base = nlm_xlpii_get_usb_pcibase(node, port);
+       xhci_base = nlm_read_usb_reg(pci_base, 0x4) & ~0xf;
+       corebase = ioremap(xhci_base, 0x10000);
+       if (!corebase)
+               return;
+
+       writel(0x240002, corebase + 0xc2c0);
+       /* GCTL 0xc110 */
+       val = readl(corebase + 0xc110);
+       val &= ~(0x3 << 12);
+       val |= (1 << 12);
+       writel(val, corebase + 0xc110);
+       udelay(100);
+
+       /* PHYCFG 0xc200 */
+       val = readl(corebase + 0xc200);
+       val &= ~(1 << 6);
+       writel(val, corebase + 0xc200);
+       udelay(100);
+
+       /* PIPECTL 0xc2c0 */
+       val = readl(corebase + 0xc2c0);
+       val &= ~(1 << 17);
+       writel(val, corebase + 0xc2c0);
+
+       iounmap(corebase);
+}
+
+static int __init nlm_platform_xlpii_usb_init(void)
+{
+       if (!cpu_is_xlpii())
+               return 0;
+
+       pr_info("Initializing 2XX USB Interface\n");
+       nlm_xlpii_usb_hw_reset(0, 1);
+       nlm_xlpii_usb_hw_reset(0, 2);
+       nlm_xlpii_usb_hw_reset(0, 3);
+       nlm_set_pic_extra_ack(0, PIC_2XX_XHCI_0_IRQ, xlpii_usb_ack);
+       nlm_set_pic_extra_ack(0, PIC_2XX_XHCI_1_IRQ, xlpii_usb_ack);
+       nlm_set_pic_extra_ack(0, PIC_2XX_XHCI_2_IRQ, xlpii_usb_ack);
+
+       return 0;
+}
+
+arch_initcall(nlm_platform_xlpii_usb_init);
+
+static u64 xlp_usb_dmamask = ~(u32)0;
+
+/* Fixup IRQ for USB devices on XLP the SoC PCIe bus */
+static void nlm_usb_fixup_final(struct pci_dev *dev)
+{
+       dev->dev.dma_mask               = &xlp_usb_dmamask;
+       dev->dev.coherent_dma_mask      = DMA_BIT_MASK(32);
+       switch (dev->devfn) {
+       case 0x21:
+               dev->irq = PIC_2XX_XHCI_0_IRQ;
+               break;
+       case 0x22:
+               dev->irq = PIC_2XX_XHCI_1_IRQ;
+               break;
+       case 0x23:
+               dev->irq = PIC_2XX_XHCI_2_IRQ;
+               break;
+       }
+}
+
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_NETLOGIC, PCI_DEVICE_ID_NLM_XHCI,
+               nlm_usb_fixup_final);
index ef3897e..f811798 100644 (file)
@@ -75,8 +75,7 @@ static void nlm_usb_intr_en(int node, int port)
        port_addr = nlm_get_usb_regbase(node, port);
        val = nlm_read_usb_reg(port_addr, USB_INT_EN);
        val = USB_CTRL_INTERRUPT_EN  | USB_OHCI_INTERRUPT_EN |
-               USB_OHCI_INTERRUPT1_EN | USB_CTRL_INTERRUPT_EN  |
-               USB_OHCI_INTERRUPT_EN | USB_OHCI_INTERRUPT2_EN;
+               USB_OHCI_INTERRUPT1_EN | USB_OHCI_INTERRUPT2_EN;
        nlm_write_usb_reg(port_addr, USB_INT_EN, val);
 }
 
@@ -100,6 +99,9 @@ static void nlm_usb_hw_reset(int node, int port)
 
 static int __init nlm_platform_usb_init(void)
 {
+       if (cpu_is_xlpii())
+               return 0;
+
        pr_info("Initializing USB Interface\n");
        nlm_usb_hw_reset(0, 0);
        nlm_usb_hw_reset(0, 3);
index 0cce37c..682d563 100644 (file)
@@ -58,10 +58,12 @@ static int xlp_wakeup_core(uint64_t sysbase, int node, int core)
 
        coremask = (1 << core);
 
-       /* Enable CPU clock */
-       value = nlm_read_sys_reg(sysbase, SYS_CORE_DFS_DIS_CTRL);
-       value &= ~coremask;
-       nlm_write_sys_reg(sysbase, SYS_CORE_DFS_DIS_CTRL, value);
+       /* Enable CPU clock in case of 8xx/3xx */
+       if (!cpu_is_xlpii()) {
+               value = nlm_read_sys_reg(sysbase, SYS_CORE_DFS_DIS_CTRL);
+               value &= ~coremask;
+               nlm_write_sys_reg(sysbase, SYS_CORE_DFS_DIS_CTRL, value);
+       }
 
        /* Remove CPU Reset */
        value = nlm_read_sys_reg(sysbase, SYS_CPU_RESET);
index c382042..719e455 100644 (file)
@@ -41,6 +41,7 @@ obj-$(CONFIG_SIBYTE_BCM1x80)  += pci-bcm1480.o pci-bcm1480ht.o
 obj-$(CONFIG_SNI_RM)           += fixup-sni.o ops-sni.o
 obj-$(CONFIG_LANTIQ)           += fixup-lantiq.o
 obj-$(CONFIG_PCI_LANTIQ)       += pci-lantiq.o ops-lantiq.o
+obj-$(CONFIG_SOC_RT3883)       += pci-rt3883.o
 obj-$(CONFIG_TANBAC_TB0219)    += fixup-tb0219.o
 obj-$(CONFIG_TANBAC_TB0226)    += fixup-tb0226.o
 obj-$(CONFIG_TANBAC_TB0287)    += fixup-tb0287.o
index 95c2ea8..59cccd9 100644 (file)
@@ -586,15 +586,16 @@ static int __init octeon_pci_setup(void)
        else
                octeon_dma_bar_type = OCTEON_DMA_BAR_TYPE_BIG;
 
-       /* PCI I/O and PCI MEM values */
-       set_io_port_base(OCTEON_PCI_IOSPACE_BASE);
-       ioport_resource.start = 0;
-       ioport_resource.end = OCTEON_PCI_IOSPACE_SIZE - 1;
        if (!octeon_is_pci_host()) {
                pr_notice("Not in host mode, PCI Controller not initialized\n");
                return 0;
        }
 
+       /* PCI I/O and PCI MEM values */
+       set_io_port_base(OCTEON_PCI_IOSPACE_BASE);
+       ioport_resource.start = 0;
+       ioport_resource.end = OCTEON_PCI_IOSPACE_SIZE - 1;
+
        pr_notice("%s Octeon big bar support\n",
                  (octeon_dma_bar_type ==
                  OCTEON_DMA_BAR_TYPE_BIG) ? "Enabling" : "Disabling");
diff --git a/arch/mips/pci/pci-rt3883.c b/arch/mips/pci/pci-rt3883.c
new file mode 100644 (file)
index 0000000..95c9d41
--- /dev/null
@@ -0,0 +1,636 @@
+/*
+ *  Ralink RT3662/RT3883 SoC PCI support
+ *
+ *  Copyright (C) 2011-2013 Gabor Juhos <juhosg@openwrt.org>
+ *
+ *  Parts of this file are based on Ralink's 2.6.21 BSP
+ *
+ *  This program is free software; you can redistribute it and/or modify it
+ *  under the terms of the GNU General Public License version 2 as published
+ *  by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/pci.h>
+#include <linux/io.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+#include <linux/of_pci.h>
+#include <linux/platform_device.h>
+
+#include <asm/mach-ralink/rt3883.h>
+#include <asm/mach-ralink/ralink_regs.h>
+
+#define RT3883_MEMORY_BASE             0x00000000
+#define RT3883_MEMORY_SIZE             0x02000000
+
+#define RT3883_PCI_REG_PCICFG          0x00
+#define   RT3883_PCICFG_P2P_BR_DEVNUM_M 0xf
+#define   RT3883_PCICFG_P2P_BR_DEVNUM_S 16
+#define   RT3883_PCICFG_PCIRST         BIT(1)
+#define RT3883_PCI_REG_PCIRAW          0x04
+#define RT3883_PCI_REG_PCIINT          0x08
+#define RT3883_PCI_REG_PCIENA          0x0c
+
+#define RT3883_PCI_REG_CFGADDR         0x20
+#define RT3883_PCI_REG_CFGDATA         0x24
+#define RT3883_PCI_REG_MEMBASE         0x28
+#define RT3883_PCI_REG_IOBASE          0x2c
+#define RT3883_PCI_REG_ARBCTL          0x80
+
+#define RT3883_PCI_REG_BASE(_x)                (0x1000 + (_x) * 0x1000)
+#define RT3883_PCI_REG_BAR0SETUP(_x)   (RT3883_PCI_REG_BASE((_x)) + 0x10)
+#define RT3883_PCI_REG_IMBASEBAR0(_x)  (RT3883_PCI_REG_BASE((_x)) + 0x18)
+#define RT3883_PCI_REG_ID(_x)          (RT3883_PCI_REG_BASE((_x)) + 0x30)
+#define RT3883_PCI_REG_CLASS(_x)       (RT3883_PCI_REG_BASE((_x)) + 0x34)
+#define RT3883_PCI_REG_SUBID(_x)       (RT3883_PCI_REG_BASE((_x)) + 0x38)
+#define RT3883_PCI_REG_STATUS(_x)      (RT3883_PCI_REG_BASE((_x)) + 0x50)
+
+#define RT3883_PCI_MODE_NONE   0
+#define RT3883_PCI_MODE_PCI    BIT(0)
+#define RT3883_PCI_MODE_PCIE   BIT(1)
+#define RT3883_PCI_MODE_BOTH   (RT3883_PCI_MODE_PCI | RT3883_PCI_MODE_PCIE)
+
+#define RT3883_PCI_IRQ_COUNT   32
+
+#define RT3883_P2P_BR_DEVNUM   1
+
+struct rt3883_pci_controller {
+       void __iomem *base;
+       spinlock_t lock;
+
+       struct device_node *intc_of_node;
+       struct irq_domain *irq_domain;
+
+       struct pci_controller pci_controller;
+       struct resource io_res;
+       struct resource mem_res;
+
+       bool pcie_ready;
+};
+
+static inline struct rt3883_pci_controller *
+pci_bus_to_rt3883_controller(struct pci_bus *bus)
+{
+       struct pci_controller *hose;
+
+       hose = (struct pci_controller *) bus->sysdata;
+       return container_of(hose, struct rt3883_pci_controller, pci_controller);
+}
+
+static inline u32 rt3883_pci_r32(struct rt3883_pci_controller *rpc,
+                                unsigned reg)
+{
+       return ioread32(rpc->base + reg);
+}
+
+static inline void rt3883_pci_w32(struct rt3883_pci_controller *rpc,
+                                 u32 val, unsigned reg)
+{
+       iowrite32(val, rpc->base + reg);
+}
+
+static inline u32 rt3883_pci_get_cfgaddr(unsigned int bus, unsigned int slot,
+                                        unsigned int func, unsigned int where)
+{
+       return (bus << 16) | (slot << 11) | (func << 8) | (where & 0xfc) |
+              0x80000000;
+}
+
+static u32 rt3883_pci_read_cfg32(struct rt3883_pci_controller *rpc,
+                              unsigned bus, unsigned slot,
+                              unsigned func, unsigned reg)
+{
+       unsigned long flags;
+       u32 address;
+       u32 ret;
+
+       address = rt3883_pci_get_cfgaddr(bus, slot, func, reg);
+
+       spin_lock_irqsave(&rpc->lock, flags);
+       rt3883_pci_w32(rpc, address, RT3883_PCI_REG_CFGADDR);
+       ret = rt3883_pci_r32(rpc, RT3883_PCI_REG_CFGDATA);
+       spin_unlock_irqrestore(&rpc->lock, flags);
+
+       return ret;
+}
+
+static void rt3883_pci_write_cfg32(struct rt3883_pci_controller *rpc,
+                                unsigned bus, unsigned slot,
+                                unsigned func, unsigned reg, u32 val)
+{
+       unsigned long flags;
+       u32 address;
+
+       address = rt3883_pci_get_cfgaddr(bus, slot, func, reg);
+
+       spin_lock_irqsave(&rpc->lock, flags);
+       rt3883_pci_w32(rpc, address, RT3883_PCI_REG_CFGADDR);
+       rt3883_pci_w32(rpc, val, RT3883_PCI_REG_CFGDATA);
+       spin_unlock_irqrestore(&rpc->lock, flags);
+}
+
+static void rt3883_pci_irq_handler(unsigned int irq, struct irq_desc *desc)
+{
+       struct rt3883_pci_controller *rpc;
+       u32 pending;
+
+       rpc = irq_get_handler_data(irq);
+
+       pending = rt3883_pci_r32(rpc, RT3883_PCI_REG_PCIINT) &
+                 rt3883_pci_r32(rpc, RT3883_PCI_REG_PCIENA);
+
+       if (!pending) {
+               spurious_interrupt();
+               return;
+       }
+
+       while (pending) {
+               unsigned bit = __ffs(pending);
+
+               irq = irq_find_mapping(rpc->irq_domain, bit);
+               generic_handle_irq(irq);
+
+               pending &= ~BIT(bit);
+       }
+}
+
+static void rt3883_pci_irq_unmask(struct irq_data *d)
+{
+       struct rt3883_pci_controller *rpc;
+       u32 t;
+
+       rpc = irq_data_get_irq_chip_data(d);
+
+       t = rt3883_pci_r32(rpc, RT3883_PCI_REG_PCIENA);
+       rt3883_pci_w32(rpc, t | BIT(d->hwirq), RT3883_PCI_REG_PCIENA);
+       /* flush write */
+       rt3883_pci_r32(rpc, RT3883_PCI_REG_PCIENA);
+}
+
+static void rt3883_pci_irq_mask(struct irq_data *d)
+{
+       struct rt3883_pci_controller *rpc;
+       u32 t;
+
+       rpc = irq_data_get_irq_chip_data(d);
+
+       t = rt3883_pci_r32(rpc, RT3883_PCI_REG_PCIENA);
+       rt3883_pci_w32(rpc, t & ~BIT(d->hwirq), RT3883_PCI_REG_PCIENA);
+       /* flush write */
+       rt3883_pci_r32(rpc, RT3883_PCI_REG_PCIENA);
+}
+
+static struct irq_chip rt3883_pci_irq_chip = {
+       .name           = "RT3883 PCI",
+       .irq_mask       = rt3883_pci_irq_mask,
+       .irq_unmask     = rt3883_pci_irq_unmask,
+       .irq_mask_ack   = rt3883_pci_irq_mask,
+};
+
+static int rt3883_pci_irq_map(struct irq_domain *d, unsigned int irq,
+                             irq_hw_number_t hw)
+{
+       irq_set_chip_and_handler(irq, &rt3883_pci_irq_chip, handle_level_irq);
+       irq_set_chip_data(irq, d->host_data);
+
+       return 0;
+}
+
+static const struct irq_domain_ops rt3883_pci_irq_domain_ops = {
+       .map = rt3883_pci_irq_map,
+       .xlate = irq_domain_xlate_onecell,
+};
+
+static int rt3883_pci_irq_init(struct device *dev,
+                              struct rt3883_pci_controller *rpc)
+{
+       int irq;
+
+       irq = irq_of_parse_and_map(rpc->intc_of_node, 0);
+       if (irq == 0) {
+               dev_err(dev, "%s has no IRQ",
+                       of_node_full_name(rpc->intc_of_node));
+               return -EINVAL;
+       }
+
+       /* disable all interrupts */
+       rt3883_pci_w32(rpc, 0, RT3883_PCI_REG_PCIENA);
+
+       rpc->irq_domain =
+               irq_domain_add_linear(rpc->intc_of_node, RT3883_PCI_IRQ_COUNT,
+                                     &rt3883_pci_irq_domain_ops,
+                                     rpc);
+       if (!rpc->irq_domain) {
+               dev_err(dev, "unable to add IRQ domain\n");
+               return -ENODEV;
+       }
+
+       irq_set_handler_data(irq, rpc);
+       irq_set_chained_handler(irq, rt3883_pci_irq_handler);
+
+       return 0;
+}
+
+static int rt3883_pci_config_read(struct pci_bus *bus, unsigned int devfn,
+                                 int where, int size, u32 *val)
+{
+       struct rt3883_pci_controller *rpc;
+       unsigned long flags;
+       u32 address;
+       u32 data;
+
+       rpc = pci_bus_to_rt3883_controller(bus);
+
+       if (!rpc->pcie_ready && bus->number == 1)
+               return PCIBIOS_DEVICE_NOT_FOUND;
+
+       address = rt3883_pci_get_cfgaddr(bus->number, PCI_SLOT(devfn),
+                                        PCI_FUNC(devfn), where);
+
+       spin_lock_irqsave(&rpc->lock, flags);
+       rt3883_pci_w32(rpc, address, RT3883_PCI_REG_CFGADDR);
+       data = rt3883_pci_r32(rpc, RT3883_PCI_REG_CFGDATA);
+       spin_unlock_irqrestore(&rpc->lock, flags);
+
+       switch (size) {
+       case 1:
+               *val = (data >> ((where & 3) << 3)) & 0xff;
+               break;
+       case 2:
+               *val = (data >> ((where & 3) << 3)) & 0xffff;
+               break;
+       case 4:
+               *val = data;
+               break;
+       }
+
+       return PCIBIOS_SUCCESSFUL;
+}
+
+static int rt3883_pci_config_write(struct pci_bus *bus, unsigned int devfn,
+                                  int where, int size, u32 val)
+{
+       struct rt3883_pci_controller *rpc;
+       unsigned long flags;
+       u32 address;
+       u32 data;
+
+       rpc = pci_bus_to_rt3883_controller(bus);
+
+       if (!rpc->pcie_ready && bus->number == 1)
+               return PCIBIOS_DEVICE_NOT_FOUND;
+
+       address = rt3883_pci_get_cfgaddr(bus->number, PCI_SLOT(devfn),
+                                        PCI_FUNC(devfn), where);
+
+       spin_lock_irqsave(&rpc->lock, flags);
+       rt3883_pci_w32(rpc, address, RT3883_PCI_REG_CFGADDR);
+       data = rt3883_pci_r32(rpc, RT3883_PCI_REG_CFGDATA);
+
+       switch (size) {
+       case 1:
+               data = (data & ~(0xff << ((where & 3) << 3))) |
+                      (val << ((where & 3) << 3));
+               break;
+       case 2:
+               data = (data & ~(0xffff << ((where & 3) << 3))) |
+                      (val << ((where & 3) << 3));
+               break;
+       case 4:
+               data = val;
+               break;
+       }
+
+       rt3883_pci_w32(rpc, data, RT3883_PCI_REG_CFGDATA);
+       spin_unlock_irqrestore(&rpc->lock, flags);
+
+       return PCIBIOS_SUCCESSFUL;
+}
+
+static struct pci_ops rt3883_pci_ops = {
+       .read   = rt3883_pci_config_read,
+       .write  = rt3883_pci_config_write,
+};
+
+static void rt3883_pci_preinit(struct rt3883_pci_controller *rpc, unsigned mode)
+{
+       u32 syscfg1;
+       u32 rstctrl;
+       u32 clkcfg1;
+       u32 t;
+
+       rstctrl = rt_sysc_r32(RT3883_SYSC_REG_RSTCTRL);
+       syscfg1 = rt_sysc_r32(RT3883_SYSC_REG_SYSCFG1);
+       clkcfg1 = rt_sysc_r32(RT3883_SYSC_REG_CLKCFG1);
+
+       if (mode & RT3883_PCI_MODE_PCIE) {
+               rstctrl |= RT3883_RSTCTRL_PCIE;
+               rt_sysc_w32(rstctrl, RT3883_SYSC_REG_RSTCTRL);
+
+               /* setup PCI PAD drive mode */
+               syscfg1 &= ~(0x30);
+               syscfg1 |= (2 << 4);
+               rt_sysc_w32(syscfg1, RT3883_SYSC_REG_SYSCFG1);
+
+               t = rt_sysc_r32(RT3883_SYSC_REG_PCIE_CLK_GEN0);
+               t &= ~BIT(31);
+               rt_sysc_w32(t, RT3883_SYSC_REG_PCIE_CLK_GEN0);
+
+               t = rt_sysc_r32(RT3883_SYSC_REG_PCIE_CLK_GEN1);
+               t &= 0x80ffffff;
+               rt_sysc_w32(t, RT3883_SYSC_REG_PCIE_CLK_GEN1);
+
+               t = rt_sysc_r32(RT3883_SYSC_REG_PCIE_CLK_GEN1);
+               t |= 0xa << 24;
+               rt_sysc_w32(t, RT3883_SYSC_REG_PCIE_CLK_GEN1);
+
+               t = rt_sysc_r32(RT3883_SYSC_REG_PCIE_CLK_GEN0);
+               t |= BIT(31);
+               rt_sysc_w32(t, RT3883_SYSC_REG_PCIE_CLK_GEN0);
+
+               msleep(50);
+
+               rstctrl &= ~RT3883_RSTCTRL_PCIE;
+               rt_sysc_w32(rstctrl, RT3883_SYSC_REG_RSTCTRL);
+       }
+
+       syscfg1 |= (RT3883_SYSCFG1_PCIE_RC_MODE | RT3883_SYSCFG1_PCI_HOST_MODE);
+
+       clkcfg1 &= ~(RT3883_CLKCFG1_PCI_CLK_EN | RT3883_CLKCFG1_PCIE_CLK_EN);
+
+       if (mode & RT3883_PCI_MODE_PCI) {
+               clkcfg1 |= RT3883_CLKCFG1_PCI_CLK_EN;
+               rstctrl &= ~RT3883_RSTCTRL_PCI;
+       }
+
+       if (mode & RT3883_PCI_MODE_PCIE) {
+               clkcfg1 |= RT3883_CLKCFG1_PCIE_CLK_EN;
+               rstctrl &= ~RT3883_RSTCTRL_PCIE;
+       }
+
+       rt_sysc_w32(syscfg1, RT3883_SYSC_REG_SYSCFG1);
+       rt_sysc_w32(rstctrl, RT3883_SYSC_REG_RSTCTRL);
+       rt_sysc_w32(clkcfg1, RT3883_SYSC_REG_CLKCFG1);
+
+       msleep(500);
+
+       /*
+        * setup the device number of the P2P bridge
+        * and de-assert the reset line
+        */
+       t = (RT3883_P2P_BR_DEVNUM << RT3883_PCICFG_P2P_BR_DEVNUM_S);
+       rt3883_pci_w32(rpc, t, RT3883_PCI_REG_PCICFG);
+
+       /* flush write */
+       rt3883_pci_r32(rpc, RT3883_PCI_REG_PCICFG);
+       msleep(500);
+
+       if (mode & RT3883_PCI_MODE_PCIE) {
+               msleep(500);
+
+               t = rt3883_pci_r32(rpc, RT3883_PCI_REG_STATUS(1));
+
+               rpc->pcie_ready = t & BIT(0);
+
+               if (!rpc->pcie_ready) {
+                       /* reset the PCIe block */
+                       t = rt_sysc_r32(RT3883_SYSC_REG_RSTCTRL);
+                       t |= RT3883_RSTCTRL_PCIE;
+                       rt_sysc_w32(t, RT3883_SYSC_REG_RSTCTRL);
+                       t &= ~RT3883_RSTCTRL_PCIE;
+                       rt_sysc_w32(t, RT3883_SYSC_REG_RSTCTRL);
+
+                       /* turn off PCIe clock */
+                       t = rt_sysc_r32(RT3883_SYSC_REG_CLKCFG1);
+                       t &= ~RT3883_CLKCFG1_PCIE_CLK_EN;
+                       rt_sysc_w32(t, RT3883_SYSC_REG_CLKCFG1);
+
+                       t = rt_sysc_r32(RT3883_SYSC_REG_PCIE_CLK_GEN0);
+                       t &= ~0xf000c080;
+                       rt_sysc_w32(t, RT3883_SYSC_REG_PCIE_CLK_GEN0);
+               }
+       }
+
+       /* enable PCI arbiter */
+       rt3883_pci_w32(rpc, 0x79, RT3883_PCI_REG_ARBCTL);
+}
+
+static int rt3883_pci_probe(struct platform_device *pdev)
+{
+       struct rt3883_pci_controller *rpc;
+       struct device *dev = &pdev->dev;
+       struct device_node *np = dev->of_node;
+       struct resource *res;
+       struct device_node *child;
+       u32 val;
+       int err;
+       int mode;
+
+       rpc = devm_kzalloc(dev, sizeof(*rpc), GFP_KERNEL);
+       if (!rpc)
+               return -ENOMEM;
+
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       if (!res)
+               return -EINVAL;
+
+       rpc->base = devm_ioremap_resource(dev, res);
+       if (IS_ERR(rpc->base))
+               return PTR_ERR(rpc->base);
+
+       /* find the interrupt controller child node */
+       for_each_child_of_node(np, child) {
+               if (of_get_property(child, "interrupt-controller", NULL) &&
+                   of_node_get(child)) {
+                       rpc->intc_of_node = child;
+                       break;
+               }
+       }
+
+       if (!rpc->intc_of_node) {
+               dev_err(dev, "%s has no %s child node",
+                       of_node_full_name(rpc->intc_of_node),
+                       "interrupt controller");
+               return -EINVAL;
+       }
+
+       /* find the PCI host bridge child node */
+       for_each_child_of_node(np, child) {
+               if (child->type &&
+                   of_node_cmp(child->type, "pci") == 0 &&
+                   of_node_get(child)) {
+                       rpc->pci_controller.of_node = child;
+                       break;
+               }
+       }
+
+       if (!rpc->pci_controller.of_node) {
+               dev_err(dev, "%s has no %s child node",
+                       of_node_full_name(rpc->intc_of_node),
+                       "PCI host bridge");
+               err = -EINVAL;
+               goto err_put_intc_node;
+       }
+
+       mode = RT3883_PCI_MODE_NONE;
+       for_each_available_child_of_node(rpc->pci_controller.of_node, child) {
+               int devfn;
+
+               if (!child->type ||
+                   of_node_cmp(child->type, "pci") != 0)
+                       continue;
+
+               devfn = of_pci_get_devfn(child);
+               if (devfn < 0)
+                       continue;
+
+               switch (PCI_SLOT(devfn)) {
+               case 1:
+                       mode |= RT3883_PCI_MODE_PCIE;
+                       break;
+
+               case 17:
+               case 18:
+                       mode |= RT3883_PCI_MODE_PCI;
+                       break;
+               }
+       }
+
+       if (mode == RT3883_PCI_MODE_NONE) {
+               dev_err(dev, "unable to determine PCI mode\n");
+               err = -EINVAL;
+               goto err_put_hb_node;
+       }
+
+       dev_info(dev, "mode:%s%s\n",
+                (mode & RT3883_PCI_MODE_PCI) ? " PCI" : "",
+                (mode & RT3883_PCI_MODE_PCIE) ? " PCIe" : "");
+
+       rt3883_pci_preinit(rpc, mode);
+
+       rpc->pci_controller.pci_ops = &rt3883_pci_ops;
+       rpc->pci_controller.io_resource = &rpc->io_res;
+       rpc->pci_controller.mem_resource = &rpc->mem_res;
+
+       /* Load PCI I/O and memory resources from DT */
+       pci_load_of_ranges(&rpc->pci_controller,
+                          rpc->pci_controller.of_node);
+
+       rt3883_pci_w32(rpc, rpc->mem_res.start, RT3883_PCI_REG_MEMBASE);
+       rt3883_pci_w32(rpc, rpc->io_res.start, RT3883_PCI_REG_IOBASE);
+
+       ioport_resource.start = rpc->io_res.start;
+       ioport_resource.end = rpc->io_res.end;
+
+       /* PCI */
+       rt3883_pci_w32(rpc, 0x03ff0000, RT3883_PCI_REG_BAR0SETUP(0));
+       rt3883_pci_w32(rpc, RT3883_MEMORY_BASE, RT3883_PCI_REG_IMBASEBAR0(0));
+       rt3883_pci_w32(rpc, 0x08021814, RT3883_PCI_REG_ID(0));
+       rt3883_pci_w32(rpc, 0x00800001, RT3883_PCI_REG_CLASS(0));
+       rt3883_pci_w32(rpc, 0x28801814, RT3883_PCI_REG_SUBID(0));
+
+       /* PCIe */
+       rt3883_pci_w32(rpc, 0x03ff0000, RT3883_PCI_REG_BAR0SETUP(1));
+       rt3883_pci_w32(rpc, RT3883_MEMORY_BASE, RT3883_PCI_REG_IMBASEBAR0(1));
+       rt3883_pci_w32(rpc, 0x08021814, RT3883_PCI_REG_ID(1));
+       rt3883_pci_w32(rpc, 0x06040001, RT3883_PCI_REG_CLASS(1));
+       rt3883_pci_w32(rpc, 0x28801814, RT3883_PCI_REG_SUBID(1));
+
+       err = rt3883_pci_irq_init(dev, rpc);
+       if (err)
+               goto err_put_hb_node;
+
+       /* PCIe */
+       val = rt3883_pci_read_cfg32(rpc, 0, 0x01, 0, PCI_COMMAND);
+       val |= PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER;
+       rt3883_pci_write_cfg32(rpc, 0, 0x01, 0, PCI_COMMAND, val);
+
+       /* PCI */
+       val = rt3883_pci_read_cfg32(rpc, 0, 0x00, 0, PCI_COMMAND);
+       val |= PCI_COMMAND_IO | PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER;
+       rt3883_pci_write_cfg32(rpc, 0, 0x00, 0, PCI_COMMAND, val);
+
+       if (mode == RT3883_PCI_MODE_PCIE) {
+               rt3883_pci_w32(rpc, 0x03ff0001, RT3883_PCI_REG_BAR0SETUP(0));
+               rt3883_pci_w32(rpc, 0x03ff0001, RT3883_PCI_REG_BAR0SETUP(1));
+
+               rt3883_pci_write_cfg32(rpc, 0, RT3883_P2P_BR_DEVNUM, 0,
+                                      PCI_BASE_ADDRESS_0,
+                                      RT3883_MEMORY_BASE);
+               /* flush write */
+               rt3883_pci_read_cfg32(rpc, 0, RT3883_P2P_BR_DEVNUM, 0,
+                                     PCI_BASE_ADDRESS_0);
+       } else {
+               rt3883_pci_write_cfg32(rpc, 0, RT3883_P2P_BR_DEVNUM, 0,
+                                      PCI_IO_BASE, 0x00000101);
+       }
+
+       register_pci_controller(&rpc->pci_controller);
+
+       return 0;
+
+err_put_hb_node:
+       of_node_put(rpc->pci_controller.of_node);
+err_put_intc_node:
+       of_node_put(rpc->intc_of_node);
+       return err;
+}
+
+int __init pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
+{
+       struct of_irq dev_irq;
+       int err;
+       int irq;
+
+       err = of_irq_map_pci(dev, &dev_irq);
+       if (err) {
+               pr_err("pci %s: unable to get irq map, err=%d\n",
+                      pci_name((struct pci_dev *) dev), err);
+               return 0;
+       }
+
+       irq = irq_create_of_mapping(dev_irq.controller,
+                                   dev_irq.specifier,
+                                   dev_irq.size);
+
+       if (irq == 0)
+               pr_crit("pci %s: no irq found for pin %u\n",
+                       pci_name((struct pci_dev *) dev), pin);
+       else
+               pr_info("pci %s: using irq %d for pin %u\n",
+                       pci_name((struct pci_dev *) dev), irq, pin);
+
+       return irq;
+}
+
+int pcibios_plat_dev_init(struct pci_dev *dev)
+{
+       return 0;
+}
+
+static const struct of_device_id rt3883_pci_ids[] = {
+       { .compatible = "ralink,rt3883-pci" },
+       {},
+};
+MODULE_DEVICE_TABLE(of, rt3883_pci_ids);
+
+static struct platform_driver rt3883_pci_driver = {
+       .probe = rt3883_pci_probe,
+       .driver = {
+               .name = "rt3883-pci",
+               .owner = THIS_MODULE,
+               .of_match_table = of_match_ptr(rt3883_pci_ids),
+       },
+};
+
+static int __init rt3883_pci_init(void)
+{
+       return platform_driver_register(&rt3883_pci_driver);
+}
+
+postcore_initcall(rt3883_pci_init);
index 1a1b03e..dd91fba 100644 (file)
@@ -1,14 +1,7 @@
-config BOOTLOADER_DRIVER
-       bool "PowerTV Bootloader Driver Support"
-       default n
-       depends on POWERTV
-       help
-         Use this option if you want to load bootloader driver.
-
 config BOOTLOADER_FAMILY
        string "POWERTV Bootloader Family string"
        default "85"
-       depends on POWERTV && !BOOTLOADER_DRIVER
+       depends on POWERTV
        help
          This value should be specified when the bootloader driver is disabled
          and must be exactly two characters long. Families supported are:
index 0238af1..8380605 100644 (file)
@@ -147,20 +147,10 @@ static __init noinline void platform_set_family(void)
        if (check_forcefamily(forced_family) == 0)
                bootldr_family = BOOTLDRFAMILY(forced_family[0],
                        forced_family[1]);
-       else {
-
-#ifdef CONFIG_BOOTLOADER_DRIVER
-               bootldr_family = (unsigned short) kbldr_GetSWFamily();
-#else
-#if defined(CONFIG_BOOTLOADER_FAMILY)
+       else
                bootldr_family = (unsigned short) BOOTLDRFAMILY(
                        CONFIG_BOOTLOADER_FAMILY[0],
                        CONFIG_BOOTLOADER_FAMILY[1]);
-#else
-#error "Unknown Bootloader Family"
-#endif
-#endif
-       }
 
        pr_info("Bootloader Family = 0x%04X\n", bootldr_family);
 
index a01baff..4989263 100644 (file)
@@ -87,8 +87,4 @@ void __init prom_init(void)
 
        configure_platform();
        prom_meminit();
-
-#ifndef CONFIG_BOOTLOADER_DRIVER
-       pr_info("\nBootloader driver isn't loaded...\n");
-#endif
 }
index 0007652..11c32fb 100644 (file)
 #include <linux/io.h>
 #include <asm/reboot.h>                        /* Not included by linux/reboot.h */
 
-#ifdef CONFIG_BOOTLOADER_DRIVER
-#include <asm/mach-powertv/kbldr.h>
-#endif
-
 #include <asm/mach-powertv/asic_regs.h>
 #include "reset.h"
 
 static void mips_machine_restart(char *command)
 {
-#ifdef CONFIG_BOOTLOADER_DRIVER
-       /*
-        * Call the bootloader's reset function to ensure
-        * that persistent data is flushed before hard reset
-        */
-       kbldr_SetCauseAndReset();
-#else
        writel(0x1, asic_reg_addr(watchdog));
-#endif
 }
 
 void mips_reboot_setup(void)
index 026e823..424f034 100644 (file)
@@ -1,5 +1,12 @@
 if RALINK
 
+config CLKEVT_RT3352
+       bool
+       depends on SOC_RT305X || SOC_MT7620
+       default y
+       select CLKSRC_OF
+       select CLKSRC_MMIO
+
 choice
        prompt "Ralink SoC selection"
        default SOC_RT305X
@@ -19,9 +26,12 @@ choice
                bool "RT3883"
                select USB_ARCH_HAS_OHCI
                select USB_ARCH_HAS_EHCI
+               select HW_HAS_PCI
 
        config SOC_MT7620
                bool "MT7620"
+               select USB_ARCH_HAS_OHCI
+               select USB_ARCH_HAS_EHCI
 
 endchoice
 
index 38cf1a8..98ae349 100644 (file)
@@ -6,7 +6,9 @@
 # Copyright (C) 2009-2011 Gabor Juhos <juhosg@openwrt.org>
 # Copyright (C) 2013 John Crispin <blogic@openwrt.org>
 
-obj-y := prom.o of.o reset.o clk.o irq.o
+obj-y := prom.o of.o reset.o clk.o irq.o timer.o
+
+obj-$(CONFIG_CLKEVT_RT3352) += cevt-rt3352.o
 
 obj-$(CONFIG_SOC_RT288X) += rt288x.o
 obj-$(CONFIG_SOC_RT305X) += rt305x.o
index cda4b66..6d9c8c4 100644 (file)
@@ -26,3 +26,4 @@ cflags-$(CONFIG_SOC_RT3883)   += -I$(srctree)/arch/mips/include/asm/mach-ralink/rt
 # Ralink MT7620
 #
 load-$(CONFIG_SOC_MT7620)      += 0xffffffff80000000
+cflags-$(CONFIG_SOC_MT7620)    += -I$(srctree)/arch/mips/include/asm/mach-ralink/mt7620
diff --git a/arch/mips/ralink/cevt-rt3352.c b/arch/mips/ralink/cevt-rt3352.c
new file mode 100644 (file)
index 0000000..cc17566
--- /dev/null
@@ -0,0 +1,145 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2013 by John Crispin <blogic@openwrt.org>
+ */
+
+#include <linux/clockchips.h>
+#include <linux/clocksource.h>
+#include <linux/interrupt.h>
+#include <linux/reset.h>
+#include <linux/init.h>
+#include <linux/time.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+#include <linux/of_address.h>
+
+#include <asm/mach-ralink/ralink_regs.h>
+
+#define SYSTICK_FREQ           (50 * 1000)
+
+#define SYSTICK_CONFIG         0x00
+#define SYSTICK_COMPARE                0x04
+#define SYSTICK_COUNT          0x08
+
+/* route systick irq to mips irq 7 instead of the r4k-timer */
+#define CFG_EXT_STK_EN         0x2
+/* enable the counter */
+#define CFG_CNT_EN             0x1
+
+struct systick_device {
+       void __iomem *membase;
+       struct clock_event_device dev;
+       int irq_requested;
+       int freq_scale;
+};
+
+static void systick_set_clock_mode(enum clock_event_mode mode,
+                               struct clock_event_device *evt);
+
+static int systick_next_event(unsigned long delta,
+                               struct clock_event_device *evt)
+{
+       struct systick_device *sdev;
+       u32 count;
+
+       sdev = container_of(evt, struct systick_device, dev);
+       count = ioread32(sdev->membase + SYSTICK_COUNT);
+       count = (count + delta) % SYSTICK_FREQ;
+       iowrite32(count + delta, sdev->membase + SYSTICK_COMPARE);
+
+       return 0;
+}
+
+static void systick_event_handler(struct clock_event_device *dev)
+{
+       /* nothing to do here */
+}
+
+static irqreturn_t systick_interrupt(int irq, void *dev_id)
+{
+       struct clock_event_device *dev = (struct clock_event_device *) dev_id;
+
+       dev->event_handler(dev);
+
+       return IRQ_HANDLED;
+}
+
+static struct systick_device systick = {
+       .dev = {
+               /*
+                * cevt-r4k uses 300, make sure systick
+                * gets used if available
+                */
+               .rating         = 310,
+               .features       = CLOCK_EVT_FEAT_ONESHOT,
+               .set_next_event = systick_next_event,
+               .set_mode       = systick_set_clock_mode,
+               .event_handler  = systick_event_handler,
+       },
+};
+
+static struct irqaction systick_irqaction = {
+       .handler = systick_interrupt,
+       .flags = IRQF_PERCPU | IRQF_TIMER,
+       .dev_id = &systick.dev,
+};
+
+static void systick_set_clock_mode(enum clock_event_mode mode,
+                               struct clock_event_device *evt)
+{
+       struct systick_device *sdev;
+
+       sdev = container_of(evt, struct systick_device, dev);
+
+       switch (mode) {
+       case CLOCK_EVT_MODE_ONESHOT:
+               if (!sdev->irq_requested)
+                       setup_irq(systick.dev.irq, &systick_irqaction);
+               sdev->irq_requested = 1;
+               iowrite32(CFG_EXT_STK_EN | CFG_CNT_EN,
+                               systick.membase + SYSTICK_CONFIG);
+               break;
+
+       case CLOCK_EVT_MODE_SHUTDOWN:
+               if (sdev->irq_requested)
+                       free_irq(systick.dev.irq, &systick_irqaction);
+               sdev->irq_requested = 0;
+               iowrite32(0, systick.membase + SYSTICK_CONFIG);
+               break;
+
+       default:
+               pr_err("%s: Unhandeled mips clock_mode\n", systick.dev.name);
+               break;
+       }
+}
+
+static void __init ralink_systick_init(struct device_node *np)
+{
+       systick.membase = of_iomap(np, 0);
+       if (!systick.membase)
+               return;
+
+       systick_irqaction.name = np->name;
+       systick.dev.name = np->name;
+       clockevents_calc_mult_shift(&systick.dev, SYSTICK_FREQ, 60);
+       systick.dev.max_delta_ns = clockevent_delta2ns(0x7fff, &systick.dev);
+       systick.dev.min_delta_ns = clockevent_delta2ns(0x3, &systick.dev);
+       systick.dev.irq = irq_of_parse_and_map(np, 0);
+       if (!systick.dev.irq) {
+               pr_err("%s: request_irq failed", np->name);
+               return;
+       }
+
+       clocksource_mmio_init(systick.membase + SYSTICK_COUNT, np->name,
+                       SYSTICK_FREQ, 301, 16, clocksource_mmio_readl_up);
+
+       clockevents_register_device(&systick.dev);
+
+       pr_info("%s: runing - mult: %d, shift: %d\n",
+                       np->name, systick.dev.mult, systick.dev.shift);
+}
+
+CLOCKSOURCE_OF_DECLARE(systick, "ralink,cevt-systick", ralink_systick_init);
index 8dfa22f..bba0cdf 100644 (file)
@@ -69,4 +69,5 @@ void __init plat_time_init(void)
        pr_info("CPU Clock: %ldMHz\n", clk_get_rate(clk) / 1000000);
        mips_hpt_frequency = clk_get_rate(clk) / 2;
        clk_put(clk);
+       clocksource_of_init();
 }
index 83144c3..42dfd61 100644 (file)
@@ -46,6 +46,8 @@ extern void ralink_of_remap(void);
 extern void ralink_clk_init(void);
 extern void ralink_clk_add(const char *dev, unsigned long rate);
 
+extern void ralink_rst_init(void);
+
 extern void prom_soc_init(struct ralink_soc_info *soc_info);
 
 __iomem void *plat_of_remap_node(const char *node);
index 0018b1a..d217509 100644 (file)
@@ -23,9 +23,6 @@
 /* does the board have sdram or ddram */
 static int dram_type;
 
-/* the pll dividers */
-static u32 mt7620_clk_divider[] = { 2, 3, 4, 8 };
-
 static struct ralink_pinmux_grp mode_mux[] = {
        {
                .name = "i2c",
@@ -140,34 +137,189 @@ struct ralink_pinmux rt_gpio_pinmux = {
        .uart_mask = MT7620_GPIO_MODE_UART0_MASK,
 };
 
-void __init ralink_clk_init(void)
+static __init u32
+mt7620_calc_rate(u32 ref_rate, u32 mul, u32 div)
 {
-       unsigned long cpu_rate, sys_rate;
-       u32 c0 = rt_sysc_r32(SYSC_REG_CPLL_CONFIG0);
-       u32 c1 = rt_sysc_r32(SYSC_REG_CPLL_CONFIG1);
-       u32 swconfig = (c0 >> CPLL_SW_CONFIG_SHIFT) & CPLL_SW_CONFIG_MASK;
-       u32 cpu_clk = (c1 >> CPLL_CPU_CLK_SHIFT) & CPLL_CPU_CLK_MASK;
-
-       if (cpu_clk) {
-               cpu_rate = 480000000;
-       } else if (!swconfig) {
-               cpu_rate = 600000000;
-       } else {
-               u32 m = (c0 >> CPLL_MULT_RATIO_SHIFT) & CPLL_MULT_RATIO;
-               u32 d = (c0 >> CPLL_DIV_RATIO_SHIFT) & CPLL_DIV_RATIO;
+       u64 t;
 
-               cpu_rate = ((40 * (m + 24)) / mt7620_clk_divider[d]) * 1000000;
-       }
+       t = ref_rate;
+       t *= mul;
+       do_div(t, div);
+
+       return t;
+}
+
+#define MHZ(x)         ((x) * 1000 * 1000)
+
+static __init unsigned long
+mt7620_get_xtal_rate(void)
+{
+       u32 reg;
+
+       reg = rt_sysc_r32(SYSC_REG_SYSTEM_CONFIG0);
+       if (reg & SYSCFG0_XTAL_FREQ_SEL)
+               return MHZ(40);
+
+       return MHZ(20);
+}
+
+static __init unsigned long
+mt7620_get_periph_rate(unsigned long xtal_rate)
+{
+       u32 reg;
+
+       reg = rt_sysc_r32(SYSC_REG_CLKCFG0);
+       if (reg & CLKCFG0_PERI_CLK_SEL)
+               return xtal_rate;
+
+       return MHZ(40);
+}
+
+static const u32 mt7620_clk_divider[] __initconst = { 2, 3, 4, 8 };
+
+static __init unsigned long
+mt7620_get_cpu_pll_rate(unsigned long xtal_rate)
+{
+       u32 reg;
+       u32 mul;
+       u32 div;
+
+       reg = rt_sysc_r32(SYSC_REG_CPLL_CONFIG0);
+       if (reg & CPLL_CFG0_BYPASS_REF_CLK)
+               return xtal_rate;
+
+       if ((reg & CPLL_CFG0_SW_CFG) == 0)
+               return MHZ(600);
+
+       mul = (reg >> CPLL_CFG0_PLL_MULT_RATIO_SHIFT) &
+             CPLL_CFG0_PLL_MULT_RATIO_MASK;
+       mul += 24;
+       if (reg & CPLL_CFG0_LC_CURFCK)
+               mul *= 2;
+
+       div = (reg >> CPLL_CFG0_PLL_DIV_RATIO_SHIFT) &
+             CPLL_CFG0_PLL_DIV_RATIO_MASK;
+
+       WARN_ON(div >= ARRAY_SIZE(mt7620_clk_divider));
+
+       return mt7620_calc_rate(xtal_rate, mul, mt7620_clk_divider[div]);
+}
+
+static __init unsigned long
+mt7620_get_pll_rate(unsigned long xtal_rate, unsigned long cpu_pll_rate)
+{
+       u32 reg;
+
+       reg = rt_sysc_r32(SYSC_REG_CPLL_CONFIG1);
+       if (reg & CPLL_CFG1_CPU_AUX1)
+               return xtal_rate;
+
+       if (reg & CPLL_CFG1_CPU_AUX0)
+               return MHZ(480);
 
+       return cpu_pll_rate;
+}
+
+static __init unsigned long
+mt7620_get_cpu_rate(unsigned long pll_rate)
+{
+       u32 reg;
+       u32 mul;
+       u32 div;
+
+       reg = rt_sysc_r32(SYSC_REG_CPU_SYS_CLKCFG);
+
+       mul = reg & CPU_SYS_CLKCFG_CPU_FFRAC_MASK;
+       div = (reg >> CPU_SYS_CLKCFG_CPU_FDIV_SHIFT) &
+             CPU_SYS_CLKCFG_CPU_FDIV_MASK;
+
+       return mt7620_calc_rate(pll_rate, mul, div);
+}
+
+static const u32 mt7620_ocp_dividers[16] __initconst = {
+       [CPU_SYS_CLKCFG_OCP_RATIO_2] = 2,
+       [CPU_SYS_CLKCFG_OCP_RATIO_3] = 3,
+       [CPU_SYS_CLKCFG_OCP_RATIO_4] = 4,
+       [CPU_SYS_CLKCFG_OCP_RATIO_5] = 5,
+       [CPU_SYS_CLKCFG_OCP_RATIO_10] = 10,
+};
+
+static __init unsigned long
+mt7620_get_dram_rate(unsigned long pll_rate)
+{
        if (dram_type == SYSCFG0_DRAM_TYPE_SDRAM)
-               sys_rate = cpu_rate / 4;
-       else
-               sys_rate = cpu_rate / 3;
+               return pll_rate / 4;
+
+       return pll_rate / 3;
+}
+
+static __init unsigned long
+mt7620_get_sys_rate(unsigned long cpu_rate)
+{
+       u32 reg;
+       u32 ocp_ratio;
+       u32 div;
+
+       reg = rt_sysc_r32(SYSC_REG_CPU_SYS_CLKCFG);
+
+       ocp_ratio = (reg >> CPU_SYS_CLKCFG_OCP_RATIO_SHIFT) &
+                   CPU_SYS_CLKCFG_OCP_RATIO_MASK;
+
+       if (WARN_ON(ocp_ratio >= ARRAY_SIZE(mt7620_ocp_dividers)))
+               return cpu_rate;
+
+       div = mt7620_ocp_dividers[ocp_ratio];
+       if (WARN(!div, "invalid divider for OCP ratio %u", ocp_ratio))
+               return cpu_rate;
+
+       return cpu_rate / div;
+}
+
+void __init ralink_clk_init(void)
+{
+       unsigned long xtal_rate;
+       unsigned long cpu_pll_rate;
+       unsigned long pll_rate;
+       unsigned long cpu_rate;
+       unsigned long sys_rate;
+       unsigned long dram_rate;
+       unsigned long periph_rate;
+
+       xtal_rate = mt7620_get_xtal_rate();
+
+       cpu_pll_rate = mt7620_get_cpu_pll_rate(xtal_rate);
+       pll_rate = mt7620_get_pll_rate(xtal_rate, cpu_pll_rate);
+
+       cpu_rate = mt7620_get_cpu_rate(pll_rate);
+       dram_rate = mt7620_get_dram_rate(pll_rate);
+       sys_rate = mt7620_get_sys_rate(cpu_rate);
+       periph_rate = mt7620_get_periph_rate(xtal_rate);
+
+#define RFMT(label)    label ":%lu.%03luMHz "
+#define RINT(x)                ((x) / 1000000)
+#define RFRAC(x)       (((x) / 1000) % 1000)
+
+       pr_debug(RFMT("XTAL") RFMT("CPU_PLL") RFMT("PLL"),
+                RINT(xtal_rate), RFRAC(xtal_rate),
+                RINT(cpu_pll_rate), RFRAC(cpu_pll_rate),
+                RINT(pll_rate), RFRAC(pll_rate));
+
+       pr_debug(RFMT("CPU") RFMT("DRAM") RFMT("SYS") RFMT("PERIPH"),
+                RINT(cpu_rate), RFRAC(cpu_rate),
+                RINT(dram_rate), RFRAC(dram_rate),
+                RINT(sys_rate), RFRAC(sys_rate),
+                RINT(periph_rate), RFRAC(periph_rate));
+
+#undef RFRAC
+#undef RINT
+#undef RFMT
 
        ralink_clk_add("cpu", cpu_rate);
-       ralink_clk_add("10000100.timer", 40000000);
-       ralink_clk_add("10000500.uart", 40000000);
-       ralink_clk_add("10000c00.uartlite", 40000000);
+       ralink_clk_add("10000100.timer", periph_rate);
+       ralink_clk_add("10000120.watchdog", periph_rate);
+       ralink_clk_add("10000500.uart", periph_rate);
+       ralink_clk_add("10000b00.spi", sys_rate);
+       ralink_clk_add("10000c00.uartlite", periph_rate);
 }
 
 void __init ralink_of_remap(void)
@@ -214,16 +366,19 @@ void prom_soc_init(struct ralink_soc_info *soc_info)
 
        switch (dram_type) {
        case SYSCFG0_DRAM_TYPE_SDRAM:
+               pr_info("Board has SDRAM\n");
                soc_info->mem_size_min = MT7620_SDRAM_SIZE_MIN;
                soc_info->mem_size_max = MT7620_SDRAM_SIZE_MAX;
                break;
 
        case SYSCFG0_DRAM_TYPE_DDR1:
+               pr_info("Board has DDR1\n");
                soc_info->mem_size_min = MT7620_DDR1_SIZE_MIN;
                soc_info->mem_size_max = MT7620_DDR1_SIZE_MAX;
                break;
 
        case SYSCFG0_DRAM_TYPE_DDR2:
+               pr_info("Board has DDR2\n");
                soc_info->mem_size_min = MT7620_DDR2_SIZE_MIN;
                soc_info->mem_size_max = MT7620_DDR2_SIZE_MAX;
                break;
index f25ea5b..ce38d11 100644 (file)
@@ -110,6 +110,9 @@ static int __init plat_of_setup(void)
        if (of_platform_populate(NULL, of_ids, NULL, NULL))
                panic("failed to populate DT\n");
 
+       /* make sure that the reset controller is set up early */
+       ralink_rst_init();
+
        return 0;
 }
 
index 22120e5..55c7ec5 100644 (file)
@@ -10,6 +10,8 @@
 
 #include <linux/pm.h>
 #include <linux/io.h>
+#include <linux/of.h>
+#include <linux/reset-controller.h>
 
 #include <asm/reboot.h>
 
 #define SYSC_REG_RESET_CTRL     0x034
 #define RSTCTL_RESET_SYSTEM     BIT(0)
 
+static int ralink_assert_device(struct reset_controller_dev *rcdev,
+                               unsigned long id)
+{
+       u32 val;
+
+       if (id < 8)
+               return -1;
+
+       val = rt_sysc_r32(SYSC_REG_RESET_CTRL);
+       val |= BIT(id);
+       rt_sysc_w32(val, SYSC_REG_RESET_CTRL);
+
+       return 0;
+}
+
+static int ralink_deassert_device(struct reset_controller_dev *rcdev,
+                                 unsigned long id)
+{
+       u32 val;
+
+       if (id < 8)
+               return -1;
+
+       val = rt_sysc_r32(SYSC_REG_RESET_CTRL);
+       val &= ~BIT(id);
+       rt_sysc_w32(val, SYSC_REG_RESET_CTRL);
+
+       return 0;
+}
+
+static int ralink_reset_device(struct reset_controller_dev *rcdev,
+                              unsigned long id)
+{
+       ralink_assert_device(rcdev, id);
+       return ralink_deassert_device(rcdev, id);
+}
+
+static struct reset_control_ops reset_ops = {
+       .reset = ralink_reset_device,
+       .assert = ralink_assert_device,
+       .deassert = ralink_deassert_device,
+};
+
+static struct reset_controller_dev reset_dev = {
+       .ops                    = &reset_ops,
+       .owner                  = THIS_MODULE,
+       .nr_resets              = 32,
+       .of_reset_n_cells       = 1,
+};
+
+void ralink_rst_init(void)
+{
+       reset_dev.of_node = of_find_compatible_node(NULL, NULL,
+                                               "ralink,rt2880-reset");
+       if (!reset_dev.of_node)
+               pr_err("Failed to find reset controller node");
+       else
+               reset_controller_register(&reset_dev);
+}
+
 static void ralink_restart(char *command)
 {
        local_irq_disable();
diff --git a/arch/mips/ralink/timer.c b/arch/mips/ralink/timer.c
new file mode 100644 (file)
index 0000000..e49241a
--- /dev/null
@@ -0,0 +1,185 @@
+/*
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * Copyright (C) 2013 John Crispin <blogic@openwrt.org>
+*/
+
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/interrupt.h>
+#include <linux/timer.h>
+#include <linux/of_gpio.h>
+#include <linux/clk.h>
+
+#include <asm/mach-ralink/ralink_regs.h>
+
+#define TIMER_REG_TMRSTAT              0x00
+#define TIMER_REG_TMR0LOAD             0x10
+#define TIMER_REG_TMR0CTL              0x18
+
+#define TMRSTAT_TMR0INT                        BIT(0)
+
+#define TMR0CTL_ENABLE                 BIT(7)
+#define TMR0CTL_MODE_PERIODIC          BIT(4)
+#define TMR0CTL_PRESCALER              1
+#define TMR0CTL_PRESCALE_VAL           (0xf - TMR0CTL_PRESCALER)
+#define TMR0CTL_PRESCALE_DIV           (65536 / BIT(TMR0CTL_PRESCALER))
+
+struct rt_timer {
+       struct device   *dev;
+       void __iomem    *membase;
+       int             irq;
+       unsigned long   timer_freq;
+       unsigned long   timer_div;
+};
+
+static inline void rt_timer_w32(struct rt_timer *rt, u8 reg, u32 val)
+{
+       __raw_writel(val, rt->membase + reg);
+}
+
+static inline u32 rt_timer_r32(struct rt_timer *rt, u8 reg)
+{
+       return __raw_readl(rt->membase + reg);
+}
+
+static irqreturn_t rt_timer_irq(int irq, void *_rt)
+{
+       struct rt_timer *rt =  (struct rt_timer *) _rt;
+
+       rt_timer_w32(rt, TIMER_REG_TMR0LOAD, rt->timer_freq / rt->timer_div);
+       rt_timer_w32(rt, TIMER_REG_TMRSTAT, TMRSTAT_TMR0INT);
+
+       return IRQ_HANDLED;
+}
+
+
+static int rt_timer_request(struct rt_timer *rt)
+{
+       int err = request_irq(rt->irq, rt_timer_irq, IRQF_DISABLED,
+                                               dev_name(rt->dev), rt);
+       if (err) {
+               dev_err(rt->dev, "failed to request irq\n");
+       } else {
+               u32 t = TMR0CTL_MODE_PERIODIC | TMR0CTL_PRESCALE_VAL;
+               rt_timer_w32(rt, TIMER_REG_TMR0CTL, t);
+       }
+       return err;
+}
+
+static void rt_timer_free(struct rt_timer *rt)
+{
+       free_irq(rt->irq, rt);
+}
+
+static int rt_timer_config(struct rt_timer *rt, unsigned long divisor)
+{
+       if (rt->timer_freq < divisor)
+               rt->timer_div = rt->timer_freq;
+       else
+               rt->timer_div = divisor;
+
+       rt_timer_w32(rt, TIMER_REG_TMR0LOAD, rt->timer_freq / rt->timer_div);
+
+       return 0;
+}
+
+static int rt_timer_enable(struct rt_timer *rt)
+{
+       u32 t;
+
+       rt_timer_w32(rt, TIMER_REG_TMR0LOAD, rt->timer_freq / rt->timer_div);
+
+       t = rt_timer_r32(rt, TIMER_REG_TMR0CTL);
+       t |= TMR0CTL_ENABLE;
+       rt_timer_w32(rt, TIMER_REG_TMR0CTL, t);
+
+       return 0;
+}
+
+static void rt_timer_disable(struct rt_timer *rt)
+{
+       u32 t;
+
+       t = rt_timer_r32(rt, TIMER_REG_TMR0CTL);
+       t &= ~TMR0CTL_ENABLE;
+       rt_timer_w32(rt, TIMER_REG_TMR0CTL, t);
+}
+
+static int rt_timer_probe(struct platform_device *pdev)
+{
+       struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       struct rt_timer *rt;
+       struct clk *clk;
+
+       rt = devm_kzalloc(&pdev->dev, sizeof(*rt), GFP_KERNEL);
+       if (!rt) {
+               dev_err(&pdev->dev, "failed to allocate memory\n");
+               return -ENOMEM;
+       }
+
+       rt->irq = platform_get_irq(pdev, 0);
+       if (!rt->irq) {
+               dev_err(&pdev->dev, "failed to load irq\n");
+               return -ENOENT;
+       }
+
+       rt->membase = devm_request_and_ioremap(&pdev->dev, res);
+       if (IS_ERR(rt->membase))
+               return PTR_ERR(rt->membase);
+
+       clk = devm_clk_get(&pdev->dev, NULL);
+       if (IS_ERR(clk)) {
+               dev_err(&pdev->dev, "failed get clock rate\n");
+               return PTR_ERR(clk);
+       }
+
+       rt->timer_freq = clk_get_rate(clk) / TMR0CTL_PRESCALE_DIV;
+       if (!rt->timer_freq)
+               return -EINVAL;
+
+       rt->dev = &pdev->dev;
+       platform_set_drvdata(pdev, rt);
+
+       rt_timer_request(rt);
+       rt_timer_config(rt, 2);
+       rt_timer_enable(rt);
+
+       dev_info(&pdev->dev, "maximum frequncy is %luHz\n", rt->timer_freq);
+
+       return 0;
+}
+
+static int rt_timer_remove(struct platform_device *pdev)
+{
+       struct rt_timer *rt = platform_get_drvdata(pdev);
+
+       rt_timer_disable(rt);
+       rt_timer_free(rt);
+
+       return 0;
+}
+
+static const struct of_device_id rt_timer_match[] = {
+       { .compatible = "ralink,rt2880-timer" },
+       {},
+};
+MODULE_DEVICE_TABLE(of, rt_timer_match);
+
+static struct platform_driver rt_timer_driver = {
+       .probe = rt_timer_probe,
+       .remove = rt_timer_remove,
+       .driver = {
+               .name           = "rt-timer",
+               .owner          = THIS_MODULE,
+               .of_match_table = rt_timer_match
+       },
+};
+
+module_platform_driver(rt_timer_driver);
+
+MODULE_DESCRIPTION("Ralink RT2880 timer");
+MODULE_AUTHOR("John Crispin <blogic@openwrt.org");
+MODULE_LICENSE("GPL");
index 681e7f8..2b0b83c 100644 (file)
@@ -350,7 +350,7 @@ static void __init select_board(void)
        }
 
        /* select "default" board */
-#ifdef CONFIG_CPU_TX39XX
+#ifdef CONFIG_TOSHIBA_JMR3927
        txx9_board_vec = &jmr3927_vec;
 #endif
 #ifdef CONFIG_CPU_TX49XX
index 70e4f66..6aaa160 100644 (file)
@@ -1,7 +1,6 @@
 config MN10300
        def_bool y
        select HAVE_OPROFILE
-       select HAVE_GENERIC_HARDIRQS
        select GENERIC_IRQ_SHOW
        select ARCH_WANT_IPC_PARSE_VERSION
        select HAVE_ARCH_TRACEHOOK
index 8a2e6de..3516cbd 100644 (file)
@@ -171,6 +171,8 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long fault_code,
        if (in_atomic() || !mm)
                goto no_context;
 
+       if ((fault_code & MMUFCR_xFC_ACCESS) == MMUFCR_xFC_ACCESS_USR)
+               flags |= FAULT_FLAG_USER;
 retry:
        down_read(&mm->mmap_sem);
 
index d60bf98..9488209 100644 (file)
@@ -11,7 +11,6 @@ config OPENRISC
        select HAVE_MEMBLOCK
        select ARCH_REQUIRE_GPIOLIB
         select HAVE_ARCH_TRACEHOOK
-       select HAVE_GENERIC_HARDIRQS
        select GENERIC_IRQ_CHIP
        select GENERIC_IRQ_PROBE
        select GENERIC_IRQ_SHOW
index 4a41f84..0703acf 100644 (file)
@@ -86,6 +86,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long address,
        if (user_mode(regs)) {
                /* Exception was in userspace: reenable interrupts */
                local_irq_enable();
+               flags |= FAULT_FLAG_USER;
        } else {
                /* If exception was in a syscall, then IRQ's may have
                 * been enabled or disabled.  If they were enabled,
index aa399a5..ad2ce8d 100644 (file)
@@ -14,7 +14,6 @@ config PARISC
        select HAVE_PERF_EVENTS
        select GENERIC_ATOMIC64 if !64BIT
        select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
-       select HAVE_GENERIC_HARDIRQS
        select BROKEN_RODATA
        select GENERIC_IRQ_PROBE
        select GENERIC_PCI_IOMAP
index f247a34..d10d27a 100644 (file)
@@ -180,6 +180,10 @@ void do_page_fault(struct pt_regs *regs, unsigned long code,
        if (in_atomic() || !mm)
                goto no_context;
 
+       if (user_mode(regs))
+               flags |= FAULT_FLAG_USER;
+       if (acc_type & VM_WRITE)
+               flags |= FAULT_FLAG_WRITE;
 retry:
        down_read(&mm->mmap_sem);
        vma = find_vma_prev(mm, address, &prev_vma);
@@ -203,8 +207,7 @@ good_area:
         * fault.
         */
 
-       fault = handle_mm_fault(mm, vma, address,
-                       flags | ((acc_type & VM_WRITE) ? FAULT_FLAG_WRITE : 0));
+       fault = handle_mm_fault(mm, vma, address, flags);
 
        if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
                return;
index 6b7530f..38f3b7e 100644 (file)
@@ -114,7 +114,6 @@ config PPC
        select HAVE_PERF_EVENTS
        select HAVE_REGS_AND_STACK_ACCESS_API
        select HAVE_HW_BREAKPOINT if PERF_EVENTS && PPC_BOOK3S_64
-       select HAVE_GENERIC_HARDIRQS
        select ARCH_WANT_IPC_PARSE_VERSION
        select SPARSE_IRQ
        select IRQ_DOMAIN
index 77e97dd..38faede 100644 (file)
@@ -28,6 +28,9 @@ struct dev_archdata {
                void            *iommu_table_base;
        } dma_data;
 
+#ifdef CONFIG_IOMMU_API
+       void                    *iommu_domain;
+#endif
 #ifdef CONFIG_SWIOTLB
        dma_addr_t              max_direct_dma_addr;
 #endif
diff --git a/arch/powerpc/include/asm/fsl_pamu_stash.h b/arch/powerpc/include/asm/fsl_pamu_stash.h
new file mode 100644 (file)
index 0000000..caa1b21
--- /dev/null
@@ -0,0 +1,39 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright (C) 2013 Freescale Semiconductor, Inc.
+ *
+ */
+
+#ifndef __FSL_PAMU_STASH_H
+#define __FSL_PAMU_STASH_H
+
+/* cache stash targets */
+enum pamu_stash_target {
+       PAMU_ATTR_CACHE_L1 = 1,
+       PAMU_ATTR_CACHE_L2,
+       PAMU_ATTR_CACHE_L3,
+};
+
+/*
+ * This attribute allows configuring stashing specific parameters
+ * in the PAMU hardware.
+ */
+
+struct pamu_stash_attribute {
+       u32     cpu;    /* cpu number */
+       u32     cache;  /* cache to stash to: L1,L2,L3 */
+};
+
+#endif  /* __FSL_PAMU_STASH_H */
index 2dd69bf..51ab9e7 100644 (file)
@@ -223,9 +223,6 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
        is_write = error_code & ESR_DST;
 #endif /* CONFIG_4xx || CONFIG_BOOKE */
 
-       if (is_write)
-               flags |= FAULT_FLAG_WRITE;
-
 #ifdef CONFIG_PPC_ICSWX
        /*
         * we need to do this early because this "data storage
@@ -288,6 +285,9 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
        if (user_mode(regs))
                store_update_sp = store_updates_sp(regs);
 
+       if (user_mode(regs))
+               flags |= FAULT_FLAG_USER;
+
        /* When running in the kernel we expect faults to occur only to
         * addresses in user space.  All other faults represent errors in the
         * kernel and should generate an OOPS.  Unfortunately, in the case of an
@@ -415,6 +415,7 @@ good_area:
        } else if (is_write) {
                if (!(vma->vm_flags & VM_WRITE))
                        goto bad_area;
+               flags |= FAULT_FLAG_WRITE;
        /* a read */
        } else {
                /* protection fault */
index 834ca8e..d67db4b 100644 (file)
@@ -86,6 +86,11 @@ int pgd_huge(pgd_t pgd)
         */
        return ((pgd_val(pgd) & 0x3) != 0x0);
 }
+
+int pmd_huge_support(void)
+{
+       return 1;
+}
 #else
 int pmd_huge(pmd_t pmd)
 {
@@ -101,6 +106,11 @@ int pgd_huge(pgd_t pgd)
 {
        return 0;
 }
+
+int pmd_huge_support(void)
+{
+       return 0;
+}
 #endif
 
 pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
index defc422..8d455df 100644 (file)
 
 struct platform_device;
 
+
+/* FSL PCI controller BRR1 register */
+#define PCI_FSL_BRR1      0xbf8
+#define PCI_FSL_BRR1_VER 0xffff
+
 #define PCIE_LTSSM     0x0404          /* PCIE Link Training and Status */
 #define PCIE_LTSSM_L0  0x16            /* L0 state */
 #define PCIE_IP_REV_2_2                0x02080202 /* PCIE IP block version Rev2.2 */
index fb2723e..dcc6ac2 100644 (file)
@@ -116,7 +116,6 @@ config S390
        select HAVE_FUNCTION_GRAPH_TRACER
        select HAVE_FUNCTION_TRACER
        select HAVE_FUNCTION_TRACE_MCOUNT_TEST
-       select HAVE_GENERIC_HARDIRQS
        select HAVE_KERNEL_BZIP2
        select HAVE_KERNEL_GZIP
        select HAVE_KERNEL_LZ4
@@ -526,6 +525,7 @@ config CRASH_DUMP
        bool "kernel crash dumps"
        depends on 64BIT && SMP
        select KEXEC
+       select ZFCPDUMP
        help
          Generate crash dump after being started by kexec.
          Crash dump kernels are loaded in the main kernel with kexec-tools
@@ -536,7 +536,7 @@ config CRASH_DUMP
 config ZFCPDUMP
        def_bool n
        prompt "zfcpdump support"
-       select SMP
+       depends on SMP
        help
          Select this option if you want to build an zfcpdump enabled kernel.
          Refer to <file:Documentation/s390/zfcpdump.txt> for more details on this.
index dcf6948..4176dfe 100644 (file)
@@ -31,6 +31,8 @@
 #include <linux/ptrace.h>
 #include <linux/percpu.h>
 
+#define __ARCH_WANT_KPROBES_INSN_SLOT
+
 struct pt_regs;
 struct kprobe;
 
@@ -57,7 +59,7 @@ typedef u16 kprobe_opcode_t;
 /* Architecture specific copy of original instruction */
 struct arch_specific_insn {
        /* copy of original instruction */
-       kprobe_opcode_t insn[MAX_INSN_SIZE];
+       kprobe_opcode_t *insn;
 };
 
 struct prev_kprobe {
index 06a1361..7dc7f9c 100644 (file)
@@ -56,5 +56,6 @@ bool sclp_has_linemode(void);
 bool sclp_has_vt220(void);
 int sclp_pci_configure(u32 fid);
 int sclp_pci_deconfigure(u32 fid);
+int memcpy_hsa(void *dest, unsigned long src, size_t count, int mode);
 
 #endif /* _ASM_S390_SCLP_H */
index d8f3556..c84f33d 100644 (file)
@@ -16,6 +16,7 @@
 #include <asm/os_info.h>
 #include <asm/elf.h>
 #include <asm/ipl.h>
+#include <asm/sclp.h>
 
 #define PTR_ADD(x, y) (((char *) (x)) + ((unsigned long) (y)))
 #define PTR_SUB(x, y) (((char *) (x)) - ((unsigned long) (y)))
@@ -64,22 +65,46 @@ static ssize_t copy_page_real(void *buf, void *src, size_t csize)
 }
 
 /*
- * Copy one page from "oldmem"
+ * Pointer to ELF header in new kernel
+ */
+static void *elfcorehdr_newmem;
+
+/*
+ * Copy one page from zfcpdump "oldmem"
+ *
+ * For pages below ZFCPDUMP_HSA_SIZE memory from the HSA is copied. Otherwise
+ * real memory copy is used.
+ */
+static ssize_t copy_oldmem_page_zfcpdump(char *buf, size_t csize,
+                                        unsigned long src, int userbuf)
+{
+       int rc;
+
+       if (src < ZFCPDUMP_HSA_SIZE) {
+               rc = memcpy_hsa(buf, src, csize, userbuf);
+       } else {
+               if (userbuf)
+                       rc = copy_to_user_real((void __force __user *) buf,
+                                              (void *) src, csize);
+               else
+                       rc = memcpy_real(buf, (void *) src, csize);
+       }
+       return rc ? rc : csize;
+}
+
+/*
+ * Copy one page from kdump "oldmem"
  *
  * For the kdump reserved memory this functions performs a swap operation:
  *  - [OLDMEM_BASE - OLDMEM_BASE + OLDMEM_SIZE] is mapped to [0 - OLDMEM_SIZE].
  *  - [0 - OLDMEM_SIZE] is mapped to [OLDMEM_BASE - OLDMEM_BASE + OLDMEM_SIZE]
  */
-ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
-                        size_t csize, unsigned long offset, int userbuf)
+static ssize_t copy_oldmem_page_kdump(char *buf, size_t csize,
+                                     unsigned long src, int userbuf)
+
 {
-       unsigned long src;
        int rc;
 
-       if (!csize)
-               return 0;
-
-       src = (pfn << PAGE_SHIFT) + offset;
        if (src < OLDMEM_SIZE)
                src += OLDMEM_BASE;
        else if (src > OLDMEM_BASE &&
@@ -90,7 +115,88 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
                                       (void *) src, csize);
        else
                rc = copy_page_real(buf, (void *) src, csize);
-       return (rc == 0) ? csize : rc;
+       return (rc == 0) ? rc : csize;
+}
+
+/*
+ * Copy one page from "oldmem"
+ */
+ssize_t copy_oldmem_page(unsigned long pfn, char *buf, size_t csize,
+                        unsigned long offset, int userbuf)
+{
+       unsigned long src;
+
+       if (!csize)
+               return 0;
+       src = (pfn << PAGE_SHIFT) + offset;
+       if (OLDMEM_BASE)
+               return copy_oldmem_page_kdump(buf, csize, src, userbuf);
+       else
+               return copy_oldmem_page_zfcpdump(buf, csize, src, userbuf);
+}
+
+/*
+ * Remap "oldmem" for kdump
+ *
+ * For the kdump reserved memory this functions performs a swap operation:
+ * [0 - OLDMEM_SIZE] is mapped to [OLDMEM_BASE - OLDMEM_BASE + OLDMEM_SIZE]
+ */
+static int remap_oldmem_pfn_range_kdump(struct vm_area_struct *vma,
+                                       unsigned long from, unsigned long pfn,
+                                       unsigned long size, pgprot_t prot)
+{
+       unsigned long size_old;
+       int rc;
+
+       if (pfn < OLDMEM_SIZE >> PAGE_SHIFT) {
+               size_old = min(size, OLDMEM_SIZE - (pfn << PAGE_SHIFT));
+               rc = remap_pfn_range(vma, from,
+                                    pfn + (OLDMEM_BASE >> PAGE_SHIFT),
+                                    size_old, prot);
+               if (rc || size == size_old)
+                       return rc;
+               size -= size_old;
+               from += size_old;
+               pfn += size_old >> PAGE_SHIFT;
+       }
+       return remap_pfn_range(vma, from, pfn, size, prot);
+}
+
+/*
+ * Remap "oldmem" for zfcpdump
+ *
+ * We only map available memory above ZFCPDUMP_HSA_SIZE. Memory below
+ * ZFCPDUMP_HSA_SIZE is read on demand using the copy_oldmem_page() function.
+ */
+static int remap_oldmem_pfn_range_zfcpdump(struct vm_area_struct *vma,
+                                          unsigned long from,
+                                          unsigned long pfn,
+                                          unsigned long size, pgprot_t prot)
+{
+       unsigned long size_hsa;
+
+       if (pfn < ZFCPDUMP_HSA_SIZE >> PAGE_SHIFT) {
+               size_hsa = min(size, ZFCPDUMP_HSA_SIZE - (pfn << PAGE_SHIFT));
+               if (size == size_hsa)
+                       return 0;
+               size -= size_hsa;
+               from += size_hsa;
+               pfn += size_hsa >> PAGE_SHIFT;
+       }
+       return remap_pfn_range(vma, from, pfn, size, prot);
+}
+
+/*
+ * Remap "oldmem" for kdump or zfcpdump
+ */
+int remap_oldmem_pfn_range(struct vm_area_struct *vma, unsigned long from,
+                          unsigned long pfn, unsigned long size, pgprot_t prot)
+{
+       if (OLDMEM_BASE)
+               return remap_oldmem_pfn_range_kdump(vma, from, pfn, size, prot);
+       else
+               return remap_oldmem_pfn_range_zfcpdump(vma, from, pfn, size,
+                                                      prot);
 }
 
 /*
@@ -101,11 +207,21 @@ int copy_from_oldmem(void *dest, void *src, size_t count)
        unsigned long copied = 0;
        int rc;
 
-       if ((unsigned long) src < OLDMEM_SIZE) {
-               copied = min(count, OLDMEM_SIZE - (unsigned long) src);
-               rc = memcpy_real(dest, src + OLDMEM_BASE, copied);
-               if (rc)
-                       return rc;
+       if (OLDMEM_BASE) {
+               if ((unsigned long) src < OLDMEM_SIZE) {
+                       copied = min(count, OLDMEM_SIZE - (unsigned long) src);
+                       rc = memcpy_real(dest, src + OLDMEM_BASE, copied);
+                       if (rc)
+                               return rc;
+               }
+       } else {
+               if ((unsigned long) src < ZFCPDUMP_HSA_SIZE) {
+                       copied = min(count,
+                                    ZFCPDUMP_HSA_SIZE - (unsigned long) src);
+                       rc = memcpy_hsa(dest, (unsigned long) src, copied, 0);
+                       if (rc)
+                               return rc;
+               }
        }
        return memcpy_real(dest + copied, src + copied, count - copied);
 }
@@ -367,14 +483,6 @@ static int get_mem_chunk_cnt(void)
        return cnt;
 }
 
-/*
- * Relocate pointer in order to allow vmcore code access the data
- */
-static inline unsigned long relocate(unsigned long addr)
-{
-       return OLDMEM_BASE + addr;
-}
-
 /*
  * Initialize ELF loads (new kernel)
  */
@@ -426,7 +534,7 @@ static void *notes_init(Elf64_Phdr *phdr, void *ptr, u64 notes_offset)
        ptr = nt_vmcoreinfo(ptr);
        memset(phdr, 0, sizeof(*phdr));
        phdr->p_type = PT_NOTE;
-       phdr->p_offset = relocate(notes_offset);
+       phdr->p_offset = notes_offset;
        phdr->p_filesz = (unsigned long) PTR_SUB(ptr, ptr_start);
        phdr->p_memsz = phdr->p_filesz;
        return ptr;
@@ -435,7 +543,7 @@ static void *notes_init(Elf64_Phdr *phdr, void *ptr, u64 notes_offset)
 /*
  * Create ELF core header (new kernel)
  */
-static void s390_elf_corehdr_create(char **elfcorebuf, size_t *elfcorebuf_sz)
+int elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size)
 {
        Elf64_Phdr *phdr_notes, *phdr_loads;
        int mem_chunk_cnt;
@@ -443,6 +551,12 @@ static void s390_elf_corehdr_create(char **elfcorebuf, size_t *elfcorebuf_sz)
        u32 alloc_size;
        u64 hdr_off;
 
+       /* If we are not in kdump or zfcpdump mode return */
+       if (!OLDMEM_BASE && ipl_info.type != IPL_TYPE_FCP_DUMP)
+               return 0;
+       /* If elfcorehdr= has been passed via cmdline, we use that one */
+       if (elfcorehdr_addr != ELFCORE_ADDR_MAX)
+               return 0;
        mem_chunk_cnt = get_mem_chunk_cnt();
 
        alloc_size = 0x1000 + get_cpu_cnt() * 0x300 +
@@ -460,27 +574,52 @@ static void s390_elf_corehdr_create(char **elfcorebuf, size_t *elfcorebuf_sz)
        ptr = notes_init(phdr_notes, ptr, ((unsigned long) hdr) + hdr_off);
        /* Init loads */
        hdr_off = PTR_DIFF(ptr, hdr);
-       loads_init(phdr_loads, ((unsigned long) hdr) + hdr_off);
-       *elfcorebuf_sz = hdr_off;
-       *elfcorebuf = (void *) relocate((unsigned long) hdr);
-       BUG_ON(*elfcorebuf_sz > alloc_size);
+       loads_init(phdr_loads, hdr_off);
+       *addr = (unsigned long long) hdr;
+       elfcorehdr_newmem = hdr;
+       *size = (unsigned long long) hdr_off;
+       BUG_ON(elfcorehdr_size > alloc_size);
+       return 0;
 }
 
 /*
- * Create kdump ELF core header in new kernel, if it has not been passed via
- * the "elfcorehdr" kernel parameter
+ * Free ELF core header (new kernel)
  */
-static int setup_kdump_elfcorehdr(void)
+void elfcorehdr_free(unsigned long long addr)
 {
-       size_t elfcorebuf_sz;
-       char *elfcorebuf;
+       if (!elfcorehdr_newmem)
+               return;
+       kfree((void *)(unsigned long)addr);
+}
 
-       if (!OLDMEM_BASE || is_kdump_kernel())
-               return -EINVAL;
-       s390_elf_corehdr_create(&elfcorebuf, &elfcorebuf_sz);
-       elfcorehdr_addr = (unsigned long long) elfcorebuf;
-       elfcorehdr_size = elfcorebuf_sz;
-       return 0;
+/*
+ * Read from ELF header
+ */
+ssize_t elfcorehdr_read(char *buf, size_t count, u64 *ppos)
+{
+       void *src = (void *)(unsigned long)*ppos;
+
+       src = elfcorehdr_newmem ? src : src - OLDMEM_BASE;
+       memcpy(buf, src, count);
+       *ppos += count;
+       return count;
 }
 
-subsys_initcall(setup_kdump_elfcorehdr);
+/*
+ * Read from ELF notes data
+ */
+ssize_t elfcorehdr_read_notes(char *buf, size_t count, u64 *ppos)
+{
+       void *src = (void *)(unsigned long)*ppos;
+       int rc;
+
+       if (elfcorehdr_newmem) {
+               memcpy(buf, src, count);
+       } else {
+               rc = copy_from_oldmem(buf, src, count);
+               if (rc)
+                       return rc;
+       }
+       *ppos += count;
+       return count;
+}
index adbbe7f..0ce9fb2 100644 (file)
@@ -37,6 +37,26 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
 
 struct kretprobe_blackpoint kretprobe_blacklist[] = { };
 
+DEFINE_INSN_CACHE_OPS(dmainsn);
+
+static void *alloc_dmainsn_page(void)
+{
+       return (void *)__get_free_page(GFP_KERNEL | GFP_DMA);
+}
+
/* Release a page previously handed out by alloc_dmainsn_page(). */
static void free_dmainsn_page(void *page)
{
	free_page((unsigned long)page);
}
+
/*
 * Insn slot cache whose pages come from GFP_DMA allocations — presumably
 * so that slots for probes on kernel text end up in the same low address
 * area as the kernel image (see s390_get_insn_slot) — TODO confirm against
 * the s390 memory layout.
 */
struct kprobe_insn_cache kprobe_dmainsn_slots = {
	.mutex = __MUTEX_INITIALIZER(kprobe_dmainsn_slots.mutex),
	.alloc = alloc_dmainsn_page,
	.free = free_dmainsn_page,
	.pages = LIST_HEAD_INIT(kprobe_dmainsn_slots.pages),
	.insn_size = MAX_INSN_SIZE,
};
+
 static int __kprobes is_prohibited_opcode(kprobe_opcode_t *insn)
 {
        switch (insn[0] >> 8) {
@@ -100,9 +120,8 @@ static int __kprobes get_fixup_type(kprobe_opcode_t *insn)
                        fixup |= FIXUP_RETURN_REGISTER;
                break;
        case 0xc0:
-               if ((insn[0] & 0x0f) == 0x00 || /* larl  */
-                   (insn[0] & 0x0f) == 0x05)   /* brasl */
-               fixup |= FIXUP_RETURN_REGISTER;
+               if ((insn[0] & 0x0f) == 0x05)   /* brasl */
+                       fixup |= FIXUP_RETURN_REGISTER;
                break;
        case 0xeb:
                switch (insn[2] & 0xff) {
@@ -134,18 +153,128 @@ static int __kprobes get_fixup_type(kprobe_opcode_t *insn)
        return fixup;
 }
 
+static int __kprobes is_insn_relative_long(kprobe_opcode_t *insn)
+{
+       /* Check if we have a RIL-b or RIL-c format instruction which
+        * we need to modify in order to avoid instruction emulation. */
+       switch (insn[0] >> 8) {
+       case 0xc0:
+               if ((insn[0] & 0x0f) == 0x00) /* larl */
+                       return true;
+               break;
+       case 0xc4:
+               switch (insn[0] & 0x0f) {
+               case 0x02: /* llhrl  */
+               case 0x04: /* lghrl  */
+               case 0x05: /* lhrl   */
+               case 0x06: /* llghrl */
+               case 0x07: /* sthrl  */
+               case 0x08: /* lgrl   */
+               case 0x0b: /* stgrl  */
+               case 0x0c: /* lgfrl  */
+               case 0x0d: /* lrl    */
+               case 0x0e: /* llgfrl */
+               case 0x0f: /* strl   */
+                       return true;
+               }
+               break;
+       case 0xc6:
+               switch (insn[0] & 0x0f) {
+               case 0x00: /* exrl   */
+               case 0x02: /* pfdrl  */
+               case 0x04: /* cghrl  */
+               case 0x05: /* chrl   */
+               case 0x06: /* clghrl */
+               case 0x07: /* clhrl  */
+               case 0x08: /* cgrl   */
+               case 0x0a: /* clgrl  */
+               case 0x0c: /* cgfrl  */
+               case 0x0d: /* crl    */
+               case 0x0e: /* clgfrl */
+               case 0x0f: /* clrl   */
+                       return true;
+               }
+               break;
+       }
+       return false;
+}
+
/*
 * Copy the probed instruction into its insn slot and, for pc-relative
 * RIL-b/RIL-c instructions, rewrite the displacement so it still targets
 * the original location.
 */
static void __kprobes copy_instruction(struct kprobe *p)
{
	s64 disp, new_disp;
	u64 addr, new_addr;

	/* ((opcode >> 14) + 3) & -2 yields the instruction length in bytes
	 * (2, 4 or 6) from the two top bits of the opcode. */
	memcpy(p->ainsn.insn, p->addr, ((p->opcode >> 14) + 3) & -2);
	if (!is_insn_relative_long(p->ainsn.insn))
		return;
	/*
	 * For pc-relative instructions in RIL-b or RIL-c format patch the
	 * RI2 displacement field. We have already made sure that the insn
	 * slot for the patched instruction is within the same 2GB area
	 * as the original instruction (either kernel image or module area).
	 * Therefore the new displacement will always fit.
	 */
	disp = *(s32 *)&p->ainsn.insn[1];	/* RI2 counts halfwords, hence *2 / /2 below */
	addr = (u64)(unsigned long)p->addr;
	new_addr = (u64)(unsigned long)p->ainsn.insn;
	new_disp = ((addr + (disp * 2)) - new_addr) / 2;
	*(s32 *)&p->ainsn.insn[1] = new_disp;
}
+
/* True if addr lies within the kernel image (below _end). */
static inline int is_kernel_addr(void *addr)
{
	return addr < (void *)_end;
}
+
/*
 * True if addr may lie within the module area. Without CONFIG_64BIT there
 * is no separate module range to check, so every address qualifies.
 */
static inline int is_module_addr(void *addr)
{
#ifdef CONFIG_64BIT
	BUILD_BUG_ON(MODULES_LEN > (1UL << 31));
	return addr >= (void *)MODULES_VADDR && addr <= (void *)MODULES_END;
#else
	return 1;
#endif
}
+
+static int __kprobes s390_get_insn_slot(struct kprobe *p)
+{
+       /*
+        * Get an insn slot that is within the same 2GB area like the original
+        * instruction. That way instructions with a 32bit signed displacement
+        * field can be patched and executed within the insn slot.
+        */
+       p->ainsn.insn = NULL;
+       if (is_kernel_addr(p->addr))
+               p->ainsn.insn = get_dmainsn_slot();
+       if (is_module_addr(p->addr))
+               p->ainsn.insn = get_insn_slot();
+       return p->ainsn.insn ? 0 : -ENOMEM;
+}
+
+static void __kprobes s390_free_insn_slot(struct kprobe *p)
+{
+       if (!p->ainsn.insn)
+               return;
+       if (is_kernel_addr(p->addr))
+               free_dmainsn_slot(p->ainsn.insn, 0);
+       else
+               free_insn_slot(p->ainsn.insn, 0);
+       p->ainsn.insn = NULL;
+}
+
 int __kprobes arch_prepare_kprobe(struct kprobe *p)
 {
        if ((unsigned long) p->addr & 0x01)
                return -EINVAL;
-
        /* Make sure the probe isn't going on a difficult instruction */
        if (is_prohibited_opcode(p->addr))
                return -EINVAL;
-
+       if (s390_get_insn_slot(p))
+               return -ENOMEM;
        p->opcode = *p->addr;
-       memcpy(p->ainsn.insn, p->addr, ((p->opcode >> 14) + 3) & -2);
-
+       copy_instruction(p);
        return 0;
 }
 
@@ -186,6 +315,7 @@ void __kprobes arch_disarm_kprobe(struct kprobe *p)
 
 void __kprobes arch_remove_kprobe(struct kprobe *p)
 {
+       s390_free_insn_slot(p);
 }
 
 static void __kprobes enable_singlestep(struct kprobe_ctlblk *kcb,
index 7de4469..fc66792 100644 (file)
@@ -302,6 +302,8 @@ static inline int do_exception(struct pt_regs *regs, int access)
        address = trans_exc_code & __FAIL_ADDR_MASK;
        perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
        flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
+       if (user_mode(regs))
+               flags |= FAULT_FLAG_USER;
        if (access == VM_WRITE || (trans_exc_code & store_indication) == 0x400)
                flags |= FAULT_FLAG_WRITE;
        down_read(&mm->mmap_sem);
index 248445f..d261c62 100644 (file)
@@ -223,6 +223,11 @@ int pud_huge(pud_t pud)
        return 0;
 }
 
/* Tell the core mm code that PMD-level huge pages are usable here. */
int pmd_huge_support(void)
{
	return 1;
}
+
 struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
                             pmd_t *pmdp, int write)
 {
index 5fc2375..a1be70d 100644 (file)
@@ -2,7 +2,6 @@ menu "Machine selection"
 
 config SCORE
        def_bool y
-       select HAVE_GENERIC_HARDIRQS
        select GENERIC_IRQ_SHOW
        select GENERIC_IOMAP
        select GENERIC_ATOMIC64
index 6b18fb0..5223898 100644 (file)
@@ -47,6 +47,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long write,
        struct task_struct *tsk = current;
        struct mm_struct *mm = tsk->mm;
        const int field = sizeof(unsigned long) * 2;
+       unsigned long flags = 0;
        siginfo_t info;
        int fault;
 
@@ -75,6 +76,9 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long write,
        if (in_atomic() || !mm)
                goto bad_area_nosemaphore;
 
+       if (user_mode(regs))
+               flags |= FAULT_FLAG_USER;
+
        down_read(&mm->mmap_sem);
        vma = find_vma(mm, address);
        if (!vma)
@@ -95,18 +99,18 @@ good_area:
        if (write) {
                if (!(vma->vm_flags & VM_WRITE))
                        goto bad_area;
+               flags |= FAULT_FLAG_WRITE;
        } else {
                if (!(vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC)))
                        goto bad_area;
        }
 
-survive:
        /*
        * If for any reason at all we couldn't handle the fault,
        * make sure we exit gracefully rather than endlessly redo
        * the fault.
        */
-       fault = handle_mm_fault(mm, vma, address, write);
+       fault = handle_mm_fault(mm, vma, address, flags);
        if (unlikely(fault & VM_FAULT_ERROR)) {
                if (fault & VM_FAULT_OOM)
                        goto out_of_memory;
@@ -167,11 +171,6 @@ no_context:
        */
 out_of_memory:
        up_read(&mm->mmap_sem);
-       if (is_global_init(tsk)) {
-               yield();
-               down_read(&mm->mmap_sem);
-               goto survive;
-       }
        if (!user_mode(regs))
                goto no_context;
        pagefault_out_of_memory();
index 1018ed3..224f4bc 100644 (file)
@@ -26,7 +26,6 @@ config SUPERH
        select ARCH_WANT_IPC_PARSE_VERSION
        select HAVE_SYSCALL_TRACEPOINTS
        select HAVE_REGS_AND_STACK_ACCESS_API
-       select HAVE_GENERIC_HARDIRQS
        select MAY_HAVE_SPARSE_IRQ
        select IRQ_FORCED_THREADING
        select RTC_LIB
index 1f49c28..541dc61 100644 (file)
@@ -400,9 +400,7 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
        struct mm_struct *mm;
        struct vm_area_struct * vma;
        int fault;
-       int write = error_code & FAULT_CODE_WRITE;
-       unsigned int flags = (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
-                             (write ? FAULT_FLAG_WRITE : 0));
+       unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
 
        tsk = current;
        mm = tsk->mm;
@@ -476,6 +474,11 @@ good_area:
 
        set_thread_fault_code(error_code);
 
+       if (user_mode(regs))
+               flags |= FAULT_FLAG_USER;
+       if (error_code & FAULT_CODE_WRITE)
+               flags |= FAULT_FLAG_WRITE;
+
        /*
         * If for any reason at all we couldn't handle the fault,
         * make sure we exit gracefully rather than endlessly redo
index d776234..0d676a4 100644 (file)
@@ -83,6 +83,11 @@ int pud_huge(pud_t pud)
        return 0;
 }
 
/* Tell the core mm code that PMD-level huge pages are not usable here. */
int pmd_huge_support(void)
{
	return 0;
}
+
 struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
                             pmd_t *pmd, int write)
 {
index 1570ad2..2137ad6 100644 (file)
@@ -26,7 +26,6 @@ config SPARC
        select HAVE_DMA_ATTRS
        select HAVE_DMA_API_DEBUG
        select HAVE_ARCH_JUMP_LABEL
-       select HAVE_GENERIC_HARDIRQS
        select GENERIC_IRQ_SHOW
        select ARCH_WANT_IPC_PARSE_VERSION
        select USE_GENERIC_SMP_HELPERS if SMP
index 3d0ddbc..7136885 100644 (file)
@@ -169,10 +169,10 @@ COMPAT_SYSCALL_DEFINE5(rt_sigaction, int, sig,
                new_ka.ka_restorer = restorer;
                ret = get_user(u_handler, &act->sa_handler);
                new_ka.sa.sa_handler =  compat_ptr(u_handler);
-               ret |= __copy_from_user(&set32, &act->sa_mask, sizeof(compat_sigset_t));
+               ret |= copy_from_user(&set32, &act->sa_mask, sizeof(compat_sigset_t));
                sigset_from_compat(&new_ka.sa.sa_mask, &set32);
-               ret |= __get_user(new_ka.sa.sa_flags, &act->sa_flags);
-               ret |= __get_user(u_restorer, &act->sa_restorer);
+               ret |= get_user(new_ka.sa.sa_flags, &act->sa_flags);
+               ret |= get_user(u_restorer, &act->sa_restorer);
                new_ka.sa.sa_restorer = compat_ptr(u_restorer);
                 if (ret)
                        return -EFAULT;
@@ -183,9 +183,9 @@ COMPAT_SYSCALL_DEFINE5(rt_sigaction, int, sig,
        if (!ret && oact) {
                sigset_to_compat(&set32, &old_ka.sa.sa_mask);
                ret = put_user(ptr_to_compat(old_ka.sa.sa_handler), &oact->sa_handler);
-               ret |= __copy_to_user(&oact->sa_mask, &set32, sizeof(compat_sigset_t));
-               ret |= __put_user(old_ka.sa.sa_flags, &oact->sa_flags);
-               ret |= __put_user(ptr_to_compat(old_ka.sa.sa_restorer), &oact->sa_restorer);
+               ret |= copy_to_user(&oact->sa_mask, &set32, sizeof(compat_sigset_t));
+               ret |= put_user(old_ka.sa.sa_flags, &oact->sa_flags);
+               ret |= put_user(ptr_to_compat(old_ka.sa.sa_restorer), &oact->sa_restorer);
                if (ret)
                        ret = -EFAULT;
         }
index e98bfda..59dbd46 100644 (file)
@@ -177,8 +177,7 @@ asmlinkage void do_sparc_fault(struct pt_regs *regs, int text_fault, int write,
        unsigned long g2;
        int from_user = !(regs->psr & PSR_PS);
        int fault, code;
-       unsigned int flags = (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
-                             (write ? FAULT_FLAG_WRITE : 0));
+       unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
 
        if (text_fault)
                address = regs->pc;
@@ -235,6 +234,11 @@ good_area:
                        goto bad_area;
        }
 
+       if (from_user)
+               flags |= FAULT_FLAG_USER;
+       if (write)
+               flags |= FAULT_FLAG_WRITE;
+
        /*
         * If for any reason at all we couldn't handle the fault,
         * make sure we exit gracefully rather than endlessly redo
@@ -383,6 +387,7 @@ static void force_user_fault(unsigned long address, int write)
        struct vm_area_struct *vma;
        struct task_struct *tsk = current;
        struct mm_struct *mm = tsk->mm;
+       unsigned int flags = FAULT_FLAG_USER;
        int code;
 
        code = SEGV_MAPERR;
@@ -402,11 +407,12 @@ good_area:
        if (write) {
                if (!(vma->vm_flags & VM_WRITE))
                        goto bad_area;
+               flags |= FAULT_FLAG_WRITE;
        } else {
                if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
                        goto bad_area;
        }
-       switch (handle_mm_fault(mm, vma, address, write ? FAULT_FLAG_WRITE : 0)) {
+       switch (handle_mm_fault(mm, vma, address, flags)) {
        case VM_FAULT_SIGBUS:
        case VM_FAULT_OOM:
                goto do_sigbus;
index 5062ff3..2ebec26 100644 (file)
@@ -315,7 +315,8 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs)
                        bad_kernel_pc(regs, address);
                        return;
                }
-       }
+       } else
+               flags |= FAULT_FLAG_USER;
 
        /*
         * If we're in an interrupt or have no user
@@ -418,13 +419,14 @@ good_area:
                    vma->vm_file != NULL)
                        set_thread_fault_code(fault_code |
                                              FAULT_CODE_BLKCOMMIT);
+
+               flags |= FAULT_FLAG_WRITE;
        } else {
                /* Allow reads even for write-only mappings */
                if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
                        goto bad_area;
        }
 
-       flags |= ((fault_code & FAULT_CODE_WRITE) ? FAULT_FLAG_WRITE : 0);
        fault = handle_mm_fault(mm, vma, address, flags);
 
        if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
index d2b5944..9639964 100644 (file)
@@ -234,6 +234,11 @@ int pud_huge(pud_t pud)
        return 0;
 }
 
/* Tell the core mm code that PMD-level huge pages are not usable here. */
int pmd_huge_support(void)
{
	return 0;
}
+
 struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
                             pmd_t *pmd, int write)
 {
index 932fa14..d45a2c4 100644 (file)
@@ -11,7 +11,6 @@ config TILE
        select USE_GENERIC_SMP_HELPERS
        select CC_OPTIMIZE_FOR_SIZE
        select HAVE_DEBUG_KMEMLEAK
-       select HAVE_GENERIC_HARDIRQS
        select GENERIC_IRQ_PROBE
        select GENERIC_PENDING_IRQ if SMP
        select GENERIC_IRQ_SHOW
@@ -362,7 +361,7 @@ config CMDLINE_OVERRIDE
 
 config VMALLOC_RESERVE
        hex
-       default 0x1000000
+       default 0x2000000
 
 config HARDWALL
        bool "Hardwall support to allow access to user dynamic network"
index 4f8f3d6..e19325c 100644 (file)
@@ -21,7 +21,7 @@ struct alloc_buffer_stacks_param {
        unsigned int flags;
 };
 
-int gxio_mpipe_alloc_buffer_stacks(gxio_mpipe_context_t * context,
+int gxio_mpipe_alloc_buffer_stacks(gxio_mpipe_context_t *context,
                                   unsigned int count, unsigned int first,
                                   unsigned int flags)
 {
@@ -45,7 +45,7 @@ struct init_buffer_stack_aux_param {
        unsigned int buffer_size_enum;
 };
 
-int gxio_mpipe_init_buffer_stack_aux(gxio_mpipe_context_t * context,
+int gxio_mpipe_init_buffer_stack_aux(gxio_mpipe_context_t *context,
                                     void *mem_va, size_t mem_size,
                                     unsigned int mem_flags, unsigned int stack,
                                     unsigned int buffer_size_enum)
@@ -80,7 +80,7 @@ struct alloc_notif_rings_param {
        unsigned int flags;
 };
 
-int gxio_mpipe_alloc_notif_rings(gxio_mpipe_context_t * context,
+int gxio_mpipe_alloc_notif_rings(gxio_mpipe_context_t *context,
                                 unsigned int count, unsigned int first,
                                 unsigned int flags)
 {
@@ -102,7 +102,7 @@ struct init_notif_ring_aux_param {
        unsigned int ring;
 };
 
-int gxio_mpipe_init_notif_ring_aux(gxio_mpipe_context_t * context, void *mem_va,
+int gxio_mpipe_init_notif_ring_aux(gxio_mpipe_context_t *context, void *mem_va,
                                   size_t mem_size, unsigned int mem_flags,
                                   unsigned int ring)
 {
@@ -133,7 +133,7 @@ struct request_notif_ring_interrupt_param {
        unsigned int ring;
 };
 
-int gxio_mpipe_request_notif_ring_interrupt(gxio_mpipe_context_t * context,
+int gxio_mpipe_request_notif_ring_interrupt(gxio_mpipe_context_t *context,
                                            int inter_x, int inter_y,
                                            int inter_ipi, int inter_event,
                                            unsigned int ring)
@@ -158,7 +158,7 @@ struct enable_notif_ring_interrupt_param {
        unsigned int ring;
 };
 
-int gxio_mpipe_enable_notif_ring_interrupt(gxio_mpipe_context_t * context,
+int gxio_mpipe_enable_notif_ring_interrupt(gxio_mpipe_context_t *context,
                                           unsigned int ring)
 {
        struct enable_notif_ring_interrupt_param temp;
@@ -179,7 +179,7 @@ struct alloc_notif_groups_param {
        unsigned int flags;
 };
 
-int gxio_mpipe_alloc_notif_groups(gxio_mpipe_context_t * context,
+int gxio_mpipe_alloc_notif_groups(gxio_mpipe_context_t *context,
                                  unsigned int count, unsigned int first,
                                  unsigned int flags)
 {
@@ -201,7 +201,7 @@ struct init_notif_group_param {
        gxio_mpipe_notif_group_bits_t bits;
 };
 
-int gxio_mpipe_init_notif_group(gxio_mpipe_context_t * context,
+int gxio_mpipe_init_notif_group(gxio_mpipe_context_t *context,
                                unsigned int group,
                                gxio_mpipe_notif_group_bits_t bits)
 {
@@ -223,7 +223,7 @@ struct alloc_buckets_param {
        unsigned int flags;
 };
 
-int gxio_mpipe_alloc_buckets(gxio_mpipe_context_t * context, unsigned int count,
+int gxio_mpipe_alloc_buckets(gxio_mpipe_context_t *context, unsigned int count,
                             unsigned int first, unsigned int flags)
 {
        struct alloc_buckets_param temp;
@@ -244,7 +244,7 @@ struct init_bucket_param {
        MPIPE_LBL_INIT_DAT_BSTS_TBL_t bucket_info;
 };
 
-int gxio_mpipe_init_bucket(gxio_mpipe_context_t * context, unsigned int bucket,
+int gxio_mpipe_init_bucket(gxio_mpipe_context_t *context, unsigned int bucket,
                           MPIPE_LBL_INIT_DAT_BSTS_TBL_t bucket_info)
 {
        struct init_bucket_param temp;
@@ -265,7 +265,7 @@ struct alloc_edma_rings_param {
        unsigned int flags;
 };
 
-int gxio_mpipe_alloc_edma_rings(gxio_mpipe_context_t * context,
+int gxio_mpipe_alloc_edma_rings(gxio_mpipe_context_t *context,
                                unsigned int count, unsigned int first,
                                unsigned int flags)
 {
@@ -288,7 +288,7 @@ struct init_edma_ring_aux_param {
        unsigned int channel;
 };
 
-int gxio_mpipe_init_edma_ring_aux(gxio_mpipe_context_t * context, void *mem_va,
+int gxio_mpipe_init_edma_ring_aux(gxio_mpipe_context_t *context, void *mem_va,
                                  size_t mem_size, unsigned int mem_flags,
                                  unsigned int ring, unsigned int channel)
 {
@@ -315,7 +315,7 @@ int gxio_mpipe_init_edma_ring_aux(gxio_mpipe_context_t * context, void *mem_va,
 EXPORT_SYMBOL(gxio_mpipe_init_edma_ring_aux);
 
 
-int gxio_mpipe_commit_rules(gxio_mpipe_context_t * context, const void *blob,
+int gxio_mpipe_commit_rules(gxio_mpipe_context_t *context, const void *blob,
                            size_t blob_size)
 {
        const void *params = blob;
@@ -332,7 +332,7 @@ struct register_client_memory_param {
        unsigned int flags;
 };
 
-int gxio_mpipe_register_client_memory(gxio_mpipe_context_t * context,
+int gxio_mpipe_register_client_memory(gxio_mpipe_context_t *context,
                                      unsigned int iotlb, HV_PTE pte,
                                      unsigned int flags)
 {
@@ -355,7 +355,7 @@ struct link_open_aux_param {
        unsigned int flags;
 };
 
-int gxio_mpipe_link_open_aux(gxio_mpipe_context_t * context,
+int gxio_mpipe_link_open_aux(gxio_mpipe_context_t *context,
                             _gxio_mpipe_link_name_t name, unsigned int flags)
 {
        struct link_open_aux_param temp;
@@ -374,7 +374,7 @@ struct link_close_aux_param {
        int mac;
 };
 
-int gxio_mpipe_link_close_aux(gxio_mpipe_context_t * context, int mac)
+int gxio_mpipe_link_close_aux(gxio_mpipe_context_t *context, int mac)
 {
        struct link_close_aux_param temp;
        struct link_close_aux_param *params = &temp;
@@ -393,7 +393,7 @@ struct link_set_attr_aux_param {
        int64_t val;
 };
 
-int gxio_mpipe_link_set_attr_aux(gxio_mpipe_context_t * context, int mac,
+int gxio_mpipe_link_set_attr_aux(gxio_mpipe_context_t *context, int mac,
                                 uint32_t attr, int64_t val)
 {
        struct link_set_attr_aux_param temp;
@@ -415,8 +415,8 @@ struct get_timestamp_aux_param {
        uint64_t cycles;
 };
 
-int gxio_mpipe_get_timestamp_aux(gxio_mpipe_context_t * context, uint64_t * sec,
-                                uint64_t * nsec, uint64_t * cycles)
+int gxio_mpipe_get_timestamp_aux(gxio_mpipe_context_t *context, uint64_t *sec,
+                                uint64_t *nsec, uint64_t *cycles)
 {
        int __result;
        struct get_timestamp_aux_param temp;
@@ -440,7 +440,7 @@ struct set_timestamp_aux_param {
        uint64_t cycles;
 };
 
-int gxio_mpipe_set_timestamp_aux(gxio_mpipe_context_t * context, uint64_t sec,
+int gxio_mpipe_set_timestamp_aux(gxio_mpipe_context_t *context, uint64_t sec,
                                 uint64_t nsec, uint64_t cycles)
 {
        struct set_timestamp_aux_param temp;
@@ -460,8 +460,7 @@ struct adjust_timestamp_aux_param {
        int64_t nsec;
 };
 
-int gxio_mpipe_adjust_timestamp_aux(gxio_mpipe_context_t * context,
-                                   int64_t nsec)
+int gxio_mpipe_adjust_timestamp_aux(gxio_mpipe_context_t *context, int64_t nsec)
 {
        struct adjust_timestamp_aux_param temp;
        struct adjust_timestamp_aux_param *params = &temp;
@@ -475,25 +474,6 @@ int gxio_mpipe_adjust_timestamp_aux(gxio_mpipe_context_t * context,
 
 EXPORT_SYMBOL(gxio_mpipe_adjust_timestamp_aux);
 
-struct adjust_timestamp_freq_param {
-       int32_t ppb;
-};
-
-int gxio_mpipe_adjust_timestamp_freq(gxio_mpipe_context_t * context,
-                                    int32_t ppb)
-{
-       struct adjust_timestamp_freq_param temp;
-       struct adjust_timestamp_freq_param *params = &temp;
-
-       params->ppb = ppb;
-
-       return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params,
-                            sizeof(*params),
-                            GXIO_MPIPE_OP_ADJUST_TIMESTAMP_FREQ);
-}
-
-EXPORT_SYMBOL(gxio_mpipe_adjust_timestamp_freq);
-
 struct config_edma_ring_blks_param {
        unsigned int ering;
        unsigned int max_blks;
@@ -501,7 +481,7 @@ struct config_edma_ring_blks_param {
        unsigned int db;
 };
 
-int gxio_mpipe_config_edma_ring_blks(gxio_mpipe_context_t * context,
+int gxio_mpipe_config_edma_ring_blks(gxio_mpipe_context_t *context,
                                     unsigned int ering, unsigned int max_blks,
                                     unsigned int min_snf_blks, unsigned int db)
 {
@@ -520,11 +500,29 @@ int gxio_mpipe_config_edma_ring_blks(gxio_mpipe_context_t * context,
 
 EXPORT_SYMBOL(gxio_mpipe_config_edma_ring_blks);
 
+struct adjust_timestamp_freq_param {
+       int32_t ppb;
+};
+
+int gxio_mpipe_adjust_timestamp_freq(gxio_mpipe_context_t *context, int32_t ppb)
+{
+       struct adjust_timestamp_freq_param temp;
+       struct adjust_timestamp_freq_param *params = &temp;
+
+       params->ppb = ppb;
+
+       return hv_dev_pwrite(context->fd, 0, (HV_VirtAddr) params,
+                            sizeof(*params),
+                            GXIO_MPIPE_OP_ADJUST_TIMESTAMP_FREQ);
+}
+
+EXPORT_SYMBOL(gxio_mpipe_adjust_timestamp_freq);
+
 struct arm_pollfd_param {
        union iorpc_pollfd pollfd;
 };
 
-int gxio_mpipe_arm_pollfd(gxio_mpipe_context_t * context, int pollfd_cookie)
+int gxio_mpipe_arm_pollfd(gxio_mpipe_context_t *context, int pollfd_cookie)
 {
        struct arm_pollfd_param temp;
        struct arm_pollfd_param *params = &temp;
@@ -541,7 +539,7 @@ struct close_pollfd_param {
        union iorpc_pollfd pollfd;
 };
 
-int gxio_mpipe_close_pollfd(gxio_mpipe_context_t * context, int pollfd_cookie)
+int gxio_mpipe_close_pollfd(gxio_mpipe_context_t *context, int pollfd_cookie)
 {
        struct close_pollfd_param temp;
        struct close_pollfd_param *params = &temp;
@@ -558,7 +556,7 @@ struct get_mmio_base_param {
        HV_PTE base;
 };
 
-int gxio_mpipe_get_mmio_base(gxio_mpipe_context_t * context, HV_PTE *base)
+int gxio_mpipe_get_mmio_base(gxio_mpipe_context_t *context, HV_PTE *base)
 {
        int __result;
        struct get_mmio_base_param temp;
@@ -579,7 +577,7 @@ struct check_mmio_offset_param {
        unsigned long size;
 };
 
-int gxio_mpipe_check_mmio_offset(gxio_mpipe_context_t * context,
+int gxio_mpipe_check_mmio_offset(gxio_mpipe_context_t *context,
                                 unsigned long offset, unsigned long size)
 {
        struct check_mmio_offset_param temp;
index 64883aa..77019c6 100644 (file)
 /* This file is machine-generated; DO NOT EDIT! */
 #include "gxio/iorpc_mpipe_info.h"
 
-
 struct instance_aux_param {
        _gxio_mpipe_link_name_t name;
 };
 
-int gxio_mpipe_info_instance_aux(gxio_mpipe_info_context_t * context,
+int gxio_mpipe_info_instance_aux(gxio_mpipe_info_context_t *context,
                                 _gxio_mpipe_link_name_t name)
 {
        struct instance_aux_param temp;
@@ -39,10 +38,10 @@ struct enumerate_aux_param {
        _gxio_mpipe_link_mac_t mac;
 };
 
-int gxio_mpipe_info_enumerate_aux(gxio_mpipe_info_context_t * context,
+int gxio_mpipe_info_enumerate_aux(gxio_mpipe_info_context_t *context,
                                  unsigned int idx,
-                                 _gxio_mpipe_link_name_t * name,
-                                 _gxio_mpipe_link_mac_t * mac)
+                                 _gxio_mpipe_link_name_t *name,
+                                 _gxio_mpipe_link_mac_t *mac)
 {
        int __result;
        struct enumerate_aux_param temp;
@@ -50,7 +49,7 @@ int gxio_mpipe_info_enumerate_aux(gxio_mpipe_info_context_t * context,
 
        __result =
            hv_dev_pread(context->fd, 0, (HV_VirtAddr) params, sizeof(*params),
-                        (((uint64_t) idx << 32) |
+                        (((uint64_t)idx << 32) |
                          GXIO_MPIPE_INFO_OP_ENUMERATE_AUX));
        *name = params->name;
        *mac = params->mac;
@@ -64,7 +63,7 @@ struct get_mmio_base_param {
        HV_PTE base;
 };
 
-int gxio_mpipe_info_get_mmio_base(gxio_mpipe_info_context_t * context,
+int gxio_mpipe_info_get_mmio_base(gxio_mpipe_info_context_t *context,
                                  HV_PTE *base)
 {
        int __result;
@@ -86,7 +85,7 @@ struct check_mmio_offset_param {
        unsigned long size;
 };
 
-int gxio_mpipe_info_check_mmio_offset(gxio_mpipe_info_context_t * context,
+int gxio_mpipe_info_check_mmio_offset(gxio_mpipe_info_context_t *context,
                                      unsigned long offset, unsigned long size)
 {
        struct check_mmio_offset_param temp;
index da6e18e..1d3cedb 100644 (file)
@@ -21,7 +21,7 @@ struct alloc_asids_param {
        unsigned int flags;
 };
 
-int gxio_trio_alloc_asids(gxio_trio_context_t * context, unsigned int count,
+int gxio_trio_alloc_asids(gxio_trio_context_t *context, unsigned int count,
                          unsigned int first, unsigned int flags)
 {
        struct alloc_asids_param temp;
@@ -44,7 +44,7 @@ struct alloc_memory_maps_param {
        unsigned int flags;
 };
 
-int gxio_trio_alloc_memory_maps(gxio_trio_context_t * context,
+int gxio_trio_alloc_memory_maps(gxio_trio_context_t *context,
                                unsigned int count, unsigned int first,
                                unsigned int flags)
 {
@@ -67,7 +67,7 @@ struct alloc_scatter_queues_param {
        unsigned int flags;
 };
 
-int gxio_trio_alloc_scatter_queues(gxio_trio_context_t * context,
+int gxio_trio_alloc_scatter_queues(gxio_trio_context_t *context,
                                   unsigned int count, unsigned int first,
                                   unsigned int flags)
 {
@@ -91,7 +91,7 @@ struct alloc_pio_regions_param {
        unsigned int flags;
 };
 
-int gxio_trio_alloc_pio_regions(gxio_trio_context_t * context,
+int gxio_trio_alloc_pio_regions(gxio_trio_context_t *context,
                                unsigned int count, unsigned int first,
                                unsigned int flags)
 {
@@ -115,7 +115,7 @@ struct init_pio_region_aux_param {
        unsigned int flags;
 };
 
-int gxio_trio_init_pio_region_aux(gxio_trio_context_t * context,
+int gxio_trio_init_pio_region_aux(gxio_trio_context_t *context,
                                  unsigned int pio_region, unsigned int mac,
                                  uint32_t bus_address_hi, unsigned int flags)
 {
@@ -145,7 +145,7 @@ struct init_memory_map_mmu_aux_param {
        unsigned int order_mode;
 };
 
-int gxio_trio_init_memory_map_mmu_aux(gxio_trio_context_t * context,
+int gxio_trio_init_memory_map_mmu_aux(gxio_trio_context_t *context,
                                      unsigned int map, unsigned long va,
                                      uint64_t size, unsigned int asid,
                                      unsigned int mac, uint64_t bus_address,
@@ -175,7 +175,7 @@ struct get_port_property_param {
        struct pcie_trio_ports_property trio_ports;
 };
 
-int gxio_trio_get_port_property(gxio_trio_context_t * context,
+int gxio_trio_get_port_property(gxio_trio_context_t *context,
                                struct pcie_trio_ports_property *trio_ports)
 {
        int __result;
@@ -198,7 +198,7 @@ struct config_legacy_intr_param {
        unsigned int intx;
 };
 
-int gxio_trio_config_legacy_intr(gxio_trio_context_t * context, int inter_x,
+int gxio_trio_config_legacy_intr(gxio_trio_context_t *context, int inter_x,
                                 int inter_y, int inter_ipi, int inter_event,
                                 unsigned int mac, unsigned int intx)
 {
@@ -227,7 +227,7 @@ struct config_msi_intr_param {
        unsigned int asid;
 };
 
-int gxio_trio_config_msi_intr(gxio_trio_context_t * context, int inter_x,
+int gxio_trio_config_msi_intr(gxio_trio_context_t *context, int inter_x,
                              int inter_y, int inter_ipi, int inter_event,
                              unsigned int mac, unsigned int mem_map,
                              uint64_t mem_map_base, uint64_t mem_map_limit,
@@ -259,7 +259,7 @@ struct set_mps_mrs_param {
        unsigned int mac;
 };
 
-int gxio_trio_set_mps_mrs(gxio_trio_context_t * context, uint16_t mps,
+int gxio_trio_set_mps_mrs(gxio_trio_context_t *context, uint16_t mps,
                          uint16_t mrs, unsigned int mac)
 {
        struct set_mps_mrs_param temp;
@@ -279,7 +279,7 @@ struct force_rc_link_up_param {
        unsigned int mac;
 };
 
-int gxio_trio_force_rc_link_up(gxio_trio_context_t * context, unsigned int mac)
+int gxio_trio_force_rc_link_up(gxio_trio_context_t *context, unsigned int mac)
 {
        struct force_rc_link_up_param temp;
        struct force_rc_link_up_param *params = &temp;
@@ -296,7 +296,7 @@ struct force_ep_link_up_param {
        unsigned int mac;
 };
 
-int gxio_trio_force_ep_link_up(gxio_trio_context_t * context, unsigned int mac)
+int gxio_trio_force_ep_link_up(gxio_trio_context_t *context, unsigned int mac)
 {
        struct force_ep_link_up_param temp;
        struct force_ep_link_up_param *params = &temp;
@@ -313,7 +313,7 @@ struct get_mmio_base_param {
        HV_PTE base;
 };
 
-int gxio_trio_get_mmio_base(gxio_trio_context_t * context, HV_PTE *base)
+int gxio_trio_get_mmio_base(gxio_trio_context_t *context, HV_PTE *base)
 {
        int __result;
        struct get_mmio_base_param temp;
@@ -334,7 +334,7 @@ struct check_mmio_offset_param {
        unsigned long size;
 };
 
-int gxio_trio_check_mmio_offset(gxio_trio_context_t * context,
+int gxio_trio_check_mmio_offset(gxio_trio_context_t *context,
                                unsigned long offset, unsigned long size)
 {
        struct check_mmio_offset_param temp;
index cf3c3cc..9c82007 100644 (file)
@@ -19,7 +19,7 @@ struct cfg_interrupt_param {
        union iorpc_interrupt interrupt;
 };
 
-int gxio_usb_host_cfg_interrupt(gxio_usb_host_context_t * context, int inter_x,
+int gxio_usb_host_cfg_interrupt(gxio_usb_host_context_t *context, int inter_x,
                                int inter_y, int inter_ipi, int inter_event)
 {
        struct cfg_interrupt_param temp;
@@ -41,7 +41,7 @@ struct register_client_memory_param {
        unsigned int flags;
 };
 
-int gxio_usb_host_register_client_memory(gxio_usb_host_context_t * context,
+int gxio_usb_host_register_client_memory(gxio_usb_host_context_t *context,
                                         HV_PTE pte, unsigned int flags)
 {
        struct register_client_memory_param temp;
@@ -61,7 +61,7 @@ struct get_mmio_base_param {
        HV_PTE base;
 };
 
-int gxio_usb_host_get_mmio_base(gxio_usb_host_context_t * context, HV_PTE *base)
+int gxio_usb_host_get_mmio_base(gxio_usb_host_context_t *context, HV_PTE *base)
 {
        int __result;
        struct get_mmio_base_param temp;
@@ -82,7 +82,7 @@ struct check_mmio_offset_param {
        unsigned long size;
 };
 
-int gxio_usb_host_check_mmio_offset(gxio_usb_host_context_t * context,
+int gxio_usb_host_check_mmio_offset(gxio_usb_host_context_t *context,
                                    unsigned long offset, unsigned long size)
 {
        struct check_mmio_offset_param temp;
index 66b002f..785afad 100644 (file)
@@ -26,7 +26,7 @@
 #include <gxio/kiorpc.h>
 #include <gxio/usb_host.h>
 
-int gxio_usb_host_init(gxio_usb_host_context_t * context, int usb_index,
+int gxio_usb_host_init(gxio_usb_host_context_t *context, int usb_index,
                       int is_ehci)
 {
        char file[32];
@@ -63,7 +63,7 @@ int gxio_usb_host_init(gxio_usb_host_context_t * context, int usb_index,
 
 EXPORT_SYMBOL_GPL(gxio_usb_host_init);
 
-int gxio_usb_host_destroy(gxio_usb_host_context_t * context)
+int gxio_usb_host_destroy(gxio_usb_host_context_t *context)
 {
        iounmap((void __force __iomem *)(context->mmio_base));
        hv_dev_close(context->fd);
@@ -76,14 +76,14 @@ int gxio_usb_host_destroy(gxio_usb_host_context_t * context)
 
 EXPORT_SYMBOL_GPL(gxio_usb_host_destroy);
 
-void *gxio_usb_host_get_reg_start(gxio_usb_host_context_t * context)
+void *gxio_usb_host_get_reg_start(gxio_usb_host_context_t *context)
 {
        return context->mmio_base;
 }
 
 EXPORT_SYMBOL_GPL(gxio_usb_host_get_reg_start);
 
-size_t gxio_usb_host_get_reg_len(gxio_usb_host_context_t * context)
+size_t gxio_usb_host_get_reg_len(gxio_usb_host_context_t *context)
 {
        return HV_USB_HOST_MMIO_SIZE;
 }
index 8a33912..904538e 100644 (file)
@@ -176,7 +176,18 @@ typedef union
      */
     uint_reg_t stack_idx    : 5;
     /* Reserved. */
-    uint_reg_t __reserved_2 : 5;
+    uint_reg_t __reserved_2 : 3;
+    /*
+     * Instance ID.  For devices that support automatic buffer return between
+     * mPIPE instances, this field indicates the buffer owner.  If the INST
+     * field does not match the mPIPE's instance number when a packet is
+     * egressed, buffers with HWB set will be returned to the other mPIPE
+     * instance.  Note that not all devices support multi-mPIPE buffer
+     * return.  The MPIPE_EDMA_INFO.REMOTE_BUFF_RTN_SUPPORT bit indicates
+     * whether the INST field in the buffer descriptor is populated by iDMA
+     * hardware. This field is ignored on writes.
+     */
+    uint_reg_t inst         : 2;
     /*
      * Reads as one to indicate that this is a hardware managed buffer.
      * Ignored on writes since all buffers on a given stack are the same size.
@@ -205,7 +216,8 @@ typedef union
     uint_reg_t c            : 2;
     uint_reg_t size         : 3;
     uint_reg_t hwb          : 1;
-    uint_reg_t __reserved_2 : 5;
+    uint_reg_t inst         : 2;
+    uint_reg_t __reserved_2 : 3;
     uint_reg_t stack_idx    : 5;
     uint_reg_t __reserved_1 : 6;
     int_reg_t va           : 35;
@@ -231,9 +243,9 @@ typedef union
     /* Reserved. */
     uint_reg_t __reserved_0 : 3;
     /* eDMA ring being accessed */
-    uint_reg_t ring         : 5;
+    uint_reg_t ring         : 6;
     /* Reserved. */
-    uint_reg_t __reserved_1 : 18;
+    uint_reg_t __reserved_1 : 17;
     /*
      * This field of the address selects the region (address space) to be
      * accessed.  For the egress DMA post region, this field must be 5.
@@ -250,8 +262,8 @@ typedef union
     uint_reg_t svc_dom      : 5;
     uint_reg_t __reserved_2 : 6;
     uint_reg_t region       : 3;
-    uint_reg_t __reserved_1 : 18;
-    uint_reg_t ring         : 5;
+    uint_reg_t __reserved_1 : 17;
+    uint_reg_t ring         : 6;
     uint_reg_t __reserved_0 : 3;
 #endif
   };
index 410a040..84022ac 100644 (file)
 #ifndef __ARCH_MPIPE_CONSTANTS_H__
 #define __ARCH_MPIPE_CONSTANTS_H__
 
-#define MPIPE_NUM_CLASSIFIERS 10
+#define MPIPE_NUM_CLASSIFIERS 16
 #define MPIPE_CLS_MHZ 1200
 
-#define MPIPE_NUM_EDMA_RINGS 32
+#define MPIPE_NUM_EDMA_RINGS 64
 
 #define MPIPE_NUM_SGMII_MACS 16
-#define MPIPE_NUM_XAUI_MACS 4
+#define MPIPE_NUM_XAUI_MACS 16
 #define MPIPE_NUM_LOOPBACK_CHANNELS 4
 #define MPIPE_NUM_NON_LB_CHANNELS 28
 
index f2e9e12..13b3c43 100644 (file)
@@ -44,8 +44,14 @@ typedef union
      * descriptors toggles each time the ring tail pointer wraps.
      */
     uint_reg_t gen        : 1;
+    /**
+     * For devices with EDMA reorder support, this field allows the
+     * descriptor to select the egress FIFO.  The associated DMA ring must
+     * have ALLOW_EFIFO_SEL enabled.
+     */
+    uint_reg_t efifo_sel  : 6;
     /** Reserved.  Must be zero. */
-    uint_reg_t r0         : 7;
+    uint_reg_t r0         : 1;
     /** Checksum generation enabled for this transfer. */
     uint_reg_t csum       : 1;
     /**
@@ -110,7 +116,8 @@ typedef union
     uint_reg_t notif      : 1;
     uint_reg_t ns         : 1;
     uint_reg_t csum       : 1;
-    uint_reg_t r0         : 7;
+    uint_reg_t r0         : 1;
+    uint_reg_t efifo_sel  : 6;
     uint_reg_t gen        : 1;
 #endif
 
@@ -126,14 +133,16 @@ typedef union
     /** Reserved. */
     uint_reg_t __reserved_1 : 3;
     /**
-     * Instance ID.  For devices that support more than one mPIPE instance,
-     * this field indicates the buffer owner.  If the INST field does not
-     * match the mPIPE's instance number when a packet is egressed, buffers
-     * with HWB set will be returned to the other mPIPE instance.
+     * Instance ID.  For devices that support automatic buffer return between
+     * mPIPE instances, this field indicates the buffer owner.  If the INST
+     * field does not match the mPIPE's instance number when a packet is
+     * egressed, buffers with HWB set will be returned to the other mPIPE
+     * instance.  Note that not all devices support multi-mPIPE buffer
+     * return.  The MPIPE_EDMA_INFO.REMOTE_BUFF_RTN_SUPPORT bit indicates
+     * whether the INST field in the buffer descriptor is populated by iDMA
+     * hardware.
      */
-    uint_reg_t inst         : 1;
-    /** Reserved. */
-    uint_reg_t __reserved_2 : 1;
+    uint_reg_t inst         : 2;
     /**
      * Always set to one by hardware in iDMA packet descriptors.  For eDMA,
      * indicates whether the buffer will be released to the buffer stack
@@ -166,8 +175,7 @@ typedef union
     uint_reg_t c            : 2;
     uint_reg_t size         : 3;
     uint_reg_t hwb          : 1;
-    uint_reg_t __reserved_2 : 1;
-    uint_reg_t inst         : 1;
+    uint_reg_t inst         : 2;
     uint_reg_t __reserved_1 : 3;
     uint_reg_t stack_idx    : 5;
     uint_reg_t __reserved_0 : 6;
@@ -408,7 +416,10 @@ typedef union
     /**
      * Sequence number applied when packet is distributed.   Classifier
      * selects which sequence number is to be applied by writing the 13-bit
-     * SQN-selector into this field.
+     * SQN-selector into this field.  For devices that support EXT_SQN (as
+     * indicated in IDMA_INFO.EXT_SQN_SUPPORT), the GP_SQN can be extended to
+     * 32-bits via the IDMA_CTL.EXT_SQN register.  In this case the
+     * PACKET_SQN will be reduced to 32 bits.
      */
     uint_reg_t gp_sqn     : 16;
     /**
@@ -451,14 +462,16 @@ typedef union
     /** Reserved. */
     uint_reg_t __reserved_5 : 3;
     /**
-     * Instance ID.  For devices that support more than one mPIPE instance,
-     * this field indicates the buffer owner.  If the INST field does not
-     * match the mPIPE's instance number when a packet is egressed, buffers
-     * with HWB set will be returned to the other mPIPE instance.
+     * Instance ID.  For devices that support automatic buffer return between
+     * mPIPE instances, this field indicates the buffer owner.  If the INST
+     * field does not match the mPIPE's instance number when a packet is
+     * egressed, buffers with HWB set will be returned to the other mPIPE
+     * instance.  Note that not all devices support multi-mPIPE buffer
+     * return.  The MPIPE_EDMA_INFO.REMOTE_BUFF_RTN_SUPPORT bit indicates
+     * whether the INST field in the buffer descriptor is populated by iDMA
+     * hardware.
      */
-    uint_reg_t inst         : 1;
-    /** Reserved. */
-    uint_reg_t __reserved_6 : 1;
+    uint_reg_t inst         : 2;
     /**
      * Always set to one by hardware in iDMA packet descriptors.  For eDMA,
      * indicates whether the buffer will be released to the buffer stack
@@ -491,8 +504,7 @@ typedef union
     uint_reg_t c            : 2;
     uint_reg_t size         : 3;
     uint_reg_t hwb          : 1;
-    uint_reg_t __reserved_6 : 1;
-    uint_reg_t inst         : 1;
+    uint_reg_t inst         : 2;
     uint_reg_t __reserved_5 : 3;
     uint_reg_t stack_idx    : 5;
     uint_reg_t __reserved_4 : 6;
index 628b045..85647e9 100644 (file)
 #ifndef __ARCH_TRIO_CONSTANTS_H__
 #define __ARCH_TRIO_CONSTANTS_H__
 
-#define TRIO_NUM_ASIDS 16
+#define TRIO_NUM_ASIDS 32
 #define TRIO_NUM_TLBS_PER_ASID 16
 
 #define TRIO_NUM_TPIO_REGIONS 8
 #define TRIO_LOG2_NUM_TPIO_REGIONS 3
 
-#define TRIO_NUM_MAP_MEM_REGIONS 16
-#define TRIO_LOG2_NUM_MAP_MEM_REGIONS 4
+#define TRIO_NUM_MAP_MEM_REGIONS 32
+#define TRIO_LOG2_NUM_MAP_MEM_REGIONS 5
 #define TRIO_NUM_MAP_SQ_REGIONS 8
 #define TRIO_LOG2_NUM_MAP_SQ_REGIONS 3
 
 #define TRIO_LOG2_NUM_SQ_FIFO_ENTRIES 6
 
-#define TRIO_NUM_PUSH_DMA_RINGS 32
+#define TRIO_NUM_PUSH_DMA_RINGS 64
 
-#define TRIO_NUM_PULL_DMA_RINGS 32
+#define TRIO_NUM_PULL_DMA_RINGS 64
 
 #endif /* __ARCH_TRIO_CONSTANTS_H__ */
index 6346888..6727680 100644 (file)
@@ -182,10 +182,9 @@ static inline __attribute_const__ int get_order(unsigned long size)
 
 #define PAGE_OFFSET            (-(_AC(1, UL) << (MAX_VA_WIDTH - 1)))
 #define KERNEL_HIGH_VADDR      _AC(0xfffffff800000000, UL)  /* high 32GB */
-#define FIXADDR_BASE           (KERNEL_HIGH_VADDR - 0x400000000) /* 4 GB */
-#define FIXADDR_TOP            (KERNEL_HIGH_VADDR - 0x300000000) /* 4 GB */
+#define FIXADDR_BASE           (KERNEL_HIGH_VADDR - 0x300000000) /* 4 GB */
+#define FIXADDR_TOP            (KERNEL_HIGH_VADDR - 0x200000000) /* 4 GB */
 #define _VMALLOC_START         FIXADDR_TOP
-#define HUGE_VMAP_BASE         (KERNEL_HIGH_VADDR - 0x200000000) /* 4 GB */
 #define MEM_SV_START           (KERNEL_HIGH_VADDR - 0x100000000) /* 256 MB */
 #define MEM_MODULE_START       (MEM_SV_START + (256*1024*1024)) /* 256 MB */
 #define MEM_MODULE_END         (MEM_MODULE_START + (256*1024*1024))
index 63142ab..d26a422 100644 (file)
 #define PKMAP_BASE   ((FIXADDR_BOOT_START - PAGE_SIZE*LAST_PKMAP) & PGDIR_MASK)
 
 #ifdef CONFIG_HIGHMEM
-# define __VMAPPING_END        (PKMAP_BASE & ~(HPAGE_SIZE-1))
+# define _VMALLOC_END  (PKMAP_BASE & ~(HPAGE_SIZE-1))
 #else
-# define __VMAPPING_END        (FIXADDR_START & ~(HPAGE_SIZE-1))
-#endif
-
-#ifdef CONFIG_HUGEVMAP
-#define HUGE_VMAP_END  __VMAPPING_END
-#define HUGE_VMAP_BASE (HUGE_VMAP_END - CONFIG_NR_HUGE_VMAPS * HPAGE_SIZE)
-#define _VMALLOC_END   HUGE_VMAP_BASE
-#else
-#define _VMALLOC_END   __VMAPPING_END
+# define _VMALLOC_END  (FIXADDR_START & ~(HPAGE_SIZE-1))
 #endif
 
 /*
index 3421177..2c8a9cd 100644 (file)
  * memory allocation code).  The vmalloc code puts in an internal
  * guard page between each allocation.
  */
-#define _VMALLOC_END   HUGE_VMAP_BASE
+#define _VMALLOC_END   MEM_SV_START
 #define VMALLOC_END    _VMALLOC_END
 #define VMALLOC_START  _VMALLOC_START
 
-#define HUGE_VMAP_END  (HUGE_VMAP_BASE + PGDIR_SIZE)
-
 #ifndef __ASSEMBLY__
 
 /* We have no pud since we are a three-level page table. */
index fdd07f8..4cda03d 100644 (file)
 #define GXIO_MPIPE_OP_GET_MMIO_BASE    IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8000)
 #define GXIO_MPIPE_OP_CHECK_MMIO_OFFSET IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8001)
 
-int gxio_mpipe_alloc_buffer_stacks(gxio_mpipe_context_t * context,
+int gxio_mpipe_alloc_buffer_stacks(gxio_mpipe_context_t *context,
                                   unsigned int count, unsigned int first,
                                   unsigned int flags);
 
-int gxio_mpipe_init_buffer_stack_aux(gxio_mpipe_context_t * context,
+int gxio_mpipe_init_buffer_stack_aux(gxio_mpipe_context_t *context,
                                     void *mem_va, size_t mem_size,
                                     unsigned int mem_flags, unsigned int stack,
                                     unsigned int buffer_size_enum);
 
 
-int gxio_mpipe_alloc_notif_rings(gxio_mpipe_context_t * context,
+int gxio_mpipe_alloc_notif_rings(gxio_mpipe_context_t *context,
                                 unsigned int count, unsigned int first,
                                 unsigned int flags);
 
-int gxio_mpipe_init_notif_ring_aux(gxio_mpipe_context_t * context, void *mem_va,
+int gxio_mpipe_init_notif_ring_aux(gxio_mpipe_context_t *context, void *mem_va,
                                   size_t mem_size, unsigned int mem_flags,
                                   unsigned int ring);
 
-int gxio_mpipe_request_notif_ring_interrupt(gxio_mpipe_context_t * context,
+int gxio_mpipe_request_notif_ring_interrupt(gxio_mpipe_context_t *context,
                                            int inter_x, int inter_y,
                                            int inter_ipi, int inter_event,
                                            unsigned int ring);
 
-int gxio_mpipe_enable_notif_ring_interrupt(gxio_mpipe_context_t * context,
+int gxio_mpipe_enable_notif_ring_interrupt(gxio_mpipe_context_t *context,
                                           unsigned int ring);
 
-int gxio_mpipe_alloc_notif_groups(gxio_mpipe_context_t * context,
+int gxio_mpipe_alloc_notif_groups(gxio_mpipe_context_t *context,
                                  unsigned int count, unsigned int first,
                                  unsigned int flags);
 
-int gxio_mpipe_init_notif_group(gxio_mpipe_context_t * context,
+int gxio_mpipe_init_notif_group(gxio_mpipe_context_t *context,
                                unsigned int group,
                                gxio_mpipe_notif_group_bits_t bits);
 
-int gxio_mpipe_alloc_buckets(gxio_mpipe_context_t * context, unsigned int count,
+int gxio_mpipe_alloc_buckets(gxio_mpipe_context_t *context, unsigned int count,
                             unsigned int first, unsigned int flags);
 
-int gxio_mpipe_init_bucket(gxio_mpipe_context_t * context, unsigned int bucket,
+int gxio_mpipe_init_bucket(gxio_mpipe_context_t *context, unsigned int bucket,
                           MPIPE_LBL_INIT_DAT_BSTS_TBL_t bucket_info);
 
-int gxio_mpipe_alloc_edma_rings(gxio_mpipe_context_t * context,
+int gxio_mpipe_alloc_edma_rings(gxio_mpipe_context_t *context,
                                unsigned int count, unsigned int first,
                                unsigned int flags);
 
-int gxio_mpipe_init_edma_ring_aux(gxio_mpipe_context_t * context, void *mem_va,
+int gxio_mpipe_init_edma_ring_aux(gxio_mpipe_context_t *context, void *mem_va,
                                  size_t mem_size, unsigned int mem_flags,
                                  unsigned int ring, unsigned int channel);
 
 
-int gxio_mpipe_commit_rules(gxio_mpipe_context_t * context, const void *blob,
+int gxio_mpipe_commit_rules(gxio_mpipe_context_t *context, const void *blob,
                            size_t blob_size);
 
-int gxio_mpipe_register_client_memory(gxio_mpipe_context_t * context,
+int gxio_mpipe_register_client_memory(gxio_mpipe_context_t *context,
                                      unsigned int iotlb, HV_PTE pte,
                                      unsigned int flags);
 
-int gxio_mpipe_link_open_aux(gxio_mpipe_context_t * context,
+int gxio_mpipe_link_open_aux(gxio_mpipe_context_t *context,
                             _gxio_mpipe_link_name_t name, unsigned int flags);
 
-int gxio_mpipe_link_close_aux(gxio_mpipe_context_t * context, int mac);
+int gxio_mpipe_link_close_aux(gxio_mpipe_context_t *context, int mac);
 
-int gxio_mpipe_link_set_attr_aux(gxio_mpipe_context_t * context, int mac,
+int gxio_mpipe_link_set_attr_aux(gxio_mpipe_context_t *context, int mac,
                                 uint32_t attr, int64_t val);
 
-int gxio_mpipe_get_timestamp_aux(gxio_mpipe_context_t * context, uint64_t * sec,
-                                uint64_t * nsec, uint64_t * cycles);
+int gxio_mpipe_get_timestamp_aux(gxio_mpipe_context_t *context, uint64_t *sec,
+                                uint64_t *nsec, uint64_t *cycles);
 
-int gxio_mpipe_set_timestamp_aux(gxio_mpipe_context_t * context, uint64_t sec,
+int gxio_mpipe_set_timestamp_aux(gxio_mpipe_context_t *context, uint64_t sec,
                                 uint64_t nsec, uint64_t cycles);
 
-int gxio_mpipe_adjust_timestamp_aux(gxio_mpipe_context_t * context,
+int gxio_mpipe_adjust_timestamp_aux(gxio_mpipe_context_t *context,
                                    int64_t nsec);
 
-int gxio_mpipe_adjust_timestamp_freq(gxio_mpipe_context_t * context,
+int gxio_mpipe_adjust_timestamp_freq(gxio_mpipe_context_t *context,
                                     int32_t ppb);
 
-int gxio_mpipe_arm_pollfd(gxio_mpipe_context_t * context, int pollfd_cookie);
+int gxio_mpipe_arm_pollfd(gxio_mpipe_context_t *context, int pollfd_cookie);
 
-int gxio_mpipe_close_pollfd(gxio_mpipe_context_t * context, int pollfd_cookie);
+int gxio_mpipe_close_pollfd(gxio_mpipe_context_t *context, int pollfd_cookie);
 
-int gxio_mpipe_get_mmio_base(gxio_mpipe_context_t * context, HV_PTE *base);
+int gxio_mpipe_get_mmio_base(gxio_mpipe_context_t *context, HV_PTE *base);
 
-int gxio_mpipe_check_mmio_offset(gxio_mpipe_context_t * context,
+int gxio_mpipe_check_mmio_offset(gxio_mpipe_context_t *context,
                                 unsigned long offset, unsigned long size);
 
 #endif /* !__GXIO_MPIPE_LINUX_RPC_H__ */
index 476c5e5..f0b0428 100644 (file)
 #define GXIO_MPIPE_INFO_OP_CHECK_MMIO_OFFSET IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8001)
 
 
-int gxio_mpipe_info_instance_aux(gxio_mpipe_info_context_t * context,
+int gxio_mpipe_info_instance_aux(gxio_mpipe_info_context_t *context,
                                 _gxio_mpipe_link_name_t name);
 
-int gxio_mpipe_info_enumerate_aux(gxio_mpipe_info_context_t * context,
+int gxio_mpipe_info_enumerate_aux(gxio_mpipe_info_context_t *context,
                                  unsigned int idx,
-                                 _gxio_mpipe_link_name_t * name,
-                                 _gxio_mpipe_link_mac_t * mac);
+                                 _gxio_mpipe_link_name_t *name,
+                                 _gxio_mpipe_link_mac_t *mac);
 
-int gxio_mpipe_info_get_mmio_base(gxio_mpipe_info_context_t * context,
+int gxio_mpipe_info_get_mmio_base(gxio_mpipe_info_context_t *context,
                                  HV_PTE *base);
 
-int gxio_mpipe_info_check_mmio_offset(gxio_mpipe_info_context_t * context,
+int gxio_mpipe_info_check_mmio_offset(gxio_mpipe_info_context_t *context,
                                      unsigned long offset, unsigned long size);
 
 #endif /* !__GXIO_MPIPE_INFO_LINUX_RPC_H__ */
index d95b96f..376a4f7 100644 (file)
 #define GXIO_TRIO_OP_GET_MMIO_BASE     IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8000)
 #define GXIO_TRIO_OP_CHECK_MMIO_OFFSET IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8001)
 
-int gxio_trio_alloc_asids(gxio_trio_context_t * context, unsigned int count,
+int gxio_trio_alloc_asids(gxio_trio_context_t *context, unsigned int count,
                          unsigned int first, unsigned int flags);
 
 
-int gxio_trio_alloc_memory_maps(gxio_trio_context_t * context,
+int gxio_trio_alloc_memory_maps(gxio_trio_context_t *context,
                                unsigned int count, unsigned int first,
                                unsigned int flags);
 
 
-int gxio_trio_alloc_scatter_queues(gxio_trio_context_t * context,
+int gxio_trio_alloc_scatter_queues(gxio_trio_context_t *context,
                                   unsigned int count, unsigned int first,
                                   unsigned int flags);
 
-int gxio_trio_alloc_pio_regions(gxio_trio_context_t * context,
+int gxio_trio_alloc_pio_regions(gxio_trio_context_t *context,
                                unsigned int count, unsigned int first,
                                unsigned int flags);
 
-int gxio_trio_init_pio_region_aux(gxio_trio_context_t * context,
+int gxio_trio_init_pio_region_aux(gxio_trio_context_t *context,
                                  unsigned int pio_region, unsigned int mac,
                                  uint32_t bus_address_hi, unsigned int flags);
 
 
-int gxio_trio_init_memory_map_mmu_aux(gxio_trio_context_t * context,
+int gxio_trio_init_memory_map_mmu_aux(gxio_trio_context_t *context,
                                      unsigned int map, unsigned long va,
                                      uint64_t size, unsigned int asid,
                                      unsigned int mac, uint64_t bus_address,
                                      unsigned int node,
                                      unsigned int order_mode);
 
-int gxio_trio_get_port_property(gxio_trio_context_t * context,
+int gxio_trio_get_port_property(gxio_trio_context_t *context,
                                struct pcie_trio_ports_property *trio_ports);
 
-int gxio_trio_config_legacy_intr(gxio_trio_context_t * context, int inter_x,
+int gxio_trio_config_legacy_intr(gxio_trio_context_t *context, int inter_x,
                                 int inter_y, int inter_ipi, int inter_event,
                                 unsigned int mac, unsigned int intx);
 
-int gxio_trio_config_msi_intr(gxio_trio_context_t * context, int inter_x,
+int gxio_trio_config_msi_intr(gxio_trio_context_t *context, int inter_x,
                              int inter_y, int inter_ipi, int inter_event,
                              unsigned int mac, unsigned int mem_map,
                              uint64_t mem_map_base, uint64_t mem_map_limit,
                              unsigned int asid);
 
 
-int gxio_trio_set_mps_mrs(gxio_trio_context_t * context, uint16_t mps,
+int gxio_trio_set_mps_mrs(gxio_trio_context_t *context, uint16_t mps,
                          uint16_t mrs, unsigned int mac);
 
-int gxio_trio_force_rc_link_up(gxio_trio_context_t * context, unsigned int mac);
+int gxio_trio_force_rc_link_up(gxio_trio_context_t *context, unsigned int mac);
 
-int gxio_trio_force_ep_link_up(gxio_trio_context_t * context, unsigned int mac);
+int gxio_trio_force_ep_link_up(gxio_trio_context_t *context, unsigned int mac);
 
-int gxio_trio_get_mmio_base(gxio_trio_context_t * context, HV_PTE *base);
+int gxio_trio_get_mmio_base(gxio_trio_context_t *context, HV_PTE *base);
 
-int gxio_trio_check_mmio_offset(gxio_trio_context_t * context,
+int gxio_trio_check_mmio_offset(gxio_trio_context_t *context,
                                unsigned long offset, unsigned long size);
 
 #endif /* !__GXIO_TRIO_LINUX_RPC_H__ */
index 8622e7d..79962a9 100644 (file)
 #define GXIO_USB_HOST_OP_GET_MMIO_BASE IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8000)
 #define GXIO_USB_HOST_OP_CHECK_MMIO_OFFSET IORPC_OPCODE(IORPC_FORMAT_NONE_NOUSER, 0x8001)
 
-int gxio_usb_host_cfg_interrupt(gxio_usb_host_context_t * context, int inter_x,
+int gxio_usb_host_cfg_interrupt(gxio_usb_host_context_t *context, int inter_x,
                                int inter_y, int inter_ipi, int inter_event);
 
-int gxio_usb_host_register_client_memory(gxio_usb_host_context_t * context,
+int gxio_usb_host_register_client_memory(gxio_usb_host_context_t *context,
                                         HV_PTE pte, unsigned int flags);
 
-int gxio_usb_host_get_mmio_base(gxio_usb_host_context_t * context,
+int gxio_usb_host_get_mmio_base(gxio_usb_host_context_t *context,
                                HV_PTE *base);
 
-int gxio_usb_host_check_mmio_offset(gxio_usb_host_context_t * context,
+int gxio_usb_host_check_mmio_offset(gxio_usb_host_context_t *context,
                                    unsigned long offset, unsigned long size);
 
 #endif /* !__GXIO_USB_HOST_LINUX_RPC_H__ */
index 5eedec0..93c9636 100644 (file)
@@ -53,7 +53,7 @@ typedef struct {
  * @return Zero if the context was successfully initialized, else a
  *  GXIO_ERR_xxx error code.
  */
-extern int gxio_usb_host_init(gxio_usb_host_context_t * context, int usb_index,
+extern int gxio_usb_host_init(gxio_usb_host_context_t *context, int usb_index,
                              int is_ehci);
 
 /* Destroy a USB context.
@@ -68,20 +68,20 @@ extern int gxio_usb_host_init(gxio_usb_host_context_t * context, int usb_index,
  * @return Zero if the context was successfully destroyed, else a
  *  GXIO_ERR_xxx error code.
  */
-extern int gxio_usb_host_destroy(gxio_usb_host_context_t * context);
+extern int gxio_usb_host_destroy(gxio_usb_host_context_t *context);
 
 /* Retrieve the address of the shim's MMIO registers.
  *
  * @param context Pointer to a properly initialized gxio_usb_host_context_t.
  * @return The address of the shim's MMIO registers.
  */
-extern void *gxio_usb_host_get_reg_start(gxio_usb_host_context_t * context);
+extern void *gxio_usb_host_get_reg_start(gxio_usb_host_context_t *context);
 
 /* Retrieve the length of the shim's MMIO registers.
  *
  * @param context Pointer to a properly initialized gxio_usb_host_context_t.
  * @return The length of the shim's MMIO registers.
  */
-extern size_t gxio_usb_host_get_reg_len(gxio_usb_host_context_t * context);
+extern size_t gxio_usb_host_get_reg_len(gxio_usb_host_context_t *context);
 
 #endif /* _GXIO_USB_H_ */
index ed37841..4912084 100644 (file)
@@ -84,7 +84,7 @@ COMPAT_SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned int, offset_high,
 {
        return sys_llseek(fd, offset_high, offset_low, result, origin);
 }
+
 /* Provide the compat syscall number to call mapping. */
 #undef __SYSCALL
 #define __SYSCALL(nr, call) [nr] = (call),
diff --git a/arch/tile/kernel/futex_64.S b/arch/tile/kernel/futex_64.S
deleted file mode 100644 (file)
index f465d1e..0000000
+++ /dev/null
@@ -1,55 +0,0 @@
-/*
- * Copyright 2011 Tilera Corporation. All Rights Reserved.
- *
- *   This program is free software; you can redistribute it and/or
- *   modify it under the terms of the GNU General Public License
- *   as published by the Free Software Foundation, version 2.
- *
- *   This program is distributed in the hope that it will be useful, but
- *   WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- *   NON INFRINGEMENT.  See the GNU General Public License for
- *   more details.
- *
- * Atomically access user memory, but use MMU to avoid propagating
- * kernel exceptions.
- */
-
-#include <linux/linkage.h>
-#include <asm/errno.h>
-#include <asm/futex.h>
-#include <asm/page.h>
-#include <asm/processor.h>
-
-/*
- * Provide a set of atomic memory operations supporting <asm/futex.h>.
- *
- * r0: user address to manipulate
- * r1: new value to write, or for cmpxchg, old value to compare against
- * r2: (cmpxchg only) new value to write
- *
- * Return __get_user struct, r0 with value, r1 with error.
- */
-#define FUTEX_OP(name, ...) \
-STD_ENTRY(futex_##name)                        \
-       __VA_ARGS__;                    \
-       {                               \
-        move   r1, zero;               \
-        jrp    lr                      \
-       };                              \
-       STD_ENDPROC(futex_##name);      \
-       .pushsection __ex_table,"a";    \
-       .quad 1b, get_user_fault;       \
-       .popsection
-
-       .pushsection .fixup,"ax"
-get_user_fault:
-       { movei r1, -EFAULT; jrp lr }
-       ENDPROC(get_user_fault)
-       .popsection
-
-FUTEX_OP(cmpxchg, mtspr CMPEXCH_VALUE, r1; 1: cmpexch4 r0, r0, r2)
-FUTEX_OP(set, 1: exch4 r0, r0, r1)
-FUTEX_OP(add, 1: fetchadd4 r0, r0, r1)
-FUTEX_OP(or, 1: fetchor4 r0, r0, r1)
-FUTEX_OP(andn, nor r1, r1, zero; 1: fetchand4 r0, r0, r1)
index 4c34cae..74c9172 100644 (file)
@@ -1268,8 +1268,7 @@ static void __init validate_va(void)
        if ((long)VMALLOC_START >= 0)
                early_panic(
                        "Linux VMALLOC region below the 2GB line (%#lx)!\n"
-                       "Reconfigure the kernel with fewer NR_HUGE_VMAPS\n"
-                       "or smaller VMALLOC_RESERVE.\n",
+                       "Reconfigure the kernel with smaller VMALLOC_RESERVE.\n",
                        VMALLOC_START);
 #endif
 }
index b425fb6..b030b4e 100644 (file)
@@ -551,8 +551,8 @@ static tilegx_bundle_bits  jit_x1_bnezt(int ra, int broff)
 /*
  * This function generates unalign fixup JIT.
  *
- * We fist find unalign load/store instruction's destination, source
- * reguisters: ra, rb and rd. and 3 scratch registers by calling
+ * We first find unalign load/store instruction's destination, source
+ * registers: ra, rb and rd. and 3 scratch registers by calling
  * find_regs(...). 3 scratch clobbers should not alias with any register
  * used in the fault bundle. Then analyze the fault bundle to determine
  * if it's a load or store, operand width, branch or address increment etc.
index 111d5a9..6c05712 100644 (file)
@@ -149,8 +149,6 @@ static inline int vmalloc_fault(pgd_t *pgd, unsigned long address)
        pmd_k = vmalloc_sync_one(pgd, address);
        if (!pmd_k)
                return -1;
-       if (pmd_huge(*pmd_k))
-               return 0;   /* support TILE huge_vmap() API */
        pte_k = pte_offset_kernel(pmd_k, address);
        if (!pte_present(*pte_k))
                return -1;
@@ -280,8 +278,7 @@ static int handle_page_fault(struct pt_regs *regs,
        if (!is_page_fault)
                write = 1;
 
-       flags = (FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
-                (write ? FAULT_FLAG_WRITE : 0));
+       flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
 
        is_kernel_mode = !user_mode(regs);
 
@@ -365,6 +362,9 @@ static int handle_page_fault(struct pt_regs *regs,
                goto bad_area_nosemaphore;
        }
 
+       if (!is_kernel_mode)
+               flags |= FAULT_FLAG_USER;
+
        /*
         * When running in the kernel we expect faults to occur only to
         * addresses in user space.  All other faults represent errors in the
@@ -425,12 +425,12 @@ good_area:
 #endif
                if (!(vma->vm_flags & VM_WRITE))
                        goto bad_area;
+               flags |= FAULT_FLAG_WRITE;
        } else {
                if (!is_page_fault || !(vma->vm_flags & VM_READ))
                        goto bad_area;
        }
 
- survive:
        /*
         * If for any reason at all we couldn't handle the fault,
         * make sure we exit gracefully rather than endlessly redo
@@ -555,11 +555,6 @@ no_context:
  */
 out_of_memory:
        up_read(&mm->mmap_sem);
-       if (is_global_init(tsk)) {
-               yield();
-               down_read(&mm->mmap_sem);
-               goto survive;
-       }
        if (is_kernel_mode)
                goto no_context;
        pagefault_out_of_memory();
index e514899..0cb3bba 100644 (file)
@@ -166,6 +166,11 @@ int pud_huge(pud_t pud)
        return !!(pud_val(pud) & _PAGE_HUGE_PAGE);
 }
 
+int pmd_huge_support(void)
+{
+       return 1;
+}
+
 struct page *follow_huge_pmd(struct mm_struct *mm, unsigned long address,
                             pmd_t *pmd, int write)
 {
index 4e316de..0fa1acf 100644 (file)
@@ -827,10 +827,6 @@ void __init mem_init(void)
               FIXADDR_START, FIXADDR_TOP + PAGE_SIZE - 1);
        printk(KERN_DEBUG "  PKMAP   %#lx - %#lx\n",
               PKMAP_BASE, PKMAP_ADDR(LAST_PKMAP) - 1);
-#endif
-#ifdef CONFIG_HUGEVMAP
-       printk(KERN_DEBUG "  HUGEMAP %#lx - %#lx\n",
-              HUGE_VMAP_BASE, HUGE_VMAP_END - 1);
 #endif
        printk(KERN_DEBUG "  VMALLOC %#lx - %#lx\n",
               _VMALLOC_START, _VMALLOC_END - 1);
index 2deaddf..4fd9ec0 100644 (file)
@@ -127,8 +127,7 @@ void shatter_huge_page(unsigned long addr)
        }
 
        /* Shatter the huge page into the preallocated L2 page table. */
-       pmd_populate_kernel(&init_mm, pmd,
-                           get_prealloc_pte(pte_pfn(*(pte_t *)pmd)));
+       pmd_populate_kernel(&init_mm, pmd, get_prealloc_pte(pmd_pfn(*pmd)));
 
 #ifdef __PAGETABLE_PMD_FOLDED
        /* Walk every pgd on the system and update the pmd there. */
index bceee66..8ddea1f 100644 (file)
@@ -6,7 +6,6 @@ config DEFCONFIG_LIST
 config UML
        bool
        default y
-       select HAVE_GENERIC_HARDIRQS
        select HAVE_UID16
        select GENERIC_IRQ_SHOW
        select GENERIC_CPU_DEVICES
index 08107a7..2665e6b 100644 (file)
@@ -129,12 +129,10 @@ CONFIG_BSD_PROCESS_ACCT=y
 # CONFIG_FHANDLE is not set
 # CONFIG_TASKSTATS is not set
 # CONFIG_AUDIT is not set
-CONFIG_HAVE_GENERIC_HARDIRQS=y
 
 #
 # IRQ subsystem
 #
-CONFIG_GENERIC_HARDIRQS=y
 CONFIG_GENERIC_IRQ_SHOW=y
 
 #
index 089f398..5c3aef7 100644 (file)
@@ -30,8 +30,7 @@ int handle_page_fault(unsigned long address, unsigned long ip,
        pmd_t *pmd;
        pte_t *pte;
        int err = -EFAULT;
-       unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
-                                (is_write ? FAULT_FLAG_WRITE : 0);
+       unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
 
        *code_out = SEGV_MAPERR;
 
@@ -42,6 +41,8 @@ int handle_page_fault(unsigned long address, unsigned long ip,
        if (in_atomic())
                goto out_nosemaphore;
 
+       if (is_user)
+               flags |= FAULT_FLAG_USER;
 retry:
        down_read(&mm->mmap_sem);
        vma = find_vma(mm, address);
@@ -58,12 +59,15 @@ retry:
 
 good_area:
        *code_out = SEGV_ACCERR;
-       if (is_write && !(vma->vm_flags & VM_WRITE))
-               goto out;
-
-       /* Don't require VM_READ|VM_EXEC for write faults! */
-       if (!is_write && !(vma->vm_flags & (VM_READ | VM_EXEC)))
-               goto out;
+       if (is_write) {
+               if (!(vma->vm_flags & VM_WRITE))
+                       goto out;
+               flags |= FAULT_FLAG_WRITE;
+       } else {
+               /* Don't require VM_READ|VM_EXEC for write faults! */
+               if (!(vma->vm_flags & (VM_READ | VM_EXEC)))
+                       goto out;
+       }
 
        do {
                int fault;
@@ -124,6 +128,8 @@ out_of_memory:
         * (which will retry the fault, or kill us if we got oom-killed).
         */
        up_read(&mm->mmap_sem);
+       if (!is_user)
+               goto out_nosemaphore;
        pagefault_out_of_memory();
        return 0;
 }
index 41bcc00..82cdd89 100644 (file)
@@ -2,7 +2,6 @@ config UNICORE32
        def_bool y
        select HAVE_MEMBLOCK
        select HAVE_GENERIC_DMA_COHERENT
-       select HAVE_GENERIC_HARDIRQS
        select HAVE_DMA_ATTRS
        select HAVE_KERNEL_GZIP
        select HAVE_KERNEL_BZIP2
index f9b5c10..0dc922d 100644 (file)
@@ -209,8 +209,7 @@ static int do_pf(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
        struct task_struct *tsk;
        struct mm_struct *mm;
        int fault, sig, code;
-       unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
-                                ((!(fsr ^ 0x12)) ? FAULT_FLAG_WRITE : 0);
+       unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
 
        tsk = current;
        mm = tsk->mm;
@@ -222,6 +221,11 @@ static int do_pf(unsigned long addr, unsigned int fsr, struct pt_regs *regs)
        if (in_atomic() || !mm)
                goto no_context;
 
+       if (user_mode(regs))
+               flags |= FAULT_FLAG_USER;
+       if (!(fsr ^ 0x12))
+               flags |= FAULT_FLAG_WRITE;
+
        /*
         * As per x86, we may deadlock here.  However, since the kernel only
         * validly references user space from well defined areas of the code,
@@ -278,6 +282,13 @@ retry:
               (VM_FAULT_ERROR | VM_FAULT_BADMAP | VM_FAULT_BADACCESS))))
                return 0;
 
+       /*
+        * If we are in kernel mode at this point, we
+        * have no context to handle this fault with.
+        */
+       if (!user_mode(regs))
+               goto no_context;
+
        if (fault & VM_FAULT_OOM) {
                /*
                 * We ran out of memory, call the OOM killer, and return to
@@ -288,13 +299,6 @@ retry:
                return 0;
        }
 
-       /*
-        * If we are in kernel mode at this point, we
-        * have no context to handle this fault with.
-        */
-       if (!user_mode(regs))
-               goto no_context;
-
        if (fault & VM_FAULT_SIGBUS) {
                /*
                 * We had some memory, but were unable to
index 30c40f0..ee2fb9d 100644 (file)
@@ -82,7 +82,6 @@ config X86
        select HAVE_USER_RETURN_NOTIFIER
        select ARCH_BINFMT_ELF_RANDOMIZE_PIE
        select HAVE_ARCH_JUMP_LABEL
-       select HAVE_GENERIC_HARDIRQS
        select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE
        select SPARSE_IRQ
        select GENERIC_FIND_FIRST_BIT
@@ -482,11 +481,12 @@ config X86_INTEL_LPSS
        bool "Intel Low Power Subsystem Support"
        depends on ACPI
        select COMMON_CLK
+       select PINCTRL
        ---help---
          Select to build support for Intel Low Power Subsystem such as
          found on Intel Lynxpoint PCH. Selecting this option enables
-         things like clock tree (common clock framework) which are needed
-         by the LPSS peripheral drivers.
+         things like clock tree (common clock framework) and pincontrol
+         which are needed by the LPSS peripheral drivers.
 
 config X86_RDC321X
        bool "RDC R-321x SoC"
index 8d16bef..3d19994 100644 (file)
@@ -315,21 +315,6 @@ static inline pmd_t pmd_mksoft_dirty(pmd_t pmd)
        return pmd_set_flags(pmd, _PAGE_SOFT_DIRTY);
 }
 
-static inline pte_t pte_swp_mksoft_dirty(pte_t pte)
-{
-       return pte_set_flags(pte, _PAGE_SWP_SOFT_DIRTY);
-}
-
-static inline int pte_swp_soft_dirty(pte_t pte)
-{
-       return pte_flags(pte) & _PAGE_SWP_SOFT_DIRTY;
-}
-
-static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
-{
-       return pte_clear_flags(pte, _PAGE_SWP_SOFT_DIRTY);
-}
-
 static inline pte_t pte_file_clear_soft_dirty(pte_t pte)
 {
        return pte_clear_flags(pte, _PAGE_SOFT_DIRTY);
@@ -446,6 +431,7 @@ pte_t *populate_extra_pte(unsigned long vaddr);
 
 #ifndef __ASSEMBLY__
 #include <linux/mm_types.h>
+#include <linux/mmdebug.h>
 #include <linux/log2.h>
 
 static inline int pte_none(pte_t pte)
@@ -864,6 +850,24 @@ static inline void update_mmu_cache_pmd(struct vm_area_struct *vma,
 {
 }
 
+static inline pte_t pte_swp_mksoft_dirty(pte_t pte)
+{
+       VM_BUG_ON(pte_present(pte));
+       return pte_set_flags(pte, _PAGE_SWP_SOFT_DIRTY);
+}
+
+static inline int pte_swp_soft_dirty(pte_t pte)
+{
+       VM_BUG_ON(pte_present(pte));
+       return pte_flags(pte) & _PAGE_SWP_SOFT_DIRTY;
+}
+
+static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
+{
+       VM_BUG_ON(pte_present(pte));
+       return pte_clear_flags(pte, _PAGE_SWP_SOFT_DIRTY);
+}
+
 #include <asm-generic/pgtable.h>
 #endif /* __ASSEMBLY__ */
 
index f4843e0..0ecac25 100644 (file)
@@ -75,6 +75,9 @@
  * with swap entry format. On x86 bits 6 and 7 are *not* involved
  * into swap entry computation, but bit 6 is used for nonlinear
  * file mapping, so we borrow bit 7 for soft dirty tracking.
+ *
+ * Please note that this bit must be treated as swap dirty page
+ * mark if and only if the PTE has present bit clear!
  */
 #ifdef CONFIG_MEM_SOFT_DIRTY
 #define _PAGE_SWP_SOFT_DIRTY   _PAGE_PSE
index cf51200..e6d90ba 100644 (file)
@@ -62,6 +62,7 @@ static inline void __flush_tlb_all(void)
 
 static inline void __flush_tlb_one(unsigned long addr)
 {
+       count_vm_event(NR_TLB_LOCAL_FLUSH_ONE);
        __flush_tlb_single(addr);
 }
 
@@ -84,14 +85,38 @@ static inline void __flush_tlb_one(unsigned long addr)
 
 #ifndef CONFIG_SMP
 
-#define flush_tlb() __flush_tlb()
-#define flush_tlb_all() __flush_tlb_all()
-#define local_flush_tlb() __flush_tlb()
+/* "_up" is for UniProcessor.
+ *
+ * This is a helper for other header functions.  *Not* intended to be called
+ * directly.  All global TLB flushes need to either call this, or to bump the
+ * vm statistics themselves.
+ */
+static inline void __flush_tlb_up(void)
+{
+       count_vm_event(NR_TLB_LOCAL_FLUSH_ALL);
+       __flush_tlb();
+}
+
+static inline void flush_tlb_all(void)
+{
+       count_vm_event(NR_TLB_LOCAL_FLUSH_ALL);
+       __flush_tlb_all();
+}
+
+static inline void flush_tlb(void)
+{
+       __flush_tlb_up();
+}
+
+static inline void local_flush_tlb(void)
+{
+       __flush_tlb_up();
+}
 
 static inline void flush_tlb_mm(struct mm_struct *mm)
 {
        if (mm == current->active_mm)
-               __flush_tlb();
+               __flush_tlb_up();
 }
 
 static inline void flush_tlb_page(struct vm_area_struct *vma,
@@ -105,14 +130,14 @@ static inline void flush_tlb_range(struct vm_area_struct *vma,
                                   unsigned long start, unsigned long end)
 {
        if (vma->vm_mm == current->active_mm)
-               __flush_tlb();
+               __flush_tlb_up();
 }
 
 static inline void flush_tlb_mm_range(struct mm_struct *mm,
           unsigned long start, unsigned long end, unsigned long vmflag)
 {
        if (mm == current->active_mm)
-               __flush_tlb();
+               __flush_tlb_up();
 }
 
 static inline void native_flush_tlb_others(const struct cpumask *cpumask,
index d4cdfa6..ce2d0a2 100644 (file)
@@ -683,6 +683,7 @@ static void prepare_set(void) __acquires(set_atomicity_lock)
        }
 
        /* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */
+       count_vm_event(NR_TLB_LOCAL_FLUSH_ALL);
        __flush_tlb();
 
        /* Save MTRR state */
@@ -696,6 +697,7 @@ static void prepare_set(void) __acquires(set_atomicity_lock)
 static void post_set(void) __releases(set_atomicity_lock)
 {
        /* Flush TLBs (no need to flush caches - they are disabled) */
+       count_vm_event(NR_TLB_LOCAL_FLUSH_ALL);
        __flush_tlb();
 
        /* Intel (P6) standard MTRRs */
index 0abf674..9db76c3 100644 (file)
@@ -124,6 +124,7 @@ static struct event_constraint intel_ivb_event_constraints[] __read_mostly =
        INTEL_UEVENT_CONSTRAINT(0x0148, 0x4), /* L1D_PEND_MISS.PENDING */
        INTEL_UEVENT_CONSTRAINT(0x0279, 0xf), /* IDQ.EMTPY */
        INTEL_UEVENT_CONSTRAINT(0x019c, 0xf), /* IDQ_UOPS_NOT_DELIVERED.CORE */
+       INTEL_UEVENT_CONSTRAINT(0x02a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_LDM_PENDING */
        INTEL_UEVENT_CONSTRAINT(0x04a3, 0xf), /* CYCLE_ACTIVITY.CYCLES_NO_EXECUTE */
        INTEL_UEVENT_CONSTRAINT(0x05a3, 0xf), /* CYCLE_ACTIVITY.STALLS_L2_PENDING */
        INTEL_UEVENT_CONSTRAINT(0x06a3, 0xf), /* CYCLE_ACTIVITY.STALLS_LDM_PENDING */
@@ -898,8 +899,8 @@ static __initconst const u64 atom_hw_cache_event_ids
 static struct extra_reg intel_slm_extra_regs[] __read_mostly =
 {
        /* must define OFFCORE_RSP_X first, see intel_fixup_er() */
-       INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x768005ffff, RSP_0),
-       INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x768005ffff, RSP_1),
+       INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x768005ffffull, RSP_0),
+       INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0x768005ffffull, RSP_1),
        EVENT_EXTRA_END
 };
 
index 63438aa..ab3ba1c 100644 (file)
@@ -584,6 +584,7 @@ struct event_constraint intel_snb_pebs_event_constraints[] = {
        INTEL_EVENT_CONSTRAINT(0xd0, 0xf),    /* MEM_UOP_RETIRED.* */
        INTEL_EVENT_CONSTRAINT(0xd1, 0xf),    /* MEM_LOAD_UOPS_RETIRED.* */
        INTEL_EVENT_CONSTRAINT(0xd2, 0xf),    /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */
+       INTEL_EVENT_CONSTRAINT(0xd3, 0xf),    /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */
        INTEL_UEVENT_CONSTRAINT(0x02d4, 0xf), /* MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS */
        EVENT_CONSTRAINT_END
 };
index fd8011e..8ed4458 100644 (file)
@@ -2808,7 +2808,7 @@ uncore_get_event_constraint(struct intel_uncore_box *box, struct perf_event *eve
                        return c;
        }
 
-       if (event->hw.config == ~0ULL)
+       if (event->attr.config == UNCORE_FIXED_EVENT)
                return &constraint_fixed;
 
        if (type->constraints) {
@@ -3112,7 +3112,9 @@ static int uncore_pmu_event_init(struct perf_event *event)
                 */
                if (pmu->type->single_fixed && pmu->pmu_idx > 0)
                        return -EINVAL;
-               hwc->config = ~0ULL;
+
+               /* fixed counters have event field hardcoded to zero */
+               hwc->config = 0ULL;
        } else {
                hwc->config = event->attr.config & pmu->type->event_mask;
                if (pmu->type->ops->hw_config) {
index 1b69951..b077f4c 100644 (file)
@@ -487,21 +487,6 @@ ENDPROC(native_usergs_sysret64)
        TRACE_IRQS_OFF
        .endm
 
-ENTRY(save_rest)
-       PARTIAL_FRAME 1 (REST_SKIP+8)
-       movq 5*8+16(%rsp), %r11 /* save return address */
-       movq_cfi rbx, RBX+16
-       movq_cfi rbp, RBP+16
-       movq_cfi r12, R12+16
-       movq_cfi r13, R13+16
-       movq_cfi r14, R14+16
-       movq_cfi r15, R15+16
-       movq %r11, 8(%rsp)      /* return address */
-       FIXUP_TOP_OF_STACK %r11, 16
-       ret
-       CFI_ENDPROC
-END(save_rest)
-
 /* save complete stack frame */
        .pushsection .kprobes.text, "ax"
 ENTRY(save_paranoid)
index aecc98a..6cacab6 100644 (file)
@@ -653,6 +653,7 @@ static void announce_cpu(int cpu, int apicid)
 {
        static int current_node = -1;
        int node = early_cpu_to_node(cpu);
+       int max_cpu_present = find_last_bit(cpumask_bits(cpu_present_mask), NR_CPUS);
 
        if (system_state == SYSTEM_BOOTING) {
                if (node != current_node) {
@@ -661,7 +662,7 @@ static void announce_cpu(int cpu, int apicid)
                        current_node = node;
                        pr_info("Booting Node %3d, Processors ", node);
                }
-               pr_cont(" #%d%s", cpu, cpu == (nr_cpu_ids - 1) ? " OK\n" : "");
+               pr_cont(" #%4d%s", cpu, cpu == max_cpu_present ? " OK\n" : "");
                return;
        } else
                pr_info("Booting Node %d Processor %d APIC 0x%x\n",
index 2bc1e81..ddc3f3d 100644 (file)
@@ -2025,6 +2025,17 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt)
        return rc;
 }
 
+static int em_ret_far_imm(struct x86_emulate_ctxt *ctxt)
+{
+        int rc;
+
+        rc = em_ret_far(ctxt);
+        if (rc != X86EMUL_CONTINUE)
+                return rc;
+        rsp_increment(ctxt, ctxt->src.val);
+        return X86EMUL_CONTINUE;
+}
+
 static int em_cmpxchg(struct x86_emulate_ctxt *ctxt)
 {
        /* Save real source value, then compare EAX against destination. */
@@ -3763,7 +3774,8 @@ static const struct opcode opcode_table[256] = {
        G(ByteOp, group11), G(0, group11),
        /* 0xC8 - 0xCF */
        I(Stack | SrcImmU16 | Src2ImmByte, em_enter), I(Stack, em_leave),
-       N, I(ImplicitOps | Stack, em_ret_far),
+       I(ImplicitOps | Stack | SrcImmU16, em_ret_far_imm),
+       I(ImplicitOps | Stack, em_ret_far),
        D(ImplicitOps), DI(SrcImmByte, intn),
        D(ImplicitOps | No64), II(ImplicitOps, em_iret, iret),
        /* 0xD0 - 0xD7 */
index 6e2d2c8..dce0df8 100644 (file)
@@ -4421,13 +4421,12 @@ void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm)
        }
 }
 
-static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc)
+static unsigned long
+mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
 {
        struct kvm *kvm;
        int nr_to_scan = sc->nr_to_scan;
-
-       if (nr_to_scan == 0)
-               goto out;
+       unsigned long freed = 0;
 
        raw_spin_lock(&kvm_lock);
 
@@ -4462,25 +4461,37 @@ static int mmu_shrink(struct shrinker *shrink, struct shrink_control *sc)
                        goto unlock;
                }
 
-               prepare_zap_oldest_mmu_page(kvm, &invalid_list);
+               if (prepare_zap_oldest_mmu_page(kvm, &invalid_list))
+                       freed++;
                kvm_mmu_commit_zap_page(kvm, &invalid_list);
 
 unlock:
                spin_unlock(&kvm->mmu_lock);
                srcu_read_unlock(&kvm->srcu, idx);
 
+               /*
+                * unfair on small ones
+                * per-vm shrinkers cry out
+                * sadness comes quickly
+                */
                list_move_tail(&kvm->vm_list, &vm_list);
                break;
        }
 
        raw_spin_unlock(&kvm_lock);
+       return freed;
 
-out:
+}
+
+static unsigned long
+mmu_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
+{
        return percpu_counter_read_positive(&kvm_total_used_mmu_pages);
 }
 
 static struct shrinker mmu_shrinker = {
-       .shrink = mmu_shrink,
+       .count_objects = mmu_shrink_count,
+       .scan_objects = mmu_shrink_scan,
        .seeks = DEFAULT_SEEKS * 10,
 };
 
index 0433301..ad75d77 100644 (file)
@@ -99,6 +99,7 @@ struct guest_walker {
        pt_element_t prefetch_ptes[PTE_PREFETCH_NUM];
        gpa_t pte_gpa[PT_MAX_FULL_LEVELS];
        pt_element_t __user *ptep_user[PT_MAX_FULL_LEVELS];
+       bool pte_writable[PT_MAX_FULL_LEVELS];
        unsigned pt_access;
        unsigned pte_access;
        gfn_t gfn;
@@ -235,6 +236,22 @@ static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu,
                if (pte == orig_pte)
                        continue;
 
+               /*
+                * If the slot is read-only, simply do not process the accessed
+                * and dirty bits.  This is the correct thing to do if the slot
+                * is ROM, and page tables in read-as-ROM/write-as-MMIO slots
+                * are only supported if the accessed and dirty bits are already
+                * set in the ROM (so that MMIO writes are never needed).
+                *
+                * Note that NPT does not allow this at all and faults, since
+                * it always wants nested page table entries for the guest
+                * page tables to be writable.  And EPT works but will simply
+                * overwrite the read-only memory to set the accessed and dirty
+                * bits.
+                */
+               if (unlikely(!walker->pte_writable[level - 1]))
+                       continue;
+
                ret = FNAME(cmpxchg_gpte)(vcpu, mmu, ptep_user, index, orig_pte, pte);
                if (ret)
                        return ret;
@@ -309,7 +326,8 @@ retry_walk:
                        goto error;
                real_gfn = gpa_to_gfn(real_gfn);
 
-               host_addr = gfn_to_hva(vcpu->kvm, real_gfn);
+               host_addr = gfn_to_hva_prot(vcpu->kvm, real_gfn,
+                                           &walker->pte_writable[walker->level - 1]);
                if (unlikely(kvm_is_error_hva(host_addr)))
                        goto error;
 
index 1f1da43..a1216de 100644 (file)
@@ -5339,6 +5339,15 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
                return 0;
        }
 
+       /*
+        * EPT violation happened while executing iret from NMI,
+        * "blocked by NMI" bit has to be set before next VM entry.
+        * There are errata that may cause this bit to not be set:
+        * AAK134, BY25.
+        */
+       if (exit_qualification & INTR_INFO_UNBLOCK_NMI)
+               vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI);
+
        gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
        trace_kvm_page_fault(gpa, exit_qualification);
 
@@ -7766,6 +7775,10 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
                vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1);
                vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2);
                vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3);
+               __clear_bit(VCPU_EXREG_PDPTR,
+                               (unsigned long *)&vcpu->arch.regs_avail);
+               __clear_bit(VCPU_EXREG_PDPTR,
+                               (unsigned long *)&vcpu->arch.regs_dirty);
        }
 
        kvm_register_write(vcpu, VCPU_REGS_RSP, vmcs12->guest_rsp);
index 654be4a..3aaeffc 100644 (file)
@@ -842,23 +842,15 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
        force_sig_info_fault(SIGBUS, code, address, tsk, fault);
 }
 
-static noinline int
+static noinline void
 mm_fault_error(struct pt_regs *regs, unsigned long error_code,
               unsigned long address, unsigned int fault)
 {
-       /*
-        * Pagefault was interrupted by SIGKILL. We have no reason to
-        * continue pagefault.
-        */
-       if (fatal_signal_pending(current)) {
-               if (!(fault & VM_FAULT_RETRY))
-                       up_read(&current->mm->mmap_sem);
-               if (!(error_code & PF_USER))
-                       no_context(regs, error_code, address, 0, 0);
-               return 1;
+       if (fatal_signal_pending(current) && !(error_code & PF_USER)) {
+               up_read(&current->mm->mmap_sem);
+               no_context(regs, error_code, address, 0, 0);
+               return;
        }
-       if (!(fault & VM_FAULT_ERROR))
-               return 0;
 
        if (fault & VM_FAULT_OOM) {
                /* Kernel mode? Handle exceptions or die: */
@@ -866,7 +858,7 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
                        up_read(&current->mm->mmap_sem);
                        no_context(regs, error_code, address,
                                   SIGSEGV, SEGV_MAPERR);
-                       return 1;
+                       return;
                }
 
                up_read(&current->mm->mmap_sem);
@@ -884,7 +876,6 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
                else
                        BUG();
        }
-       return 1;
 }
 
 static int spurious_fault_check(unsigned long error_code, pte_t *pte)
@@ -1011,9 +1002,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code)
        unsigned long address;
        struct mm_struct *mm;
        int fault;
-       int write = error_code & PF_WRITE;
-       unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE |
-                                       (write ? FAULT_FLAG_WRITE : 0);
+       unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
 
        tsk = current;
        mm = tsk->mm;
@@ -1083,6 +1072,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code)
        if (user_mode_vm(regs)) {
                local_irq_enable();
                error_code |= PF_USER;
+               flags |= FAULT_FLAG_USER;
        } else {
                if (regs->flags & X86_EFLAGS_IF)
                        local_irq_enable();
@@ -1109,6 +1099,9 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code)
                return;
        }
 
+       if (error_code & PF_WRITE)
+               flags |= FAULT_FLAG_WRITE;
+
        /*
         * When running in the kernel we expect faults to occur only to
         * addresses in user space.  All other faults represent errors in
@@ -1187,9 +1180,17 @@ good_area:
         */
        fault = handle_mm_fault(mm, vma, address, flags);
 
-       if (unlikely(fault & (VM_FAULT_RETRY|VM_FAULT_ERROR))) {
-               if (mm_fault_error(regs, error_code, address, fault))
-                       return;
+       /*
+        * If we need to retry but a fatal signal is pending, handle the
+        * signal first. We do not need to release the mmap_sem because it
+        * would already be released in __lock_page_or_retry in mm/filemap.c.
+        */
+       if (unlikely((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)))
+               return;
+
+       if (unlikely(fault & VM_FAULT_ERROR)) {
+               mm_fault_error(regs, error_code, address, fault);
+               return;
        }
 
        /*
index 7e73e8c..9d980d8 100644 (file)
@@ -59,6 +59,10 @@ follow_huge_pmd(struct mm_struct *mm, unsigned long address,
        return NULL;
 }
 
+int pmd_huge_support(void)
+{
+       return 0;
+}
 #else
 
 struct page *
@@ -77,6 +81,10 @@ int pud_huge(pud_t pud)
        return !!(pud_val(pud) & _PAGE_PSE);
 }
 
+int pmd_huge_support(void)
+{
+       return 1;
+}
 #endif
 
 /* x86_64 also uses this file */
index 282375f..ae699b3 100644 (file)
@@ -103,6 +103,7 @@ static void flush_tlb_func(void *info)
        if (f->flush_mm != this_cpu_read(cpu_tlbstate.active_mm))
                return;
 
+       count_vm_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
        if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) {
                if (f->flush_end == TLB_FLUSH_ALL)
                        local_flush_tlb();
@@ -130,6 +131,7 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
        info.flush_start = start;
        info.flush_end = end;
 
+       count_vm_event(NR_TLB_REMOTE_FLUSH);
        if (is_uv_system()) {
                unsigned int cpu;
 
@@ -149,6 +151,7 @@ void flush_tlb_current_task(void)
 
        preempt_disable();
 
+       count_vm_event(NR_TLB_LOCAL_FLUSH_ALL);
        local_flush_tlb();
        if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
                flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL);
@@ -211,16 +214,19 @@ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
        act_entries = mm->total_vm > tlb_entries ? tlb_entries : mm->total_vm;
 
        /* tlb_flushall_shift is on balance point, details in commit log */
-       if ((end - start) >> PAGE_SHIFT > act_entries >> tlb_flushall_shift)
+       if ((end - start) >> PAGE_SHIFT > act_entries >> tlb_flushall_shift) {
+               count_vm_event(NR_TLB_LOCAL_FLUSH_ALL);
                local_flush_tlb();
-       else {
+       } else {
                if (has_large_page(mm, start, end)) {
                        local_flush_tlb();
                        goto flush_all;
                }
                /* flush range by one by one 'invlpg' */
-               for (addr = start; addr < end;  addr += PAGE_SIZE)
+               for (addr = start; addr < end;  addr += PAGE_SIZE) {
+                       count_vm_event(NR_TLB_LOCAL_FLUSH_ONE);
                        __flush_tlb_single(addr);
+               }
 
                if (cpumask_any_but(mm_cpumask(mm),
                                smp_processor_id()) < nr_cpu_ids)
@@ -256,6 +262,7 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long start)
 
 static void do_flush_tlb_all(void *info)
 {
+       count_vm_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
        __flush_tlb_all();
        if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY)
                leave_mm(smp_processor_id());
@@ -263,6 +270,7 @@ static void do_flush_tlb_all(void *info)
 
 void flush_tlb_all(void)
 {
+       count_vm_event(NR_TLB_REMOTE_FLUSH);
        on_each_cpu(do_flush_tlb_all, NULL, 1);
 }
 
index 7ea6451..8d24dcb 100644 (file)
@@ -7,7 +7,6 @@ config XTENSA
        select HAVE_IDE
        select GENERIC_ATOMIC64
        select GENERIC_CLOCKEVENTS
-       select HAVE_GENERIC_HARDIRQS
        select VIRT_TO_BUS
        select GENERIC_IRQ_SHOW
        select GENERIC_CPU_DEVICES
index 136224b..81250ec 100644 (file)
@@ -55,10 +55,10 @@ ifneq ($(CONFIG_LD_NO_RELAX),)
 LDFLAGS := --no-relax
 endif
 
-ifeq ($(shell echo -e __XTENSA_EB__ | $(CC) -E - | grep -v "\#"),1)
+ifeq ($(shell echo __XTENSA_EB__ | $(CC) -E - | grep -v "\#"),1)
 CHECKFLAGS += -D__XTENSA_EB__
 endif
-ifeq ($(shell echo -e __XTENSA_EL__ | $(CC) -E - | grep -v "\#"),1)
+ifeq ($(shell echo __XTENSA_EL__ | $(CC) -E - | grep -v "\#"),1)
 CHECKFLAGS += -D__XTENSA_EL__
 endif
 
index 64ffc4b..ca20a89 100644 (file)
@@ -12,7 +12,7 @@
 KBUILD_CFLAGS  += -fno-builtin -Iarch/$(ARCH)/boot/include
 HOSTFLAGS      += -Iarch/$(ARCH)/boot/include
 
-BIG_ENDIAN     := $(shell echo -e __XTENSA_EB__ | $(CC) -E - | grep -v "\#")
+BIG_ENDIAN     := $(shell echo __XTENSA_EB__ | $(CC) -E - | grep -v "\#")
 
 export ccflags-y
 export BIG_ENDIAN
index a182a4e..f6000fe 100644 (file)
@@ -8,7 +8,6 @@ CONFIG_XTENSA=y
 # CONFIG_UID16 is not set
 CONFIG_RWSEM_XCHGADD_ALGORITHM=y
 CONFIG_HAVE_DEC_LOCK=y
-CONFIG_GENERIC_HARDIRQS=y
 
 #
 # Code maturity level options
index 77c52f8..4f23320 100644 (file)
@@ -9,7 +9,6 @@ CONFIG_XTENSA=y
 CONFIG_RWSEM_XCHGADD_ALGORITHM=y
 CONFIG_GENERIC_FIND_NEXT_BIT=y
 CONFIG_GENERIC_HWEIGHT=y
-CONFIG_GENERIC_HARDIRQS=y
 # CONFIG_ARCH_HAS_ILOG2_U32 is not set
 # CONFIG_ARCH_HAS_ILOG2_U64 is not set
 CONFIG_NO_IOPORT=y
index 4799c6a..d929f77 100644 (file)
@@ -9,7 +9,6 @@ CONFIG_XTENSA=y
 CONFIG_RWSEM_XCHGADD_ALGORITHM=y
 CONFIG_GENERIC_FIND_NEXT_BIT=y
 CONFIG_GENERIC_HWEIGHT=y
-CONFIG_GENERIC_HARDIRQS=y
 # CONFIG_ARCH_HAS_ILOG2_U32 is not set
 # CONFIG_ARCH_HAS_ILOG2_U64 is not set
 CONFIG_NO_IOPORT=y
index b24de67..4ba9f51 100644 (file)
@@ -82,6 +82,7 @@
 #define PS_CALLINC_SHIFT       16
 #define PS_CALLINC_MASK                0x00030000
 #define PS_OWB_SHIFT           8
+#define PS_OWB_WIDTH           4
 #define PS_OWB_MASK            0x00000F00
 #define PS_RING_SHIFT          6
 #define PS_RING_MASK           0x000000C0
index 69f9017..27fa3c1 100644 (file)
 # error "Bad timer number for Linux configurations!"
 #endif
 
-#ifdef CONFIG_XTENSA_CALIBRATE_CCOUNT
 extern unsigned long ccount_freq;
-#define CCOUNT_PER_JIFFY (ccount_freq / HZ)
-#else
-#define CCOUNT_PER_JIFFY (CONFIG_XTENSA_CPU_CLOCK*(1000000UL/HZ))
-#endif
-
 
 typedef unsigned long long cycles_t;
 
index aa2e87b..d4cef60 100644 (file)
  *   a0:       trashed, original value saved on stack (PT_AREG0)
  *   a1:       a1
  *   a2:       new stack pointer, original in DEPC
- *   a3:       dispatch table
+ *   a3:       a3
  *   depc:     a2, original value saved on stack (PT_DEPC)
- *   excsave_1:        a3
+ *   excsave_1:        dispatch table
  *
  *   PT_DEPC >= VALID_DOUBLE_EXCEPTION_ADDRESS: double exception, DEPC
  *          <  VALID_DOUBLE_EXCEPTION_ADDRESS: regular exception
@@ -171,7 +171,6 @@ ENTRY(fast_unaligned)
        s32i    a8, a2, PT_AREG8
 
        rsr     a0, depc
-       xsr     a3, excsave1
        s32i    a0, a2, PT_AREG2
        s32i    a3, a2, PT_AREG3
 
index 6476574..a482df5 100644 (file)
@@ -32,9 +32,9 @@
  *   a0:       trashed, original value saved on stack (PT_AREG0)
  *   a1:       a1
  *   a2:       new stack pointer, original in DEPC
- *   a3:       dispatch table
+ *   a3:       a3
  *   depc:     a2, original value saved on stack (PT_DEPC)
- *   excsave_1:        a3
+ *   excsave_1:        dispatch table
  *
  *   PT_DEPC >= VALID_DOUBLE_EXCEPTION_ADDRESS: double exception, DEPC
  *          <  VALID_DOUBLE_EXCEPTION_ADDRESS: regular exception
@@ -225,9 +225,9 @@ ENDPROC(coprocessor_restore)
  *   a0:       trashed, original value saved on stack (PT_AREG0)
  *   a1:       a1
  *   a2:       new stack pointer, original in DEPC
- *   a3:       dispatch table
+ *   a3:       a3
  *   depc:     a2, original value saved on stack (PT_DEPC)
- *   excsave_1:        a3
+ *   excsave_1:        dispatch table
  *
  *   PT_DEPC >= VALID_DOUBLE_EXCEPTION_ADDRESS: double exception, DEPC
  *          <  VALID_DOUBLE_EXCEPTION_ADDRESS: regular exception
@@ -245,7 +245,6 @@ ENTRY(fast_coprocessor)
 
        /* Save remaining registers a1-a3 and SAR */
 
-       xsr     a3, excsave1
        s32i    a3, a2, PT_AREG3
        rsr     a3, sar
        s32i    a1, a2, PT_AREG1
index 9298742..de1dfa1 100644 (file)
@@ -31,8 +31,6 @@
 /* Unimplemented features. */
 
 #undef KERNEL_STACK_OVERFLOW_CHECK
-#undef PREEMPTIBLE_KERNEL
-#undef ALLOCA_EXCEPTION_IN_IRAM
 
 /* Not well tested.
  *
@@ -92,9 +90,9 @@
  *   a0:       trashed, original value saved on stack (PT_AREG0)
  *   a1:       a1
  *   a2:       new stack pointer, original value in depc
- *   a3:       dispatch table
+ *   a3:       a3
  *   depc:     a2, original value saved on stack (PT_DEPC)
- *   excsave1: a3
+ *   excsave1: dispatch table
  *
  *   PT_DEPC >= VALID_DOUBLE_EXCEPTION_ADDRESS: double exception, DEPC
  *          <  VALID_DOUBLE_EXCEPTION_ADDRESS: regular exception
 
 ENTRY(user_exception)
 
-       /* Save a2, a3, and depc, restore excsave_1 and set SP. */
+       /* Save a1, a2, a3, and set SP. */
 
-       xsr     a3, excsave1
        rsr     a0, depc
        s32i    a1, a2, PT_AREG1
        s32i    a0, a2, PT_AREG2
@@ -238,9 +235,9 @@ ENDPROC(user_exception)
  *   a0:       trashed, original value saved on stack (PT_AREG0)
  *   a1:       a1
  *   a2:       new stack pointer, original in DEPC
- *   a3:       dispatch table
+ *   a3:       a3
  *   depc:     a2, original value saved on stack (PT_DEPC)
- *   excsave_1:        a3
+ *   excsave_1:        dispatch table
  *
  *   PT_DEPC >= VALID_DOUBLE_EXCEPTION_ADDRESS: double exception, DEPC
  *          <  VALID_DOUBLE_EXCEPTION_ADDRESS: regular exception
@@ -256,9 +253,8 @@ ENDPROC(user_exception)
 
 ENTRY(kernel_exception)
 
-       /* Save a0, a2, a3, DEPC and set SP. */
+       /* Save a1, a2, a3, and set SP. */
 
-       xsr     a3, excsave1            # restore a3, excsave_1
        rsr     a0, depc                # get a2
        s32i    a1, a2, PT_AREG1
        s32i    a0, a2, PT_AREG2
@@ -409,7 +405,7 @@ common_exception:
         * exception handler and call the exception handler.
         */
 
-       movi    a4, exc_table
+       rsr     a4, excsave1
        mov     a6, a1                  # pass stack frame
        mov     a7, a0                  # pass EXCCAUSE
        addx4   a4, a0, a4
@@ -423,28 +419,15 @@ common_exception:
        .global common_exception_return
 common_exception_return:
 
-#ifdef CONFIG_TRACE_IRQFLAGS
-       l32i    a4, a1, PT_DEPC
-       /* Double exception means we came here with an exception
-        * while PS.EXCM was set, i.e. interrupts disabled.
-        */
-       bgeui   a4, VALID_DOUBLE_EXCEPTION_ADDRESS, 1f
-       l32i    a4, a1, PT_EXCCAUSE
-       bnei    a4, EXCCAUSE_LEVEL1_INTERRUPT, 1f
-       /* We came here with an interrupt means interrupts were enabled
-        * and we'll reenable them on return.
-        */
-       movi    a4, trace_hardirqs_on
-       callx4  a4
 1:
-#endif
+       rsil    a2, LOCKLEVEL
 
        /* Jump if we are returning from kernel exceptions. */
 
-1:     l32i    a3, a1, PT_PS
-       _bbci.l a3, PS_UM_BIT, 4f
-
-       rsil    a2, 0
+       l32i    a3, a1, PT_PS
+       GET_THREAD_INFO(a2, a1)
+       l32i    a4, a2, TI_FLAGS
+       _bbci.l a3, PS_UM_BIT, 6f
 
        /* Specific to a user exception exit:
         * We need to check some flags for signal handling and rescheduling,
@@ -453,9 +436,6 @@ common_exception_return:
         * Note that we don't disable interrupts here. 
         */
 
-       GET_THREAD_INFO(a2,a1)
-       l32i    a4, a2, TI_FLAGS
-
        _bbsi.l a4, TIF_NEED_RESCHED, 3f
        _bbsi.l a4, TIF_NOTIFY_RESUME, 2f
        _bbci.l a4, TIF_SIGPENDING, 5f
@@ -465,6 +445,7 @@ common_exception_return:
 
        /* Call do_signal() */
 
+       rsil    a2, 0
        movi    a4, do_notify_resume    # int do_notify_resume(struct pt_regs*)
        mov     a6, a1
        callx4  a4
@@ -472,10 +453,24 @@ common_exception_return:
 
 3:     /* Reschedule */
 
+       rsil    a2, 0
        movi    a4, schedule    # void schedule (void)
        callx4  a4
        j       1b
 
+#ifdef CONFIG_PREEMPT
+6:
+       _bbci.l a4, TIF_NEED_RESCHED, 4f
+
+       /* Check current_thread_info->preempt_count */
+
+       l32i    a4, a2, TI_PRE_COUNT
+       bnez    a4, 4f
+       movi    a4, preempt_schedule_irq
+       callx4  a4
+       j       1b
+#endif
+
 5:
 #ifdef CONFIG_DEBUG_TLB_SANITY
        l32i    a4, a1, PT_DEPC
@@ -483,7 +478,24 @@ common_exception_return:
        movi    a4, check_tlb_sanity
        callx4  a4
 #endif
-4:     /* Restore optional registers. */
+6:
+4:
+#ifdef CONFIG_TRACE_IRQFLAGS
+       l32i    a4, a1, PT_DEPC
+       /* Double exception means we came here with an exception
+        * while PS.EXCM was set, i.e. interrupts disabled.
+        */
+       bgeui   a4, VALID_DOUBLE_EXCEPTION_ADDRESS, 1f
+       l32i    a4, a1, PT_EXCCAUSE
+       bnei    a4, EXCCAUSE_LEVEL1_INTERRUPT, 1f
+       /* We came here with an interrupt means interrupts were enabled
+        * and we'll reenable them on return.
+        */
+       movi    a4, trace_hardirqs_on
+       callx4  a4
+1:
+#endif
+       /* Restore optional registers. */
 
        load_xtregs_opt a1 a2 a4 a5 a6 a7 PT_XTREGS_OPT
 
@@ -570,29 +582,6 @@ user_exception_exit:
 
 kernel_exception_exit:
 
-#ifdef PREEMPTIBLE_KERNEL
-
-#ifdef CONFIG_PREEMPT
-
-       /*
-        * Note: We've just returned from a call4, so we have
-        * at least 4 addt'l regs.
-        */
-
-       /* Check current_thread_info->preempt_count */
-
-       GET_THREAD_INFO(a2)
-       l32i    a3, a2, TI_PREEMPT
-       bnez    a3, 1f
-
-       l32i    a2, a2, TI_FLAGS
-
-1:
-
-#endif
-
-#endif
-
        /* Check if we have to do a movsp.
         *
         * We only have to do a movsp if the previous window-frame has
@@ -829,176 +818,63 @@ ENDPROC(unrecoverable_exception)
  *
  *  The ALLOCA handler is entered when user code executes the MOVSP
  *  instruction and the caller's frame is not in the register file.
- *  In this case, the caller frame's a0..a3 are on the stack just
- *  below sp (a1), and this handler moves them.
  *
- *  For "MOVSP <ar>,<as>" without destination register a1, this routine
- *  simply moves the value from <as> to <ar> without moving the save area.
+ * This algorithm was taken from the Ross Morley's RTOS Porting Layer:
+ *
+ *    /home/ross/rtos/porting/XtensaRTOS-PortingLayer-20090507/xtensa_vectors.S
+ *
+ * It leverages the existing window spill/fill routines and their support for
+ * double exceptions. The 'movsp' instruction will only cause an exception if
+ * the next window needs to be loaded. In fact this ALLOCA exception may be
+ * replaced at some point by changing the hardware to do a underflow exception
+ * of the proper size instead.
+ *
+ * This algorithm simply backs out the register changes started by the user
+ * excpetion handler, makes it appear that we have started a window underflow
+ * by rotating the window back and then setting the old window base (OWB) in
+ * the 'ps' register with the rolled back window base. The 'movsp' instruction
+ * will be re-executed and this time since the next window frames is in the
+ * active AR registers it won't cause an exception.
+ *
+ * If the WindowUnderflow code gets a TLB miss the page will get mapped
+ * the the partial windeowUnderflow will be handeled in the double exception
+ * handler.
  *
  * Entry condition:
  *
  *   a0:       trashed, original value saved on stack (PT_AREG0)
  *   a1:       a1
  *   a2:       new stack pointer, original in DEPC
- *   a3:       dispatch table
+ *   a3:       a3
  *   depc:     a2, original value saved on stack (PT_DEPC)
- *   excsave_1:        a3
+ *   excsave_1:        dispatch table
  *
  *   PT_DEPC >= VALID_DOUBLE_EXCEPTION_ADDRESS: double exception, DEPC
  *          <  VALID_DOUBLE_EXCEPTION_ADDRESS: regular exception
  */
 
-#if XCHAL_HAVE_BE
-#define _EXTUI_MOVSP_SRC(ar)   extui ar, ar, 4, 4
-#define _EXTUI_MOVSP_DST(ar)   extui ar, ar, 0, 4
-#else
-#define _EXTUI_MOVSP_SRC(ar)   extui ar, ar, 0, 4
-#define _EXTUI_MOVSP_DST(ar)   extui ar, ar, 4, 4
-#endif
-
 ENTRY(fast_alloca)
+       rsr     a0, windowbase
+       rotw    -1
+       rsr     a2, ps
+       extui   a3, a2, PS_OWB_SHIFT, PS_OWB_WIDTH
+       xor     a3, a3, a4
+       l32i    a4, a6, PT_AREG0
+       l32i    a1, a6, PT_DEPC
+       rsr     a6, depc
+       wsr     a1, depc
+       slli    a3, a3, PS_OWB_SHIFT
+       xor     a2, a2, a3
+       wsr     a2, ps
+       rsync
 
-       /* We shouldn't be in a double exception. */
-
-       l32i    a0, a2, PT_DEPC
-       _bgeui  a0, VALID_DOUBLE_EXCEPTION_ADDRESS, .Lunhandled_double
-
-       rsr     a0, depc                # get a2
-       s32i    a4, a2, PT_AREG4        # save a4 and
-       s32i    a0, a2, PT_AREG2        # a2 to stack
-
-       /* Exit critical section. */
-
-       movi    a0, 0
-       s32i    a0, a3, EXC_TABLE_FIXUP
-
-       /* Restore a3, excsave_1 */
-
-       xsr     a3, excsave1            # make sure excsave_1 is valid for dbl.
-       rsr     a4, epc1                # get exception address
-       s32i    a3, a2, PT_AREG3        # save a3 to stack
-
-#ifdef ALLOCA_EXCEPTION_IN_IRAM
-#error iram not supported
-#else
-       /* Note: l8ui not allowed in IRAM/IROM!! */
-       l8ui    a0, a4, 1               # read as(src) from MOVSP instruction
-#endif
-       movi    a3, .Lmovsp_src
-       _EXTUI_MOVSP_SRC(a0)            # extract source register number
-       addx8   a3, a0, a3
-       jx      a3
-
-.Lunhandled_double:
-       wsr     a0, excsave1
-       movi    a0, unrecoverable_exception
-       callx0  a0
-
-       .align 8
-.Lmovsp_src:
-       l32i    a3, a2, PT_AREG0;       _j 1f;  .align 8
-       mov     a3, a1;                 _j 1f;  .align 8
-       l32i    a3, a2, PT_AREG2;       _j 1f;  .align 8
-       l32i    a3, a2, PT_AREG3;       _j 1f;  .align 8
-       l32i    a3, a2, PT_AREG4;       _j 1f;  .align 8
-       mov     a3, a5;                 _j 1f;  .align 8
-       mov     a3, a6;                 _j 1f;  .align 8
-       mov     a3, a7;                 _j 1f;  .align 8
-       mov     a3, a8;                 _j 1f;  .align 8
-       mov     a3, a9;                 _j 1f;  .align 8
-       mov     a3, a10;                _j 1f;  .align 8
-       mov     a3, a11;                _j 1f;  .align 8
-       mov     a3, a12;                _j 1f;  .align 8
-       mov     a3, a13;                _j 1f;  .align 8
-       mov     a3, a14;                _j 1f;  .align 8
-       mov     a3, a15;                _j 1f;  .align 8
-
-1:
-
-#ifdef ALLOCA_EXCEPTION_IN_IRAM
-#error iram not supported
-#else
-       l8ui    a0, a4, 0               # read ar(dst) from MOVSP instruction
-#endif
-       addi    a4, a4, 3               # step over movsp
-       _EXTUI_MOVSP_DST(a0)            # extract destination register
-       wsr     a4, epc1                # save new epc_1
-
-       _bnei   a0, 1, 1f               # no 'movsp a1, ax': jump
-
-       /* Move the save area. This implies the use of the L32E
-        * and S32E instructions, because this move must be done with
-        * the user's PS.RING privilege levels, not with ring 0
-        * (kernel's) privileges currently active with PS.EXCM
-        * set. Note that we have stil registered a fixup routine with the
-        * double exception vector in case a double exception occurs.
-        */
-
-       /* a0,a4:avail a1:old user stack a2:exc. stack a3:new user stack. */
-
-       l32e    a0, a1, -16
-       l32e    a4, a1, -12
-       s32e    a0, a3, -16
-       s32e    a4, a3, -12
-       l32e    a0, a1, -8
-       l32e    a4, a1, -4
-       s32e    a0, a3, -8
-       s32e    a4, a3, -4
-
-       /* Restore stack-pointer and all the other saved registers. */
-
-       mov     a1, a3
-
-       l32i    a4, a2, PT_AREG4
-       l32i    a3, a2, PT_AREG3
-       l32i    a0, a2, PT_AREG0
-       l32i    a2, a2, PT_AREG2
-       rfe
-
-       /*  MOVSP <at>,<as>  was invoked with <at> != a1.
-        *  Because the stack pointer is not being modified,
-        *  we should be able to just modify the pointer
-        *  without moving any save area.
-        *  The processor only traps these occurrences if the
-        *  caller window isn't live, so unfortunately we can't
-        *  use this as an alternate trap mechanism.
-        *  So we just do the move.  This requires that we
-        *  resolve the destination register, not just the source,
-        *  so there's some extra work.
-        *  (PERHAPS NOT REALLY NEEDED, BUT CLEANER...)
-        */
-
-       /* a0 dst-reg, a1 user-stack, a2 stack, a3 value of src reg. */
-
-1:     movi    a4, .Lmovsp_dst
-       addx8   a4, a0, a4
-       jx      a4
-
-       .align 8
-.Lmovsp_dst:
-       s32i    a3, a2, PT_AREG0;       _j 1f;  .align 8
-       mov     a1, a3;                 _j 1f;  .align 8
-       s32i    a3, a2, PT_AREG2;       _j 1f;  .align 8
-       s32i    a3, a2, PT_AREG3;       _j 1f;  .align 8
-       s32i    a3, a2, PT_AREG4;       _j 1f;  .align 8
-       mov     a5, a3;                 _j 1f;  .align 8
-       mov     a6, a3;                 _j 1f;  .align 8
-       mov     a7, a3;                 _j 1f;  .align 8
-       mov     a8, a3;                 _j 1f;  .align 8
-       mov     a9, a3;                 _j 1f;  .align 8
-       mov     a10, a3;                _j 1f;  .align 8
-       mov     a11, a3;                _j 1f;  .align 8
-       mov     a12, a3;                _j 1f;  .align 8
-       mov     a13, a3;                _j 1f;  .align 8
-       mov     a14, a3;                _j 1f;  .align 8
-       mov     a15, a3;                _j 1f;  .align 8
-
-1:     l32i    a4, a2, PT_AREG4
-       l32i    a3, a2, PT_AREG3
-       l32i    a0, a2, PT_AREG0
-       l32i    a2, a2, PT_AREG2
-       rfe
-
+       _bbci.l a4, 31, 4f
+       rotw    -1
+       _bbci.l a8, 30, 8f
+       rotw    -1
+       j       _WindowUnderflow12
+8:     j       _WindowUnderflow8
+4:     j       _WindowUnderflow4
 ENDPROC(fast_alloca)
 
 /*
@@ -1015,9 +891,9 @@ ENDPROC(fast_alloca)
  *   a0:       trashed, original value saved on stack (PT_AREG0)
  *   a1:       a1
  *   a2:       new stack pointer, original in DEPC
- *   a3:       dispatch table
+ *   a3:       a3
  *   depc:     a2, original value saved on stack (PT_DEPC)
- *   excsave_1:        a3
+ *   excsave_1:        dispatch table
  */
 
 ENTRY(fast_syscall_kernel)
@@ -1064,7 +940,6 @@ ENTRY(fast_syscall_unrecoverable)
 
        l32i    a0, a2, PT_AREG0        # restore a0
        xsr     a2, depc                # restore a2, depc
-       rsr     a3, excsave1
 
        wsr     a0, excsave1
        movi    a0, unrecoverable_exception
@@ -1086,10 +961,10 @@ ENDPROC(fast_syscall_unrecoverable)
  *   a0:       a2 (syscall-nr), original value saved on stack (PT_AREG0)
  *   a1:       a1
  *   a2:       new stack pointer, original in a0 and DEPC
- *   a3:       dispatch table, original in excsave_1
+ *   a3:       a3
  *   a4..a15:  unchanged
  *   depc:     a2, original value saved on stack (PT_DEPC)
- *   excsave_1:        a3
+ *   excsave_1:        dispatch table
  *
  *   PT_DEPC >= VALID_DOUBLE_EXCEPTION_ADDRESS: double exception, DEPC
  *          <  VALID_DOUBLE_EXCEPTION_ADDRESS: regular exception
@@ -1122,8 +997,6 @@ ENDPROC(fast_syscall_unrecoverable)
 
 ENTRY(fast_syscall_xtensa)
 
-       xsr     a3, excsave1            # restore a3, excsave1
-
        s32i    a7, a2, PT_AREG7        # we need an additional register
        movi    a7, 4                   # sizeof(unsigned int)
        access_ok a3, a7, a0, a2, .Leac # a0: scratch reg, a2: sp
@@ -1186,9 +1059,9 @@ ENDPROC(fast_syscall_xtensa)
  *   a0:       trashed, original value saved on stack (PT_AREG0)
  *   a1:       a1
  *   a2:       new stack pointer, original in DEPC
- *   a3:       dispatch table
+ *   a3:       a3
  *   depc:     a2, original value saved on stack (PT_DEPC)
- *   excsave_1:        a3
+ *   excsave_1:        dispatch table
  *
  * Note: We assume the stack pointer is EXC_TABLE_KSTK in the fixup handler.
  */
@@ -1197,15 +1070,16 @@ ENTRY(fast_syscall_spill_registers)
 
        /* Register a FIXUP handler (pass current wb as a parameter) */
 
+       xsr     a3, excsave1
        movi    a0, fast_syscall_spill_registers_fixup
        s32i    a0, a3, EXC_TABLE_FIXUP
        rsr     a0, windowbase
        s32i    a0, a3, EXC_TABLE_PARAM
+       xsr     a3, excsave1            # restore a3 and excsave_1
 
-       /* Save a3 and SAR on stack. */
+       /* Save a3, a4 and SAR on stack. */
 
        rsr     a0, sar
-       xsr     a3, excsave1            # restore a3 and excsave_1
        s32i    a3, a2, PT_AREG3
        s32i    a4, a2, PT_AREG4
        s32i    a0, a2, PT_AREG5        # store SAR to PT_AREG5
@@ -1259,14 +1133,14 @@ fast_syscall_spill_registers_fixup:
         * in WS, so that the exception handlers save them to the task stack.
         */
 
-       rsr     a3, excsave1    # get spill-mask
+       xsr     a3, excsave1    # get spill-mask
        slli    a2, a3, 1       # shift left by one
 
        slli    a3, a2, 32-WSBITS
        src     a2, a2, a3      # a1 = xxwww1yyxxxwww1yy......
        wsr     a2, windowstart # set corrected windowstart
 
-       movi    a3, exc_table
+       rsr     a3, excsave1
        l32i    a2, a3, EXC_TABLE_DOUBLE_SAVE   # restore a2
        l32i    a3, a3, EXC_TABLE_PARAM # original WB (in user task)
 
@@ -1303,7 +1177,7 @@ fast_syscall_spill_registers_fixup:
 
        /* Jump to the exception handler. */
 
-       movi    a3, exc_table
+       rsr     a3, excsave1
        rsr     a0, exccause
        addx4   a0, a0, a3                      # find entry in table
        l32i    a0, a0, EXC_TABLE_FAST_USER     # load handler
@@ -1320,6 +1194,7 @@ fast_syscall_spill_registers_fixup_return:
        xsr     a3, excsave1
        movi    a2, fast_syscall_spill_registers_fixup
        s32i    a2, a3, EXC_TABLE_FIXUP
+       s32i    a0, a3, EXC_TABLE_DOUBLE_SAVE
        rsr     a2, windowbase
        s32i    a2, a3, EXC_TABLE_PARAM
        l32i    a2, a3, EXC_TABLE_KSTK
@@ -1331,11 +1206,6 @@ fast_syscall_spill_registers_fixup_return:
        wsr     a3, windowbase
        rsync
 
-       /* Restore a3 and return. */
-
-       movi    a3, exc_table
-       xsr     a3, excsave1
-
        rfde
 
 
@@ -1522,9 +1392,8 @@ ENTRY(_spill_registers)
 
        movi    a0, 0
 
-       movi    a3, exc_table
+       rsr     a3, excsave1
        l32i    a1, a3, EXC_TABLE_KSTK
-       wsr     a3, excsave1
 
        movi    a4, (1 << PS_WOE_BIT) | LOCKLEVEL
        wsr     a4, ps
@@ -1568,9 +1437,9 @@ ENDPROC(fast_second_level_miss_double_kernel)
  *   a0:       trashed, original value saved on stack (PT_AREG0)
  *   a1:       a1
  *   a2:       new stack pointer, original in DEPC
- *   a3:       dispatch table
+ *   a3:       a3
  *   depc:     a2, original value saved on stack (PT_DEPC)
- *   excsave_1:        a3
+ *   excsave_1:        dispatch table
  *
  *   PT_DEPC >= VALID_DOUBLE_EXCEPTION_ADDRESS: double exception, DEPC
  *          <  VALID_DOUBLE_EXCEPTION_ADDRESS: regular exception
@@ -1578,9 +1447,10 @@ ENDPROC(fast_second_level_miss_double_kernel)
 
 ENTRY(fast_second_level_miss)
 
-       /* Save a1. Note: we don't expect a double exception. */
+       /* Save a1 and a3. Note: we don't expect a double exception. */
 
        s32i    a1, a2, PT_AREG1
+       s32i    a3, a2, PT_AREG3
 
        /* We need to map the page of PTEs for the user task.  Find
         * the pointer to that page.  Also, it's possible for tsk->mm
@@ -1602,9 +1472,6 @@ ENTRY(fast_second_level_miss)
        l32i    a0, a1, TASK_MM         # tsk->mm
        beqz    a0, 9f
 
-
-       /* We deliberately destroy a3 that holds the exception table. */
-
 8:     rsr     a3, excvaddr            # fault address
        _PGD_OFFSET(a0, a3, a1)
        l32i    a0, a0, 0               # read pmdval
@@ -1655,7 +1522,7 @@ ENTRY(fast_second_level_miss)
 
        /* Exit critical section. */
 
-4:     movi    a3, exc_table           # restore a3
+4:     rsr     a3, excsave1
        movi    a0, 0
        s32i    a0, a3, EXC_TABLE_FIXUP
 
@@ -1663,8 +1530,8 @@ ENTRY(fast_second_level_miss)
 
        l32i    a0, a2, PT_AREG0
        l32i    a1, a2, PT_AREG1
+       l32i    a3, a2, PT_AREG3
        l32i    a2, a2, PT_DEPC
-       xsr     a3, excsave1
 
        bgeui   a2, VALID_DOUBLE_EXCEPTION_ADDRESS, 1f
 
@@ -1751,11 +1618,8 @@ ENTRY(fast_second_level_miss)
 
 2:     /* Invalid PGD, default exception handling */
 
-       movi    a3, exc_table
        rsr     a1, depc
-       xsr     a3, excsave1
        s32i    a1, a2, PT_AREG2
-       s32i    a3, a2, PT_AREG3
        mov     a1, a2
 
        rsr     a2, ps
@@ -1775,9 +1639,9 @@ ENDPROC(fast_second_level_miss)
  *   a0:       trashed, original value saved on stack (PT_AREG0)
  *   a1:       a1
  *   a2:       new stack pointer, original in DEPC
- *   a3:       dispatch table
+ *   a3:       a3
  *   depc:     a2, original value saved on stack (PT_DEPC)
- *   excsave_1:        a3
+ *   excsave_1:        dispatch table
  *
  *   PT_DEPC >= VALID_DOUBLE_EXCEPTION_ADDRESS: double exception, DEPC
  *          <  VALID_DOUBLE_EXCEPTION_ADDRESS: regular exception
@@ -1785,17 +1649,17 @@ ENDPROC(fast_second_level_miss)
 
 ENTRY(fast_store_prohibited)
 
-       /* Save a1 and a4. */
+       /* Save a1 and a3. */
 
        s32i    a1, a2, PT_AREG1
-       s32i    a4, a2, PT_AREG4
+       s32i    a3, a2, PT_AREG3
 
        GET_CURRENT(a1,a2)
        l32i    a0, a1, TASK_MM         # tsk->mm
        beqz    a0, 9f
 
 8:     rsr     a1, excvaddr            # fault address
-       _PGD_OFFSET(a0, a1, a4)
+       _PGD_OFFSET(a0, a1, a3)
        l32i    a0, a0, 0
        beqz    a0, 2f
 
@@ -1804,39 +1668,37 @@ ENTRY(fast_store_prohibited)
         * and is not PAGE_NONE. See pgtable.h for possible PTE layouts.
         */
 
-       _PTE_OFFSET(a0, a1, a4)
-       l32i    a4, a0, 0               # read pteval
+       _PTE_OFFSET(a0, a1, a3)
+       l32i    a3, a0, 0               # read pteval
        movi    a1, _PAGE_CA_INVALID
-       ball    a4, a1, 2f
-       bbci.l  a4, _PAGE_WRITABLE_BIT, 2f
+       ball    a3, a1, 2f
+       bbci.l  a3, _PAGE_WRITABLE_BIT, 2f
 
        movi    a1, _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_HW_WRITE
-       or      a4, a4, a1
+       or      a3, a3, a1
        rsr     a1, excvaddr
-       s32i    a4, a0, 0
+       s32i    a3, a0, 0
 
        /* We need to flush the cache if we have page coloring. */
 #if (DCACHE_WAY_SIZE > PAGE_SIZE) && XCHAL_DCACHE_IS_WRITEBACK
        dhwb    a0, 0
 #endif
        pdtlb   a0, a1
-       wdtlb   a4, a0
+       wdtlb   a3, a0
 
        /* Exit critical section. */
 
        movi    a0, 0
+       rsr     a3, excsave1
        s32i    a0, a3, EXC_TABLE_FIXUP
 
        /* Restore the working registers, and return. */
 
-       l32i    a4, a2, PT_AREG4
+       l32i    a3, a2, PT_AREG3
        l32i    a1, a2, PT_AREG1
        l32i    a0, a2, PT_AREG0
        l32i    a2, a2, PT_DEPC
 
-       /* Restore excsave1 and a3. */
-
-       xsr     a3, excsave1
        bgeui   a2, VALID_DOUBLE_EXCEPTION_ADDRESS, 1f
 
        rsr     a2, depc
@@ -1853,11 +1715,8 @@ ENTRY(fast_store_prohibited)
 
 2:     /* If there was a problem, handle fault in C */
 
-       rsr     a4, depc        # still holds a2
-       xsr     a3, excsave1
-       s32i    a4, a2, PT_AREG2
-       s32i    a3, a2, PT_AREG3
-       l32i    a4, a2, PT_AREG4
+       rsr     a3, depc        # still holds a2
+       s32i    a3, a2, PT_AREG2
        mov     a1, a2
 
        rsr     a2, ps
index 101012b..946fb8d 100644 (file)
@@ -584,8 +584,8 @@ c_show(struct seq_file *f, void *slot)
                     "bogomips\t: %lu.%02lu\n",
                     XCHAL_BUILD_UNIQUE_ID,
                     XCHAL_HAVE_BE ?  "big" : "little",
-                    CCOUNT_PER_JIFFY/(1000000/HZ),
-                    (CCOUNT_PER_JIFFY/(10000/HZ)) % 100,
+                    ccount_freq/1000000,
+                    (ccount_freq/10000) % 100,
                     loops_per_jiffy/(500000/HZ),
                     (loops_per_jiffy/(5000/HZ)) % 100);
 
index 24bb0c1..9af3dd8 100644 (file)
@@ -29,9 +29,7 @@
 #include <asm/timex.h>
 #include <asm/platform.h>
 
-#ifdef CONFIG_XTENSA_CALIBRATE_CCOUNT
 unsigned long ccount_freq;             /* ccount Hz */
-#endif
 
 static cycle_t ccount_read(struct clocksource *cs)
 {
@@ -129,8 +127,10 @@ void __init time_init(void)
        platform_calibrate_ccount();
        printk("%d.%02d MHz\n", (int)ccount_freq/1000000,
                        (int)(ccount_freq/10000)%100);
+#else
+       ccount_freq = CONFIG_XTENSA_CPU_CLOCK*1000000UL;
 #endif
-       clocksource_register_hz(&ccount_clocksource, CCOUNT_PER_JIFFY * HZ);
+       clocksource_register_hz(&ccount_clocksource, ccount_freq);
 
        ccount_timer.evt.cpumask = cpumask_of(0);
        ccount_timer.evt.irq = irq_create_mapping(NULL, LINUX_TIMER_INT);
@@ -164,7 +164,7 @@ irqreturn_t timer_interrupt (int irq, void *dev_id)
 #ifndef CONFIG_GENERIC_CALIBRATE_DELAY
 void calibrate_delay(void)
 {
-       loops_per_jiffy = CCOUNT_PER_JIFFY;
+       loops_per_jiffy = ccount_freq / HZ;
        printk("Calibrating delay loop (skipped)... "
               "%lu.%02lu BogoMIPS preset\n",
               loops_per_jiffy/(1000000/HZ),
index f9e1753..cb8fd44 100644 (file)
@@ -78,6 +78,7 @@ ENTRY(_UserExceptionVector)
        s32i    a0, a2, PT_DEPC         # mark it as a regular exception
        addx4   a0, a0, a3              # find entry in table
        l32i    a0, a0, EXC_TABLE_FAST_USER     # load handler
+       xsr     a3, excsave1            # restore a3 and dispatch table
        jx      a0
 
 ENDPROC(_UserExceptionVector)
@@ -104,6 +105,7 @@ ENTRY(_KernelExceptionVector)
        s32i    a0, a2, PT_DEPC         # mark it as a regular exception
        addx4   a0, a0, a3              # find entry in table
        l32i    a0, a0, EXC_TABLE_FAST_KERNEL   # load handler address
+       xsr     a3, excsave1            # restore a3 and dispatch table
        jx      a0
 
 ENDPROC(_KernelExceptionVector)
@@ -168,7 +170,7 @@ ENDPROC(_KernelExceptionVector)
  *
  *     a0:        DEPC
  *     a1:        a1
- *     a2:        trashed, original value in EXC_TABLE_DOUBLE_A2
+ *     a2:        trashed, original value in EXC_TABLE_DOUBLE_SAVE
  *     a3:        exctable
  *     depc:      a0
  *     excsave_1: a3
@@ -204,47 +206,46 @@ ENDPROC(_KernelExceptionVector)
 
        .section .DoubleExceptionVector.text, "ax"
        .begin literal_prefix .DoubleExceptionVector
+       .globl _DoubleExceptionVector_WindowUnderflow
+       .globl _DoubleExceptionVector_WindowOverflow
 
 ENTRY(_DoubleExceptionVector)
 
-       /* Deliberately destroy excsave (don't assume it's value was valid). */
-
-       wsr     a3, excsave1            # save a3
+       xsr     a3, excsave1
+       s32i    a2, a3, EXC_TABLE_DOUBLE_SAVE
 
        /* Check for kernel double exception (usually fatal). */
 
-       rsr     a3, ps
-       _bbci.l a3, PS_UM_BIT, .Lksp
+       rsr     a2, ps
+       _bbci.l a2, PS_UM_BIT, .Lksp
 
        /* Check if we are currently handling a window exception. */
        /* Note: We don't need to indicate that we enter a critical section. */
 
        xsr     a0, depc                # get DEPC, save a0
 
-       movi    a3, WINDOW_VECTORS_VADDR
-       _bltu   a0, a3, .Lfixup
-       addi    a3, a3, WINDOW_VECTORS_SIZE
-       _bgeu   a0, a3, .Lfixup
+       movi    a2, WINDOW_VECTORS_VADDR
+       _bltu   a0, a2, .Lfixup
+       addi    a2, a2, WINDOW_VECTORS_SIZE
+       _bgeu   a0, a2, .Lfixup
 
        /* Window overflow/underflow exception. Get stack pointer. */
 
-       mov     a3, a2
-       /* This explicit literal and the following references to it are made
-        * in order to fit DoubleExceptionVector.literals into the available
-        * 16-byte gap before DoubleExceptionVector.text in the absence of
-        * link time relaxation. See kernel/vmlinux.lds.S
-        */
-       .literal .Lexc_table, exc_table
-       l32r    a2, .Lexc_table
-       l32i    a2, a2, EXC_TABLE_KSTK
+       l32i    a2, a3, EXC_TABLE_KSTK
 
        /* Check for overflow/underflow exception, jump if overflow. */
 
-       _bbci.l a0, 6, .Lovfl
-
-       /* a0: depc, a1: a1, a2: kstk, a3: a2, depc: a0, excsave: a3  */
+       _bbci.l a0, 6, _DoubleExceptionVector_WindowOverflow
 
-       /* Restart window underflow exception.
+       /*
+        * Restart window underflow exception.
+        * Currently:
+        *      depc = orig a0,
+        *      a0 = orig DEPC,
+        *      a2 = new sp based on KSTK from exc_table
+        *      a3 = excsave_1
+        *      excsave_1 = orig a3
+        *
         * We return to the instruction in user space that caused the window
         * underflow exception. Therefore, we change window base to the value
         * before we entered the window underflow exception and prepare the
@@ -252,10 +253,11 @@ ENTRY(_DoubleExceptionVector)
         * by changing depc (in a0).
         * Note: We can trash the current window frame (a0...a3) and depc!
         */
-
+_DoubleExceptionVector_WindowUnderflow:
+       xsr     a3, excsave1
        wsr     a2, depc                # save stack pointer temporarily
        rsr     a0, ps
-       extui   a0, a0, PS_OWB_SHIFT, 4
+       extui   a0, a0, PS_OWB_SHIFT, PS_OWB_WIDTH
        wsr     a0, windowbase
        rsync
 
@@ -263,28 +265,57 @@ ENTRY(_DoubleExceptionVector)
 
        xsr     a2, depc                # save a2 and get stack pointer
        s32i    a0, a2, PT_AREG0
-
-       wsr     a3, excsave1            # save a3
-       l32r    a3, .Lexc_table
-
+       xsr     a3, excsave1
        rsr     a0, exccause
        s32i    a0, a2, PT_DEPC         # mark it as a regular exception
        addx4   a0, a0, a3
+       xsr     a3, excsave1
        l32i    a0, a0, EXC_TABLE_FAST_USER
        jx      a0
 
-.Lfixup:/* Check for a fixup handler or if we were in a critical section. */
+       /*
+        * We only allow the ITLB miss exception if we are in kernel space.
+        * All other exceptions are unexpected and thus unrecoverable!
+        */
+
+#ifdef CONFIG_MMU
+       .extern fast_second_level_miss_double_kernel
+
+.Lksp: /* a0: a0, a1: a1, a2: a2, a3: trashed, depc: depc, excsave: a3 */
+
+       rsr     a3, exccause
+       beqi    a3, EXCCAUSE_ITLB_MISS, 1f
+       addi    a3, a3, -EXCCAUSE_DTLB_MISS
+       bnez    a3, .Lunrecoverable
+1:     movi    a3, fast_second_level_miss_double_kernel
+       jx      a3
+#else
+.equ   .Lksp,  .Lunrecoverable
+#endif
+
+       /* Critical! We can't handle this situation. PANIC! */
 
-       /* a0: depc, a1: a1, a2: a2, a3: trashed, depc: a0, excsave1: a3 */
+       .extern unrecoverable_exception
 
-       l32r    a3, .Lexc_table
-       s32i    a2, a3, EXC_TABLE_DOUBLE_SAVE   # temporary variable
+.Lunrecoverable_fixup:
+       l32i    a2, a3, EXC_TABLE_DOUBLE_SAVE
+       xsr     a0, depc
+
+.Lunrecoverable:
+       rsr     a3, excsave1
+       wsr     a0, excsave1
+       movi    a0, unrecoverable_exception
+       callx0  a0
+
+.Lfixup:/* Check for a fixup handler or if we were in a critical section. */
+
+       /* a0: depc, a1: a1, a2: trash, a3: exctable, depc: a0, excsave1: a3 */
 
        /* Enter critical section. */
 
        l32i    a2, a3, EXC_TABLE_FIXUP
        s32i    a3, a3, EXC_TABLE_FIXUP
-       beq     a2, a3, .Lunrecoverable_fixup   # critical!
+       beq     a2, a3, .Lunrecoverable_fixup   # critical section
        beqz    a2, .Ldflt                      # no handler was registered
 
        /* a0: depc, a1: a1, a2: trash, a3: exctable, depc: a0, excsave: a3 */
@@ -293,58 +324,145 @@ ENTRY(_DoubleExceptionVector)
 
 .Ldflt:        /* Get stack pointer. */
 
-       l32i    a3, a3, EXC_TABLE_DOUBLE_SAVE
-       addi    a2, a3, -PT_USER_SIZE
-
-.Lovfl:        /* Jump to default handlers. */
+       l32i    a2, a3, EXC_TABLE_DOUBLE_SAVE
+       addi    a2, a2, -PT_USER_SIZE
 
-       /* a0: depc, a1: a1, a2: kstk, a3: a2, depc: a0, excsave: a3 */
+       /* a0: depc, a1: a1, a2: kstk, a3: exctable, depc: a0, excsave: a3 */
 
-       xsr     a3, depc
        s32i    a0, a2, PT_DEPC
-       s32i    a3, a2, PT_AREG0
+       l32i    a0, a3, EXC_TABLE_DOUBLE_SAVE
+       xsr     a0, depc
+       s32i    a0, a2, PT_AREG0
 
-       /* a0: avail, a1: a1, a2: kstk, a3: avail, depc: a2, excsave: a3 */
+       /* a0: avail, a1: a1, a2: kstk, a3: exctable, depc: a2, excsave: a3 */
 
-       l32r    a3, .Lexc_table
        rsr     a0, exccause
        addx4   a0, a0, a3
+       xsr     a3, excsave1
        l32i    a0, a0, EXC_TABLE_FAST_USER
        jx      a0
 
        /*
-        * We only allow the ITLB miss exception if we are in kernel space.
-        * All other exceptions are unexpected and thus unrecoverable!
+        * Restart window OVERFLOW exception.
+        * Currently:
+        *      depc = orig a0,
+        *      a0 = orig DEPC,
+        *      a2 = new sp based on KSTK from exc_table
+        *      a3 = EXCSAVE_1
+        *      excsave_1 = orig a3
+        *
+        * We return to the instruction in user space that caused the window
+        * overflow exception. Therefore, we change window base to the value
+        * before we entered the window overflow exception and prepare the
+        * registers to return as if we were coming from a regular exception
+        * by changing DEPC (in a0).
+        *
+        * NOTE: We CANNOT trash the current window frame (a0...a3), but we
+        * can clobber depc.
+        *
+        * The tricky part here is that overflow8 and overflow12 handlers
+        * save a0, then clobber a0.  To restart the handler, we have to restore
+        * a0 if the double exception was past the point where a0 was clobbered.
+        *
+        * To keep things simple, we take advantage of the fact all overflow
+        * handlers save a0 in their very first instruction.  If DEPC was past
+        * that instruction, we can safely restore a0 from where it was saved
+        * on the stack.
+        *
+        * a0: depc, a1: a1, a2: kstk, a3: exc_table, depc: a0, excsave1: a3
         */
+_DoubleExceptionVector_WindowOverflow:
+       extui   a2, a0, 0, 6    # get offset into 64-byte vector handler
+       beqz    a2, 1f          # if at start of vector, don't restore
 
-#ifdef CONFIG_MMU
-       .extern fast_second_level_miss_double_kernel
+       addi    a0, a0, -128
+       bbsi    a0, 8, 1f       # don't restore except for overflow 8 and 12
+       bbsi    a0, 7, 2f
 
-.Lksp: /* a0: a0, a1: a1, a2: a2, a3: trashed, depc: depc, excsave: a3 */
+       /*
+        * Restore a0 as saved by _WindowOverflow8().
+        *
+        * FIXME:  we really need a fixup handler for this L32E,
+        * for the extremely unlikely case where the overflow handler's
+        * reference thru a0 gets a hardware TLB refill that bumps out
+        * the (distinct, aliasing) TLB entry that mapped its prior
+        * references thru a9, and where our reference now thru a9
+        * gets a 2nd-level miss exception (not hardware TLB refill).
+        */
 
-       rsr     a3, exccause
-       beqi    a3, EXCCAUSE_ITLB_MISS, 1f
-       addi    a3, a3, -EXCCAUSE_DTLB_MISS
-       bnez    a3, .Lunrecoverable
-1:     movi    a3, fast_second_level_miss_double_kernel
-       jx      a3
-#else
-.equ   .Lksp,  .Lunrecoverable
-#endif
+       l32e    a2, a9, -16
+       wsr     a2, depc        # replace the saved a0
+       j       1f
 
-       /* Critical! We can't handle this situation. PANIC! */
+2:
+       /*
+        * Restore a0 as saved by _WindowOverflow12().
+        *
+        * FIXME:  we really need a fixup handler for this L32E,
+        * for the extremely unlikely case where the overflow handler's
+        * reference thru a0 gets a hardware TLB refill that bumps out
+        * the (distinct, aliasing) TLB entry that mapped its prior
+        * references thru a13, and where our reference now thru a13
+        * gets a 2nd-level miss exception (not hardware TLB refill).
+        */
 
-       .extern unrecoverable_exception
+       l32e    a2, a13, -16
+       wsr     a2, depc        # replace the saved a0
+1:
+       /*
+        * Restore WindowBase while leaving all address registers restored.
+        * We have to use ROTW for this, because WSR.WINDOWBASE requires
+        * an address register (which would prevent restore).
+        *
+        * Window Base goes from 0 ... 7 (Module 8)
+        * Window Start is 8 bits; Ex: (0b1010 1010):0x55 from series of call4s
+        */
+
+       rsr     a0, ps
+       extui   a0, a0, PS_OWB_SHIFT, PS_OWB_WIDTH
+       rsr     a2, windowbase
+       sub     a0, a2, a0
+       extui   a0, a0, 0, 3
 
-.Lunrecoverable_fixup:
        l32i    a2, a3, EXC_TABLE_DOUBLE_SAVE
-       xsr     a0, depc
+       xsr     a3, excsave1
+       beqi    a0, 1, .L1pane
+       beqi    a0, 3, .L3pane
 
-.Lunrecoverable:
-       rsr     a3, excsave1
-       wsr     a0, excsave1
-       movi    a0, unrecoverable_exception
-       callx0  a0
+       rsr     a0, depc
+       rotw    -2
+
+       /*
+        * We are now in the user code's original window frame.
+        * Process the exception as a user exception as if it was
+        * taken by the user code.
+        *
+        * This is similar to the user exception vector,
+        * except that PT_DEPC isn't set to EXCCAUSE.
+        */
+1:
+       xsr     a3, excsave1
+       wsr     a2, depc
+       l32i    a2, a3, EXC_TABLE_KSTK
+       s32i    a0, a2, PT_AREG0
+       rsr     a0, exccause
+
+       s32i    a0, a2, PT_DEPC
+
+       addx4   a0, a0, a3
+       l32i    a0, a0, EXC_TABLE_FAST_USER
+       xsr     a3, excsave1
+       jx      a0
+
+.L1pane:
+       rsr     a0, depc
+       rotw    -1
+       j       1b
+
+.L3pane:
+       rsr     a0, depc
+       rotw    -3
+       j       1b
 
        .end literal_prefix
 
index d8507f8..74a60c7 100644 (file)
@@ -25,6 +25,7 @@
 #include <asm/io.h>
 #include <asm/page.h>
 #include <asm/pgalloc.h>
+#include <asm/ftrace.h>
 #ifdef CONFIG_BLK_DEV_FD
 #include <asm/floppy.h>
 #endif
index 4b7bc8d..70fa7bc 100644 (file)
@@ -72,6 +72,8 @@ void do_page_fault(struct pt_regs *regs)
               address, exccause, regs->pc, is_write? "w":"", is_exec? "x":"");
 #endif
 
+       if (user_mode(regs))
+               flags |= FAULT_FLAG_USER;
 retry:
        down_read(&mm->mmap_sem);
        vma = find_vma(mm, address);
index a7e40a7..7f38e40 100644 (file)
@@ -99,6 +99,12 @@ config BLK_DEV_THROTTLING
 
        See Documentation/cgroups/blkio-controller.txt for more information.
 
+config CMDLINE_PARSER
+       bool "Block device command line partition parser"
+       default n
+       ---help---
+       Parsing command line, get the partitions information.
+
 menu "Partition Types"
 
 source "block/partitions/Kconfig"
index 39b76ba..4fa4be5 100644 (file)
@@ -18,3 +18,4 @@ obj-$(CONFIG_IOSCHED_CFQ)     += cfq-iosched.o
 
 obj-$(CONFIG_BLOCK_COMPAT)     += compat_ioctl.o
 obj-$(CONFIG_BLK_DEV_INTEGRITY)        += blk-integrity.o
+obj-$(CONFIG_CMDLINE_PARSER)   += cmdline-parser.o
index 4464c82..46cd7bd 100644 (file)
@@ -367,7 +367,7 @@ struct io_cq *ioc_create_icq(struct io_context *ioc, struct request_queue *q,
        if (!icq)
                return NULL;
 
-       if (radix_tree_preload(gfp_mask) < 0) {
+       if (radix_tree_maybe_preload(gfp_mask) < 0) {
                kmem_cache_free(et->icq_cache, icq);
                return NULL;
        }
index 5efc5a6..3aa5b19 100644 (file)
@@ -29,7 +29,7 @@ queue_var_store(unsigned long *var, const char *page, size_t count)
        int err;
        unsigned long v;
 
-       err = strict_strtoul(page, 10, &v);
+       err = kstrtoul(page, 10, &v);
        if (err || v > UINT_MAX)
                return -EINVAL;
 
diff --git a/block/cmdline-parser.c b/block/cmdline-parser.c
new file mode 100644 (file)
index 0000000..cc2637f
--- /dev/null
@@ -0,0 +1,250 @@
+/*
+ * Parse command line, get partition information
+ *
+ * Written by Cai Zhiyong <caizhiyong@huawei.com>
+ *
+ */
+#include <linux/buffer_head.h>
+#include <linux/module.h>
+#include <linux/cmdline-parser.h>
+
+static int parse_subpart(struct cmdline_subpart **subpart, char *partdef)
+{
+       int ret = 0;
+       struct cmdline_subpart *new_subpart;
+
+       *subpart = NULL;
+
+       new_subpart = kzalloc(sizeof(struct cmdline_subpart), GFP_KERNEL);
+       if (!new_subpart)
+               return -ENOMEM;
+
+       if (*partdef == '-') {
+               new_subpart->size = (sector_t)(~0ULL);
+               partdef++;
+       } else {
+               new_subpart->size = (sector_t)memparse(partdef, &partdef);
+               if (new_subpart->size < (sector_t)PAGE_SIZE) {
+                       pr_warn("cmdline partition size is invalid.");
+                       ret = -EINVAL;
+                       goto fail;
+               }
+       }
+
+       if (*partdef == '@') {
+               partdef++;
+               new_subpart->from = (sector_t)memparse(partdef, &partdef);
+       } else {
+               new_subpart->from = (sector_t)(~0ULL);
+       }
+
+       if (*partdef == '(') {
+               int length;
+               char *next = strchr(++partdef, ')');
+
+               if (!next) {
+                       pr_warn("cmdline partition format is invalid.");
+                       ret = -EINVAL;
+                       goto fail;
+               }
+
+               length = min_t(int, next - partdef,
+                              sizeof(new_subpart->name) - 1);
+               strncpy(new_subpart->name, partdef, length);
+               new_subpart->name[length] = '\0';
+
+               partdef = ++next;
+       } else
+               new_subpart->name[0] = '\0';
+
+       new_subpart->flags = 0;
+
+       if (!strncmp(partdef, "ro", 2)) {
+               new_subpart->flags |= PF_RDONLY;
+               partdef += 2;
+       }
+
+       if (!strncmp(partdef, "lk", 2)) {
+               new_subpart->flags |= PF_POWERUP_LOCK;
+               partdef += 2;
+       }
+
+       *subpart = new_subpart;
+       return 0;
+fail:
+       kfree(new_subpart);
+       return ret;
+}
+
+static void free_subpart(struct cmdline_parts *parts)
+{
+       struct cmdline_subpart *subpart;
+
+       while (parts->subpart) {
+               subpart = parts->subpart;
+               parts->subpart = subpart->next_subpart;
+               kfree(subpart);
+       }
+}
+
+static int parse_parts(struct cmdline_parts **parts, const char *bdevdef)
+{
+       int ret = -EINVAL;
+       char *next;
+       int length;
+       struct cmdline_subpart **next_subpart;
+       struct cmdline_parts *newparts;
+       char buf[BDEVNAME_SIZE + 32 + 4];
+
+       *parts = NULL;
+
+       newparts = kzalloc(sizeof(struct cmdline_parts), GFP_KERNEL);
+       if (!newparts)
+               return -ENOMEM;
+
+       next = strchr(bdevdef, ':');
+       if (!next) {
+               pr_warn("cmdline partition has no block device.");
+               goto fail;
+       }
+
+       length = min_t(int, next - bdevdef, sizeof(newparts->name) - 1);
+       strncpy(newparts->name, bdevdef, length);
+       newparts->name[length] = '\0';
+       newparts->nr_subparts = 0;
+
+       next_subpart = &newparts->subpart;
+
+       while (next && *(++next)) {
+               bdevdef = next;
+               next = strchr(bdevdef, ',');
+
+               length = (!next) ? (sizeof(buf) - 1) :
+                       min_t(int, next - bdevdef, sizeof(buf) - 1);
+
+               strncpy(buf, bdevdef, length);
+               buf[length] = '\0';
+
+               ret = parse_subpart(next_subpart, buf);
+               if (ret)
+                       goto fail;
+
+               newparts->nr_subparts++;
+               next_subpart = &(*next_subpart)->next_subpart;
+       }
+
+       if (!newparts->subpart) {
+               pr_warn("cmdline partition has no valid partition.");
+               ret = -EINVAL;
+               goto fail;
+       }
+
+       *parts = newparts;
+
+       return 0;
+fail:
+       free_subpart(newparts);
+       kfree(newparts);
+       return ret;
+}
+
+void cmdline_parts_free(struct cmdline_parts **parts)
+{
+       struct cmdline_parts *next_parts;
+
+       while (*parts) {
+               next_parts = (*parts)->next_parts;
+               free_subpart(*parts);
+               kfree(*parts);
+               *parts = next_parts;
+       }
+}
+
+int cmdline_parts_parse(struct cmdline_parts **parts, const char *cmdline)
+{
+       int ret;
+       char *buf;
+       char *pbuf;
+       char *next;
+       struct cmdline_parts **next_parts;
+
+       *parts = NULL;
+
+       next = pbuf = buf = kstrdup(cmdline, GFP_KERNEL);
+       if (!buf)
+               return -ENOMEM;
+
+       next_parts = parts;
+
+       while (next && *pbuf) {
+               next = strchr(pbuf, ';');
+               if (next)
+                       *next = '\0';
+
+               ret = parse_parts(next_parts, pbuf);
+               if (ret)
+                       goto fail;
+
+               if (next)
+                       pbuf = ++next;
+
+               next_parts = &(*next_parts)->next_parts;
+       }
+
+       if (!*parts) {
+               pr_warn("cmdline partition has no valid partition.");
+               ret = -EINVAL;
+               goto fail;
+       }
+
+       ret = 0;
+done:
+       kfree(buf);
+       return ret;
+
+fail:
+       cmdline_parts_free(parts);
+       goto done;
+}
+
+struct cmdline_parts *cmdline_parts_find(struct cmdline_parts *parts,
+                                        const char *bdev)
+{
+       while (parts && strncmp(bdev, parts->name, sizeof(parts->name)))
+               parts = parts->next_parts;
+       return parts;
+}
+
+/*
+ *  add_part()
+ *    0 success.
+ *    1 can not add so many partitions.
+ */
+void cmdline_parts_set(struct cmdline_parts *parts, sector_t disk_size,
+                      int slot,
+                      int (*add_part)(int, struct cmdline_subpart *, void *),
+                      void *param)
+
+{
+       sector_t from = 0;
+       struct cmdline_subpart *subpart;
+
+       for (subpart = parts->subpart; subpart;
+            subpart = subpart->next_subpart, slot++) {
+               if (subpart->from == (sector_t)(~0ULL))
+                       subpart->from = from;
+               else
+                       from = subpart->from;
+
+               if (from >= disk_size)
+                       break;
+
+               if (subpart->size > (disk_size - from))
+                       subpart->size = disk_size - from;
+
+               from += subpart->size;
+
+               if (add_part(slot, subpart, param))
+                       break;
+       }
+}
index 7e5d474..fbd5a67 100644 (file)
@@ -70,7 +70,7 @@ static int compat_hdio_getgeo(struct gendisk *disk, struct block_device *bdev,
                return ret;
 
        ret = copy_to_user(ugeo, &geo, 4);
-       ret |= __put_user(geo.start, &ugeo->start);
+       ret |= put_user(geo.start, &ugeo->start);
        if (ret)
                ret = -EFAULT;
 
index 4cebb2f..87a3208 100644 (file)
@@ -260,3 +260,10 @@ config SYSV68_PARTITION
          partition table format used by Motorola Delta machines (using
          sysv68).
          Otherwise, say N.
+
+config CMDLINE_PARTITION
+       bool "Command line partition support" if PARTITION_ADVANCED
+       select CMDLINE_PARSER
+       help
+         Say Y here if you would read the partitions table from bootargs.
+         The format for the command line is just like mtdparts.
index 2be4d7b..37a9527 100644 (file)
@@ -8,6 +8,7 @@ obj-$(CONFIG_ACORN_PARTITION) += acorn.o
 obj-$(CONFIG_AMIGA_PARTITION) += amiga.o
 obj-$(CONFIG_ATARI_PARTITION) += atari.o
 obj-$(CONFIG_AIX_PARTITION) += aix.o
+obj-$(CONFIG_CMDLINE_PARTITION) += cmdline.o
 obj-$(CONFIG_MAC_PARTITION) += mac.o
 obj-$(CONFIG_LDM_PARTITION) += ldm.o
 obj-$(CONFIG_MSDOS_PARTITION) += msdos.o
index 19ba207..9ac1df7 100644 (file)
@@ -34,6 +34,7 @@
 #include "efi.h"
 #include "karma.h"
 #include "sysv68.h"
+#include "cmdline.h"
 
 int warn_no_part = 1; /*This is ugly: should make genhd removable media aware*/
 
@@ -65,6 +66,9 @@ static int (*check_part[])(struct parsed_partitions *) = {
        adfspart_check_ADFS,
 #endif
 
+#ifdef CONFIG_CMDLINE_PARTITION
+       cmdline_partition,
+#endif
 #ifdef CONFIG_EFI_PARTITION
        efi_partition,          /* this must come before msdos */
 #endif
diff --git a/block/partitions/cmdline.c b/block/partitions/cmdline.c
new file mode 100644 (file)
index 0000000..56cf4ff
--- /dev/null
@@ -0,0 +1,99 @@
+/*
+ * Copyright (C) 2013 HUAWEI
+ * Author: Cai Zhiyong <caizhiyong@huawei.com>
+ *
+ * Read block device partition table from command line.
+ * The partition used for fixed block device (eMMC) embedded device.
+ * It is no MBR, save storage space. Bootloader can be easily accessed
+ * by absolute address of data on the block device.
+ * Users can easily change the partition.
+ *
+ * The format for the command line is just like mtdparts.
+ *
+ * Verbose config please reference "Documentation/block/cmdline-partition.txt"
+ *
+ */
+
+#include <linux/cmdline-parser.h>
+
+#include "check.h"
+#include "cmdline.h"
+
+static char *cmdline;
+static struct cmdline_parts *bdev_parts;
+
+static int add_part(int slot, struct cmdline_subpart *subpart, void *param)
+{
+       int label_min;
+       struct partition_meta_info *info;
+       char tmp[sizeof(info->volname) + 4];
+       struct parsed_partitions *state = (struct parsed_partitions *)param;
+
+       if (slot >= state->limit)
+               return 1;
+
+       put_partition(state, slot, subpart->from >> 9,
+                     subpart->size >> 9);
+
+       info = &state->parts[slot].info;
+
+       label_min = min_t(int, sizeof(info->volname) - 1,
+                         sizeof(subpart->name));
+       strncpy(info->volname, subpart->name, label_min);
+       info->volname[label_min] = '\0';
+
+       snprintf(tmp, sizeof(tmp), "(%s)", info->volname);
+       strlcat(state->pp_buf, tmp, PAGE_SIZE);
+
+       state->parts[slot].has_info = true;
+
+       return 0;
+}
+
+static int __init cmdline_parts_setup(char *s)
+{
+       cmdline = s;
+       return 1;
+}
+__setup("blkdevparts=", cmdline_parts_setup);
+
+/*
+ * Purpose: allocate cmdline partitions.
+ * Returns:
+ * -1 if unable to read the partition table
+ *  0 if this isn't our partition table
+ *  1 if successful
+ */
+int cmdline_partition(struct parsed_partitions *state)
+{
+       sector_t disk_size;
+       char bdev[BDEVNAME_SIZE];
+       struct cmdline_parts *parts;
+
+       if (cmdline) {
+               if (bdev_parts)
+                       cmdline_parts_free(&bdev_parts);
+
+               if (cmdline_parts_parse(&bdev_parts, cmdline)) {
+                       cmdline = NULL;
+                       return -1;
+               }
+               cmdline = NULL;
+       }
+
+       if (!bdev_parts)
+               return 0;
+
+       bdevname(state->bdev, bdev);
+       parts = cmdline_parts_find(bdev_parts, bdev);
+       if (!parts)
+               return 0;
+
+       disk_size = get_capacity(state->bdev->bd_disk) << 9;
+
+       cmdline_parts_set(parts, disk_size, 1, add_part, (void *)state);
+
+       strlcat(state->pp_buf, "\n", PAGE_SIZE);
+
+       return 1;
+}
diff --git a/block/partitions/cmdline.h b/block/partitions/cmdline.h
new file mode 100644 (file)
index 0000000..26e0f8d
--- /dev/null
@@ -0,0 +1,2 @@
+
+int cmdline_partition(struct parsed_partitions *state);
index c85fc89..1eb09ee 100644 (file)
@@ -25,6 +25,9 @@
  * TODO:
  *
  * Changelog:
+ * Mon August 5th, 2013 Davidlohr Bueso <davidlohr@hp.com>
+ * - detect hybrid MBRs, tighter pMBR checking & cleanups.
+ *
  * Mon Nov 09 2004 Matt Domsch <Matt_Domsch@dell.com>
  * - test for valid PMBR and valid PGPT before ever reading
  *   AGPT, allow override with 'gpt' kernel command line option.
@@ -149,34 +152,84 @@ static u64 last_lba(struct block_device *bdev)
                       bdev_logical_block_size(bdev)) - 1ULL;
 }
 
-static inline int
-pmbr_part_valid(struct partition *part)
+static inline int pmbr_part_valid(gpt_mbr_record *part)
 {
-        if (part->sys_ind == EFI_PMBR_OSTYPE_EFI_GPT &&
-            le32_to_cpu(part->start_sect) == 1UL)
-                return 1;
-        return 0;
+       if (part->os_type != EFI_PMBR_OSTYPE_EFI_GPT)
+               goto invalid;
+
+       /* set to 0x00000001 (i.e., the LBA of the GPT Partition Header) */
+       if (le32_to_cpu(part->starting_lba) != GPT_PRIMARY_PARTITION_TABLE_LBA)
+               goto invalid;
+
+       return GPT_MBR_PROTECTIVE;
+invalid:
+       return 0;
 }
 
 /**
  * is_pmbr_valid(): test Protective MBR for validity
  * @mbr: pointer to a legacy mbr structure
+ * @total_sectors: amount of sectors in the device
  *
- * Description: Returns 1 if PMBR is valid, 0 otherwise.
- * Validity depends on two things:
+ * Description: Checks for a valid protective or hybrid
+ * master boot record (MBR). The validity of a pMBR depends
+ * on all of the following properties:
  *  1) MSDOS signature is in the last two bytes of the MBR
  *  2) One partition of type 0xEE is found
+ *
+ * In addition, a hybrid MBR will have up to three additional
+ * primary partitions, which point to the same space that's
+ * marked out by up to three GPT partitions.
+ *
+ * Returns 0 upon invalid MBR, or GPT_MBR_PROTECTIVE or
+ * GPT_MBR_HYBRID depending on the device layout.
  */
-static int
-is_pmbr_valid(legacy_mbr *mbr)
+static int is_pmbr_valid(legacy_mbr *mbr, sector_t total_sectors)
 {
-       int i;
+       uint32_t sz = 0;
+       int i, part = 0, ret = 0; /* invalid by default */
+
        if (!mbr || le16_to_cpu(mbr->signature) != MSDOS_MBR_SIGNATURE)
-                return 0;
+               goto done;
+
+       for (i = 0; i < 4; i++) {
+               ret = pmbr_part_valid(&mbr->partition_record[i]);
+               if (ret == GPT_MBR_PROTECTIVE) {
+                       part = i;
+                       /*
+                        * Ok, we at least know that there's a protective MBR,
+                        * now check if there are other partition types for
+                        * hybrid MBR.
+                        */
+                       goto check_hybrid;
+               }
+       }
+
+       if (ret != GPT_MBR_PROTECTIVE)
+               goto done;
+check_hybrid:
        for (i = 0; i < 4; i++)
-               if (pmbr_part_valid(&mbr->partition_record[i]))
-                        return 1;
-       return 0;
+               if ((mbr->partition_record[i].os_type !=
+                       EFI_PMBR_OSTYPE_EFI_GPT) &&
+                   (mbr->partition_record[i].os_type != 0x00))
+                       ret = GPT_MBR_HYBRID;
+
+       /*
+        * Protective MBRs take up the lesser of the whole disk
+        * or 2 TiB (32bit LBA), ignoring the rest of the disk.
+        * Some partitioning programs, nonetheless, choose to set
+        * the size to the maximum 32-bit limitation, disregarding
+        * the disk size.
+        *
+        * Hybrid MBRs do not necessarily comply with this.
+        */
+       if (ret == GPT_MBR_PROTECTIVE) {
+               sz = le32_to_cpu(mbr->partition_record[part].size_in_lba);
+               if (sz != (uint32_t) total_sectors - 1 && sz != 0xFFFFFFFF)
+                       ret = 0;
+       }
+done:
+       return ret;
 }
 
 /**
@@ -243,8 +296,7 @@ static gpt_entry *alloc_read_gpt_entries(struct parsed_partitions *state,
                return NULL;
 
        if (read_lba(state, le64_to_cpu(gpt->partition_entry_lba),
-                     (u8 *) pte,
-                    count) < count) {
+                       (u8 *) pte, count) < count) {
                kfree(pte);
                 pte=NULL;
                return NULL;
@@ -364,7 +416,12 @@ static int is_gpt_valid(struct parsed_partitions *state, u64 lba,
                         (unsigned long long)lastlba);
                goto fail;
        }
-
+       if (le64_to_cpu((*gpt)->last_usable_lba) < le64_to_cpu((*gpt)->first_usable_lba)) {
+               pr_debug("GPT: last_usable_lba incorrect: %lld > %lld\n",
+                        (unsigned long long)le64_to_cpu((*gpt)->last_usable_lba),
+                        (unsigned long long)le64_to_cpu((*gpt)->first_usable_lba));
+               goto fail;
+       }
        /* Check that sizeof_partition_entry has the correct value */
        if (le32_to_cpu((*gpt)->sizeof_partition_entry) != sizeof(gpt_entry)) {
                pr_debug("GUID Partitition Entry Size check failed.\n");
@@ -429,44 +486,42 @@ compare_gpts(gpt_header *pgpt, gpt_header *agpt, u64 lastlba)
        if (!pgpt || !agpt)
                return;
        if (le64_to_cpu(pgpt->my_lba) != le64_to_cpu(agpt->alternate_lba)) {
-               printk(KERN_WARNING
-                      "GPT:Primary header LBA != Alt. header alternate_lba\n");
-               printk(KERN_WARNING "GPT:%lld != %lld\n",
+               pr_warn("GPT:Primary header LBA != Alt. header alternate_lba\n");
+               pr_warn("GPT:%lld != %lld\n",
                       (unsigned long long)le64_to_cpu(pgpt->my_lba),
                        (unsigned long long)le64_to_cpu(agpt->alternate_lba));
                error_found++;
        }
        if (le64_to_cpu(pgpt->alternate_lba) != le64_to_cpu(agpt->my_lba)) {
-               printk(KERN_WARNING
-                      "GPT:Primary header alternate_lba != Alt. header my_lba\n");
-               printk(KERN_WARNING "GPT:%lld != %lld\n",
+               pr_warn("GPT:Primary header alternate_lba != Alt. header my_lba\n");
+               pr_warn("GPT:%lld != %lld\n",
                       (unsigned long long)le64_to_cpu(pgpt->alternate_lba),
                        (unsigned long long)le64_to_cpu(agpt->my_lba));
                error_found++;
        }
        if (le64_to_cpu(pgpt->first_usable_lba) !=
             le64_to_cpu(agpt->first_usable_lba)) {
-               printk(KERN_WARNING "GPT:first_usable_lbas don't match.\n");
-               printk(KERN_WARNING "GPT:%lld != %lld\n",
+               pr_warn("GPT:first_usable_lbas don't match.\n");
+               pr_warn("GPT:%lld != %lld\n",
                       (unsigned long long)le64_to_cpu(pgpt->first_usable_lba),
                        (unsigned long long)le64_to_cpu(agpt->first_usable_lba));
                error_found++;
        }
        if (le64_to_cpu(pgpt->last_usable_lba) !=
             le64_to_cpu(agpt->last_usable_lba)) {
-               printk(KERN_WARNING "GPT:last_usable_lbas don't match.\n");
-               printk(KERN_WARNING "GPT:%lld != %lld\n",
+               pr_warn("GPT:last_usable_lbas don't match.\n");
+               pr_warn("GPT:%lld != %lld\n",
                       (unsigned long long)le64_to_cpu(pgpt->last_usable_lba),
                        (unsigned long long)le64_to_cpu(agpt->last_usable_lba));
                error_found++;
        }
        if (efi_guidcmp(pgpt->disk_guid, agpt->disk_guid)) {
-               printk(KERN_WARNING "GPT:disk_guids don't match.\n");
+               pr_warn("GPT:disk_guids don't match.\n");
                error_found++;
        }
        if (le32_to_cpu(pgpt->num_partition_entries) !=
             le32_to_cpu(agpt->num_partition_entries)) {
-               printk(KERN_WARNING "GPT:num_partition_entries don't match: "
+               pr_warn("GPT:num_partition_entries don't match: "
                       "0x%x != 0x%x\n",
                       le32_to_cpu(pgpt->num_partition_entries),
                       le32_to_cpu(agpt->num_partition_entries));
@@ -474,8 +529,7 @@ compare_gpts(gpt_header *pgpt, gpt_header *agpt, u64 lastlba)
        }
        if (le32_to_cpu(pgpt->sizeof_partition_entry) !=
             le32_to_cpu(agpt->sizeof_partition_entry)) {
-               printk(KERN_WARNING
-                      "GPT:sizeof_partition_entry values don't match: "
+               pr_warn("GPT:sizeof_partition_entry values don't match: "
                       "0x%x != 0x%x\n",
                        le32_to_cpu(pgpt->sizeof_partition_entry),
                       le32_to_cpu(agpt->sizeof_partition_entry));
@@ -483,34 +537,30 @@ compare_gpts(gpt_header *pgpt, gpt_header *agpt, u64 lastlba)
        }
        if (le32_to_cpu(pgpt->partition_entry_array_crc32) !=
             le32_to_cpu(agpt->partition_entry_array_crc32)) {
-               printk(KERN_WARNING
-                      "GPT:partition_entry_array_crc32 values don't match: "
+               pr_warn("GPT:partition_entry_array_crc32 values don't match: "
                       "0x%x != 0x%x\n",
                        le32_to_cpu(pgpt->partition_entry_array_crc32),
                       le32_to_cpu(agpt->partition_entry_array_crc32));
                error_found++;
        }
        if (le64_to_cpu(pgpt->alternate_lba) != lastlba) {
-               printk(KERN_WARNING
-                      "GPT:Primary header thinks Alt. header is not at the end of the disk.\n");
-               printk(KERN_WARNING "GPT:%lld != %lld\n",
+               pr_warn("GPT:Primary header thinks Alt. header is not at the end of the disk.\n");
+               pr_warn("GPT:%lld != %lld\n",
                        (unsigned long long)le64_to_cpu(pgpt->alternate_lba),
                        (unsigned long long)lastlba);
                error_found++;
        }
 
        if (le64_to_cpu(agpt->my_lba) != lastlba) {
-               printk(KERN_WARNING
-                      "GPT:Alternate GPT header not at the end of the disk.\n");
-               printk(KERN_WARNING "GPT:%lld != %lld\n",
+               pr_warn("GPT:Alternate GPT header not at the end of the disk.\n");
+               pr_warn("GPT:%lld != %lld\n",
                        (unsigned long long)le64_to_cpu(agpt->my_lba),
                        (unsigned long long)lastlba);
                error_found++;
        }
 
        if (error_found)
-               printk(KERN_WARNING
-                      "GPT: Use GNU Parted to correct GPT errors.\n");
+               pr_warn("GPT: Use GNU Parted to correct GPT errors.\n");
        return;
 }
 
@@ -536,6 +586,7 @@ static int find_valid_gpt(struct parsed_partitions *state, gpt_header **gpt,
        gpt_header *pgpt = NULL, *agpt = NULL;
        gpt_entry *pptes = NULL, *aptes = NULL;
        legacy_mbr *legacymbr;
+       sector_t total_sectors = i_size_read(state->bdev->bd_inode) >> 9;
        u64 lastlba;
 
        if (!ptes)
@@ -543,17 +594,22 @@ static int find_valid_gpt(struct parsed_partitions *state, gpt_header **gpt,
 
        lastlba = last_lba(state->bdev);
         if (!force_gpt) {
-                /* This will be added to the EFI Spec. per Intel after v1.02. */
-                legacymbr = kzalloc(sizeof (*legacymbr), GFP_KERNEL);
-                if (legacymbr) {
-                        read_lba(state, 0, (u8 *) legacymbr,
-                                sizeof (*legacymbr));
-                        good_pmbr = is_pmbr_valid(legacymbr);
-                        kfree(legacymbr);
-                }
-                if (!good_pmbr)
-                        goto fail;
-        }
+               /* This will be added to the EFI Spec. per Intel after v1.02. */
+               legacymbr = kzalloc(sizeof(*legacymbr), GFP_KERNEL);
+               if (!legacymbr)
+                       goto fail;
+
+               read_lba(state, 0, (u8 *)legacymbr, sizeof(*legacymbr));
+               good_pmbr = is_pmbr_valid(legacymbr, total_sectors);
+               kfree(legacymbr);
+
+               if (!good_pmbr)
+                       goto fail;
+
+               pr_debug("Device has a %s MBR\n",
+                        good_pmbr == GPT_MBR_PROTECTIVE ?
+                                               "protective" : "hybrid");
+       }
 
        good_pgpt = is_gpt_valid(state, GPT_PRIMARY_PARTITION_TABLE_LBA,
                                 &pgpt, &pptes);
@@ -576,11 +632,8 @@ static int find_valid_gpt(struct parsed_partitions *state, gpt_header **gpt,
                 *ptes = pptes;
                 kfree(agpt);
                 kfree(aptes);
-                if (!good_agpt) {
-                        printk(KERN_WARNING 
-                              "Alternate GPT is invalid, "
-                               "using primary GPT.\n");
-                }
+               if (!good_agpt)
+                        pr_warn("Alternate GPT is invalid, using primary GPT.\n");
                 return 1;
         }
         else if (good_agpt) {
@@ -588,8 +641,7 @@ static int find_valid_gpt(struct parsed_partitions *state, gpt_header **gpt,
                 *ptes = aptes;
                 kfree(pgpt);
                 kfree(pptes);
-                printk(KERN_WARNING 
-                       "Primary GPT is invalid, using alternate GPT.\n");
+               pr_warn("Primary GPT is invalid, using alternate GPT.\n");
                 return 1;
         }
 
@@ -651,8 +703,7 @@ int efi_partition(struct parsed_partitions *state)
                put_partition(state, i+1, start * ssz, size * ssz);
 
                /* If this is a RAID volume, tell md */
-               if (!efi_guidcmp(ptes[i].partition_type_guid,
-                                PARTITION_LINUX_RAID_GUID))
+               if (!efi_guidcmp(ptes[i].partition_type_guid, PARTITION_LINUX_RAID_GUID))
                        state->parts[i + 1].flags = ADDPART_FLAG_RAID;
 
                info = &state->parts[i + 1].info;
index b69ab72..4efcafb 100644 (file)
@@ -37,6 +37,9 @@
 #define EFI_PMBR_OSTYPE_EFI 0xEF
 #define EFI_PMBR_OSTYPE_EFI_GPT 0xEE
 
+#define GPT_MBR_PROTECTIVE  1
+#define GPT_MBR_HYBRID      2
+
 #define GPT_HEADER_SIGNATURE 0x5452415020494645ULL
 #define GPT_HEADER_REVISION_V1 0x00010000
 #define GPT_PRIMARY_PARTITION_TABLE_LBA 1
@@ -101,11 +104,25 @@ typedef struct _gpt_entry {
        efi_char16_t partition_name[72 / sizeof (efi_char16_t)];
 } __attribute__ ((packed)) gpt_entry;
 
+typedef struct _gpt_mbr_record {
+       u8      boot_indicator; /* unused by EFI, set to 0x80 for bootable */
+       u8      start_head;     /* unused by EFI, pt start in CHS */
+       u8      start_sector;   /* unused by EFI, pt start in CHS */
+       u8      start_track;
+       u8      os_type;        /* EFI and legacy non-EFI OS types */
+       u8      end_head;       /* unused by EFI, pt end in CHS */
+       u8      end_sector;     /* unused by EFI, pt end in CHS */
+       u8      end_track;      /* unused by EFI, pt end in CHS */
+       __le32  starting_lba;   /* used by EFI - start addr of the on disk pt */
+       __le32  size_in_lba;    /* used by EFI - size of pt in LBA */
+} __packed gpt_mbr_record;
+
+
 typedef struct _legacy_mbr {
        u8 boot_code[440];
        __le32 unique_mbr_signature;
        __le16 unknown;
-       struct partition partition_record[4];
+       gpt_mbr_record partition_record[4];
        __le16 signature;
 } __attribute__ ((packed)) legacy_mbr;
 
@@ -113,22 +130,3 @@ typedef struct _legacy_mbr {
 extern int efi_partition(struct parsed_partitions *state);
 
 #endif
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only.  This must remain at the end
- * of the file.
- * --------------------------------------------------------------------------
- * Local variables:
- * c-indent-level: 4 
- * c-brace-imaginary-offset: 0
- * c-brace-offset: -4
- * c-argdecl-indent: 4
- * c-label-offset: -4
- * c-continued-statement-offset: 4
- * c-continued-brace-offset: 0
- * indent-tabs-mode: nil
- * tab-width: 8
- * End:
- */
index 2d5ed08..80019ba 100644 (file)
@@ -83,7 +83,7 @@ obj-$(CONFIG_CRYPTO_ZLIB) += zlib.o
 obj-$(CONFIG_CRYPTO_MICHAEL_MIC) += michael_mic.o
 obj-$(CONFIG_CRYPTO_CRC32C) += crc32c.o
 obj-$(CONFIG_CRYPTO_CRC32) += crc32.o
-obj-$(CONFIG_CRYPTO_CRCT10DIF) += crct10dif.o
+obj-$(CONFIG_CRYPTO_CRCT10DIF) += crct10dif_common.o crct10dif_generic.o
 obj-$(CONFIG_CRYPTO_AUTHENC) += authenc.o authencesn.o
 obj-$(CONFIG_CRYPTO_LZO) += lzo.o
 obj-$(CONFIG_CRYPTO_LZ4) += lz4.o
index 320ea4d..a2b39c5 100644 (file)
@@ -34,6 +34,8 @@ EXPORT_SYMBOL_GPL(crypto_alg_sem);
 BLOCKING_NOTIFIER_HEAD(crypto_chain);
 EXPORT_SYMBOL_GPL(crypto_chain);
 
+static struct crypto_alg *crypto_larval_wait(struct crypto_alg *alg);
+
 struct crypto_alg *crypto_mod_get(struct crypto_alg *alg)
 {
        return try_module_get(alg->cra_module) ? crypto_alg_get(alg) : NULL;
@@ -144,8 +146,11 @@ static struct crypto_alg *crypto_larval_add(const char *name, u32 type,
        }
        up_write(&crypto_alg_sem);
 
-       if (alg != &larval->alg)
+       if (alg != &larval->alg) {
                kfree(larval);
+               if (crypto_is_larval(alg))
+                       alg = crypto_larval_wait(alg);
+       }
 
        return alg;
 }
diff --git a/crypto/crct10dif.c b/crypto/crct10dif.c
deleted file mode 100644 (file)
index 92aca96..0000000
+++ /dev/null
@@ -1,178 +0,0 @@
-/*
- * Cryptographic API.
- *
- * T10 Data Integrity Field CRC16 Crypto Transform
- *
- * Copyright (c) 2007 Oracle Corporation.  All rights reserved.
- * Written by Martin K. Petersen <martin.petersen@oracle.com>
- * Copyright (C) 2013 Intel Corporation
- * Author: Tim Chen <tim.c.chen@linux.intel.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#include <linux/types.h>
-#include <linux/module.h>
-#include <linux/crc-t10dif.h>
-#include <crypto/internal/hash.h>
-#include <linux/init.h>
-#include <linux/string.h>
-#include <linux/kernel.h>
-
-struct chksum_desc_ctx {
-       __u16 crc;
-};
-
-/* Table generated using the following polynomium:
- * x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x + 1
- * gt: 0x8bb7
- */
-static const __u16 t10_dif_crc_table[256] = {
-       0x0000, 0x8BB7, 0x9CD9, 0x176E, 0xB205, 0x39B2, 0x2EDC, 0xA56B,
-       0xEFBD, 0x640A, 0x7364, 0xF8D3, 0x5DB8, 0xD60F, 0xC161, 0x4AD6,
-       0x54CD, 0xDF7A, 0xC814, 0x43A3, 0xE6C8, 0x6D7F, 0x7A11, 0xF1A6,
-       0xBB70, 0x30C7, 0x27A9, 0xAC1E, 0x0975, 0x82C2, 0x95AC, 0x1E1B,
-       0xA99A, 0x222D, 0x3543, 0xBEF4, 0x1B9F, 0x9028, 0x8746, 0x0CF1,
-       0x4627, 0xCD90, 0xDAFE, 0x5149, 0xF422, 0x7F95, 0x68FB, 0xE34C,
-       0xFD57, 0x76E0, 0x618E, 0xEA39, 0x4F52, 0xC4E5, 0xD38B, 0x583C,
-       0x12EA, 0x995D, 0x8E33, 0x0584, 0xA0EF, 0x2B58, 0x3C36, 0xB781,
-       0xD883, 0x5334, 0x445A, 0xCFED, 0x6A86, 0xE131, 0xF65F, 0x7DE8,
-       0x373E, 0xBC89, 0xABE7, 0x2050, 0x853B, 0x0E8C, 0x19E2, 0x9255,
-       0x8C4E, 0x07F9, 0x1097, 0x9B20, 0x3E4B, 0xB5FC, 0xA292, 0x2925,
-       0x63F3, 0xE844, 0xFF2A, 0x749D, 0xD1F6, 0x5A41, 0x4D2F, 0xC698,
-       0x7119, 0xFAAE, 0xEDC0, 0x6677, 0xC31C, 0x48AB, 0x5FC5, 0xD472,
-       0x9EA4, 0x1513, 0x027D, 0x89CA, 0x2CA1, 0xA716, 0xB078, 0x3BCF,
-       0x25D4, 0xAE63, 0xB90D, 0x32BA, 0x97D1, 0x1C66, 0x0B08, 0x80BF,
-       0xCA69, 0x41DE, 0x56B0, 0xDD07, 0x786C, 0xF3DB, 0xE4B5, 0x6F02,
-       0x3AB1, 0xB106, 0xA668, 0x2DDF, 0x88B4, 0x0303, 0x146D, 0x9FDA,
-       0xD50C, 0x5EBB, 0x49D5, 0xC262, 0x6709, 0xECBE, 0xFBD0, 0x7067,
-       0x6E7C, 0xE5CB, 0xF2A5, 0x7912, 0xDC79, 0x57CE, 0x40A0, 0xCB17,
-       0x81C1, 0x0A76, 0x1D18, 0x96AF, 0x33C4, 0xB873, 0xAF1D, 0x24AA,
-       0x932B, 0x189C, 0x0FF2, 0x8445, 0x212E, 0xAA99, 0xBDF7, 0x3640,
-       0x7C96, 0xF721, 0xE04F, 0x6BF8, 0xCE93, 0x4524, 0x524A, 0xD9FD,
-       0xC7E6, 0x4C51, 0x5B3F, 0xD088, 0x75E3, 0xFE54, 0xE93A, 0x628D,
-       0x285B, 0xA3EC, 0xB482, 0x3F35, 0x9A5E, 0x11E9, 0x0687, 0x8D30,
-       0xE232, 0x6985, 0x7EEB, 0xF55C, 0x5037, 0xDB80, 0xCCEE, 0x4759,
-       0x0D8F, 0x8638, 0x9156, 0x1AE1, 0xBF8A, 0x343D, 0x2353, 0xA8E4,
-       0xB6FF, 0x3D48, 0x2A26, 0xA191, 0x04FA, 0x8F4D, 0x9823, 0x1394,
-       0x5942, 0xD2F5, 0xC59B, 0x4E2C, 0xEB47, 0x60F0, 0x779E, 0xFC29,
-       0x4BA8, 0xC01F, 0xD771, 0x5CC6, 0xF9AD, 0x721A, 0x6574, 0xEEC3,
-       0xA415, 0x2FA2, 0x38CC, 0xB37B, 0x1610, 0x9DA7, 0x8AC9, 0x017E,
-       0x1F65, 0x94D2, 0x83BC, 0x080B, 0xAD60, 0x26D7, 0x31B9, 0xBA0E,
-       0xF0D8, 0x7B6F, 0x6C01, 0xE7B6, 0x42DD, 0xC96A, 0xDE04, 0x55B3
-};
-
-__u16 crc_t10dif_generic(__u16 crc, const unsigned char *buffer, size_t len)
-{
-       unsigned int i;
-
-       for (i = 0 ; i < len ; i++)
-               crc = (crc << 8) ^ t10_dif_crc_table[((crc >> 8) ^ buffer[i]) & 0xff];
-
-       return crc;
-}
-EXPORT_SYMBOL(crc_t10dif_generic);
-
-/*
- * Steps through buffer one byte at at time, calculates reflected
- * crc using table.
- */
-
-static int chksum_init(struct shash_desc *desc)
-{
-       struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
-       ctx->crc = 0;
-
-       return 0;
-}
-
-static int chksum_update(struct shash_desc *desc, const u8 *data,
-                        unsigned int length)
-{
-       struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
-       ctx->crc = crc_t10dif_generic(ctx->crc, data, length);
-       return 0;
-}
-
-static int chksum_final(struct shash_desc *desc, u8 *out)
-{
-       struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
-       *(__u16 *)out = ctx->crc;
-       return 0;
-}
-
-static int __chksum_finup(__u16 *crcp, const u8 *data, unsigned int len,
-                       u8 *out)
-{
-       *(__u16 *)out = crc_t10dif_generic(*crcp, data, len);
-       return 0;
-}
-
-static int chksum_finup(struct shash_desc *desc, const u8 *data,
-                       unsigned int len, u8 *out)
-{
-       struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
-       return __chksum_finup(&ctx->crc, data, len, out);
-}
-
-static int chksum_digest(struct shash_desc *desc, const u8 *data,
-                        unsigned int length, u8 *out)
-{
-       struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
-       return __chksum_finup(&ctx->crc, data, length, out);
-}
-
-static struct shash_alg alg = {
-       .digestsize             =       CRC_T10DIF_DIGEST_SIZE,
-       .init           =       chksum_init,
-       .update         =       chksum_update,
-       .final          =       chksum_final,
-       .finup          =       chksum_finup,
-       .digest         =       chksum_digest,
-       .descsize               =       sizeof(struct chksum_desc_ctx),
-       .base                   =       {
-               .cra_name               =       "crct10dif",
-               .cra_driver_name        =       "crct10dif-generic",
-               .cra_priority           =       100,
-               .cra_blocksize          =       CRC_T10DIF_BLOCK_SIZE,
-               .cra_module             =       THIS_MODULE,
-       }
-};
-
-static int __init crct10dif_mod_init(void)
-{
-       int ret;
-
-       ret = crypto_register_shash(&alg);
-       return ret;
-}
-
-static void __exit crct10dif_mod_fini(void)
-{
-       crypto_unregister_shash(&alg);
-}
-
-module_init(crct10dif_mod_init);
-module_exit(crct10dif_mod_fini);
-
-MODULE_AUTHOR("Tim Chen <tim.c.chen@linux.intel.com>");
-MODULE_DESCRIPTION("T10 DIF CRC calculation.");
-MODULE_LICENSE("GPL");
diff --git a/crypto/crct10dif_common.c b/crypto/crct10dif_common.c
new file mode 100644 (file)
index 0000000..b2fab36
--- /dev/null
@@ -0,0 +1,82 @@
+/*
+ * Cryptographic API.
+ *
+ * T10 Data Integrity Field CRC16 Crypto Transform
+ *
+ * Copyright (c) 2007 Oracle Corporation.  All rights reserved.
+ * Written by Martin K. Petersen <martin.petersen@oracle.com>
+ * Copyright (C) 2013 Intel Corporation
+ * Author: Tim Chen <tim.c.chen@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/crc-t10dif.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+
+/* Table generated using the following polynomium:
+ * x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x + 1
+ * gt: 0x8bb7
+ */
+static const __u16 t10_dif_crc_table[256] = {
+       0x0000, 0x8BB7, 0x9CD9, 0x176E, 0xB205, 0x39B2, 0x2EDC, 0xA56B,
+       0xEFBD, 0x640A, 0x7364, 0xF8D3, 0x5DB8, 0xD60F, 0xC161, 0x4AD6,
+       0x54CD, 0xDF7A, 0xC814, 0x43A3, 0xE6C8, 0x6D7F, 0x7A11, 0xF1A6,
+       0xBB70, 0x30C7, 0x27A9, 0xAC1E, 0x0975, 0x82C2, 0x95AC, 0x1E1B,
+       0xA99A, 0x222D, 0x3543, 0xBEF4, 0x1B9F, 0x9028, 0x8746, 0x0CF1,
+       0x4627, 0xCD90, 0xDAFE, 0x5149, 0xF422, 0x7F95, 0x68FB, 0xE34C,
+       0xFD57, 0x76E0, 0x618E, 0xEA39, 0x4F52, 0xC4E5, 0xD38B, 0x583C,
+       0x12EA, 0x995D, 0x8E33, 0x0584, 0xA0EF, 0x2B58, 0x3C36, 0xB781,
+       0xD883, 0x5334, 0x445A, 0xCFED, 0x6A86, 0xE131, 0xF65F, 0x7DE8,
+       0x373E, 0xBC89, 0xABE7, 0x2050, 0x853B, 0x0E8C, 0x19E2, 0x9255,
+       0x8C4E, 0x07F9, 0x1097, 0x9B20, 0x3E4B, 0xB5FC, 0xA292, 0x2925,
+       0x63F3, 0xE844, 0xFF2A, 0x749D, 0xD1F6, 0x5A41, 0x4D2F, 0xC698,
+       0x7119, 0xFAAE, 0xEDC0, 0x6677, 0xC31C, 0x48AB, 0x5FC5, 0xD472,
+       0x9EA4, 0x1513, 0x027D, 0x89CA, 0x2CA1, 0xA716, 0xB078, 0x3BCF,
+       0x25D4, 0xAE63, 0xB90D, 0x32BA, 0x97D1, 0x1C66, 0x0B08, 0x80BF,
+       0xCA69, 0x41DE, 0x56B0, 0xDD07, 0x786C, 0xF3DB, 0xE4B5, 0x6F02,
+       0x3AB1, 0xB106, 0xA668, 0x2DDF, 0x88B4, 0x0303, 0x146D, 0x9FDA,
+       0xD50C, 0x5EBB, 0x49D5, 0xC262, 0x6709, 0xECBE, 0xFBD0, 0x7067,
+       0x6E7C, 0xE5CB, 0xF2A5, 0x7912, 0xDC79, 0x57CE, 0x40A0, 0xCB17,
+       0x81C1, 0x0A76, 0x1D18, 0x96AF, 0x33C4, 0xB873, 0xAF1D, 0x24AA,
+       0x932B, 0x189C, 0x0FF2, 0x8445, 0x212E, 0xAA99, 0xBDF7, 0x3640,
+       0x7C96, 0xF721, 0xE04F, 0x6BF8, 0xCE93, 0x4524, 0x524A, 0xD9FD,
+       0xC7E6, 0x4C51, 0x5B3F, 0xD088, 0x75E3, 0xFE54, 0xE93A, 0x628D,
+       0x285B, 0xA3EC, 0xB482, 0x3F35, 0x9A5E, 0x11E9, 0x0687, 0x8D30,
+       0xE232, 0x6985, 0x7EEB, 0xF55C, 0x5037, 0xDB80, 0xCCEE, 0x4759,
+       0x0D8F, 0x8638, 0x9156, 0x1AE1, 0xBF8A, 0x343D, 0x2353, 0xA8E4,
+       0xB6FF, 0x3D48, 0x2A26, 0xA191, 0x04FA, 0x8F4D, 0x9823, 0x1394,
+       0x5942, 0xD2F5, 0xC59B, 0x4E2C, 0xEB47, 0x60F0, 0x779E, 0xFC29,
+       0x4BA8, 0xC01F, 0xD771, 0x5CC6, 0xF9AD, 0x721A, 0x6574, 0xEEC3,
+       0xA415, 0x2FA2, 0x38CC, 0xB37B, 0x1610, 0x9DA7, 0x8AC9, 0x017E,
+       0x1F65, 0x94D2, 0x83BC, 0x080B, 0xAD60, 0x26D7, 0x31B9, 0xBA0E,
+       0xF0D8, 0x7B6F, 0x6C01, 0xE7B6, 0x42DD, 0xC96A, 0xDE04, 0x55B3
+};
+
+__u16 crc_t10dif_generic(__u16 crc, const unsigned char *buffer, size_t len)
+{
+       unsigned int i;
+
+       for (i = 0 ; i < len ; i++)
+               crc = (crc << 8) ^ t10_dif_crc_table[((crc >> 8) ^ buffer[i]) & 0xff];
+
+       return crc;
+}
+EXPORT_SYMBOL(crc_t10dif_generic);
+
+MODULE_DESCRIPTION("T10 DIF CRC calculation common code");
+MODULE_LICENSE("GPL");
diff --git a/crypto/crct10dif_generic.c b/crypto/crct10dif_generic.c
new file mode 100644 (file)
index 0000000..877e711
--- /dev/null
@@ -0,0 +1,127 @@
+/*
+ * Cryptographic API.
+ *
+ * T10 Data Integrity Field CRC16 Crypto Transform
+ *
+ * Copyright (c) 2007 Oracle Corporation.  All rights reserved.
+ * Written by Martin K. Petersen <martin.petersen@oracle.com>
+ * Copyright (C) 2013 Intel Corporation
+ * Author: Tim Chen <tim.c.chen@linux.intel.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/crc-t10dif.h>
+#include <crypto/internal/hash.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+
+struct chksum_desc_ctx {
+       __u16 crc;
+};
+
+/*
+ * Steps through buffer one byte at at time, calculates reflected
+ * crc using table.
+ */
+
+static int chksum_init(struct shash_desc *desc)
+{
+       struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+       ctx->crc = 0;
+
+       return 0;
+}
+
+static int chksum_update(struct shash_desc *desc, const u8 *data,
+                        unsigned int length)
+{
+       struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+       ctx->crc = crc_t10dif_generic(ctx->crc, data, length);
+       return 0;
+}
+
+static int chksum_final(struct shash_desc *desc, u8 *out)
+{
+       struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+       *(__u16 *)out = ctx->crc;
+       return 0;
+}
+
+static int __chksum_finup(__u16 *crcp, const u8 *data, unsigned int len,
+                       u8 *out)
+{
+       *(__u16 *)out = crc_t10dif_generic(*crcp, data, len);
+       return 0;
+}
+
+static int chksum_finup(struct shash_desc *desc, const u8 *data,
+                       unsigned int len, u8 *out)
+{
+       struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+       return __chksum_finup(&ctx->crc, data, len, out);
+}
+
+static int chksum_digest(struct shash_desc *desc, const u8 *data,
+                        unsigned int length, u8 *out)
+{
+       struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
+
+       return __chksum_finup(&ctx->crc, data, length, out);
+}
+
+static struct shash_alg alg = {
+       .digestsize             =       CRC_T10DIF_DIGEST_SIZE,
+       .init           =       chksum_init,
+       .update         =       chksum_update,
+       .final          =       chksum_final,
+       .finup          =       chksum_finup,
+       .digest         =       chksum_digest,
+       .descsize               =       sizeof(struct chksum_desc_ctx),
+       .base                   =       {
+               .cra_name               =       "crct10dif",
+               .cra_driver_name        =       "crct10dif-generic",
+               .cra_priority           =       100,
+               .cra_blocksize          =       CRC_T10DIF_BLOCK_SIZE,
+               .cra_module             =       THIS_MODULE,
+       }
+};
+
+static int __init crct10dif_mod_init(void)
+{
+       int ret;
+
+       ret = crypto_register_shash(&alg);
+       return ret;
+}
+
+static void __exit crct10dif_mod_fini(void)
+{
+       crypto_unregister_shash(&alg);
+}
+
+module_init(crct10dif_mod_init);
+module_exit(crct10dif_mod_fini);
+
+MODULE_AUTHOR("Tim Chen <tim.c.chen@linux.intel.com>");
+MODULE_DESCRIPTION("T10 DIF CRC calculation.");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("crct10dif");
index 6a38218..fb78bb9 100644 (file)
@@ -257,12 +257,13 @@ static int acpi_lpss_create_device(struct acpi_device *adev,
                                pdata->mmio_size = resource_size(&rentry->res);
                        pdata->mmio_base = ioremap(rentry->res.start,
                                                   pdata->mmio_size);
-                       pdata->dev_desc = dev_desc;
                        break;
                }
 
        acpi_dev_free_resource_list(&resource_list);
 
+       pdata->dev_desc = dev_desc;
+
        if (dev_desc->clk_required) {
                ret = register_device_clock(adev, pdata);
                if (ret) {
index 2bdba6f..f0b09bf 100644 (file)
@@ -57,6 +57,11 @@ acpi_ex_store_object_to_index(union acpi_operand_object *val_desc,
                              union acpi_operand_object *dest_desc,
                              struct acpi_walk_state *walk_state);
 
+static acpi_status
+acpi_ex_store_direct_to_node(union acpi_operand_object *source_desc,
+                            struct acpi_namespace_node *node,
+                            struct acpi_walk_state *walk_state);
+
 /*******************************************************************************
  *
  * FUNCTION:    acpi_ex_store
@@ -375,7 +380,11 @@ acpi_ex_store_object_to_index(union acpi_operand_object *source_desc,
  *              When storing into an object the data is converted to the
  *              target object type then stored in the object. This means
  *              that the target object type (for an initialized target) will
- *              not be changed by a store operation.
+ *              not be changed by a store operation. A copy_object can change
+ *              the target type, however.
+ *
+ *              The implicit_conversion flag is set to NO/FALSE only when
+ *              storing to an arg_x -- as per the rules of the ACPI spec.
  *
  *              Assumes parameters are already validated.
  *
@@ -399,7 +408,7 @@ acpi_ex_store_object_to_node(union acpi_operand_object *source_desc,
        target_type = acpi_ns_get_type(node);
        target_desc = acpi_ns_get_attached_object(node);
 
-       ACPI_DEBUG_PRINT((ACPI_DB_EXEC, "Storing %p(%s) into node %p(%s)\n",
+       ACPI_DEBUG_PRINT((ACPI_DB_EXEC, "Storing %p (%s) to node %p (%s)\n",
                          source_desc,
                          acpi_ut_get_object_type_name(source_desc), node,
                          acpi_ut_get_type_name(target_type)));
@@ -413,45 +422,30 @@ acpi_ex_store_object_to_node(union acpi_operand_object *source_desc,
                return_ACPI_STATUS(status);
        }
 
-       /* If no implicit conversion, drop into the default case below */
-
-       if ((!implicit_conversion) ||
-           ((walk_state->opcode == AML_COPY_OP) &&
-            (target_type != ACPI_TYPE_LOCAL_REGION_FIELD) &&
-            (target_type != ACPI_TYPE_LOCAL_BANK_FIELD) &&
-            (target_type != ACPI_TYPE_LOCAL_INDEX_FIELD))) {
-               /*
-                * Force execution of default (no implicit conversion). Note:
-                * copy_object does not perform an implicit conversion, as per the ACPI
-                * spec -- except in case of region/bank/index fields -- because these
-                * objects must retain their original type permanently.
-                */
-               target_type = ACPI_TYPE_ANY;
-       }
-
        /* Do the actual store operation */
 
        switch (target_type) {
-       case ACPI_TYPE_BUFFER_FIELD:
-       case ACPI_TYPE_LOCAL_REGION_FIELD:
-       case ACPI_TYPE_LOCAL_BANK_FIELD:
-       case ACPI_TYPE_LOCAL_INDEX_FIELD:
-
-               /* For fields, copy the source data to the target field. */
-
-               status = acpi_ex_write_data_to_field(source_desc, target_desc,
-                                                    &walk_state->result_obj);
-               break;
-
        case ACPI_TYPE_INTEGER:
        case ACPI_TYPE_STRING:
        case ACPI_TYPE_BUFFER:
                /*
-                * These target types are all of type Integer/String/Buffer, and
-                * therefore support implicit conversion before the store.
-                *
-                * Copy and/or convert the source object to a new target object
+                * The simple data types all support implicit source operand
+                * conversion before the store.
                 */
+
+               if ((walk_state->opcode == AML_COPY_OP) || !implicit_conversion) {
+                       /*
+                        * However, copy_object and Stores to arg_x do not perform
+                        * an implicit conversion, as per the ACPI specification.
+                        * A direct store is performed instead.
+                        */
+                       status = acpi_ex_store_direct_to_node(source_desc, node,
+                                                             walk_state);
+                       break;
+               }
+
+               /* Store with implicit source operand conversion support */
+
                status =
                    acpi_ex_store_object_to_object(source_desc, target_desc,
                                                   &new_desc, walk_state);
@@ -465,13 +459,12 @@ acpi_ex_store_object_to_node(union acpi_operand_object *source_desc,
                         * the Name's type to that of the value being stored in it.
                         * source_desc reference count is incremented by attach_object.
                         *
-                        * Note: This may change the type of the node if an explicit store
-                        * has been performed such that the node/object type has been
-                        * changed.
+                        * Note: This may change the type of the node if an explicit
+                        * store has been performed such that the node/object type
+                        * has been changed.
                         */
-                       status =
-                           acpi_ns_attach_object(node, new_desc,
-                                                 new_desc->common.type);
+                       status = acpi_ns_attach_object(node, new_desc,
+                                                      new_desc->common.type);
 
                        ACPI_DEBUG_PRINT((ACPI_DB_EXEC,
                                          "Store %s into %s via Convert/Attach\n",
@@ -482,38 +475,83 @@ acpi_ex_store_object_to_node(union acpi_operand_object *source_desc,
                }
                break;
 
-       default:
-
-               ACPI_DEBUG_PRINT((ACPI_DB_EXEC,
-                                 "Storing [%s] (%p) directly into node [%s] (%p)"
-                                 " with no implicit conversion\n",
-                                 acpi_ut_get_object_type_name(source_desc),
-                                 source_desc,
-                                 acpi_ut_get_object_type_name(target_desc),
-                                 node));
+       case ACPI_TYPE_BUFFER_FIELD:
+       case ACPI_TYPE_LOCAL_REGION_FIELD:
+       case ACPI_TYPE_LOCAL_BANK_FIELD:
+       case ACPI_TYPE_LOCAL_INDEX_FIELD:
+               /*
+                * For all fields, always write the source data to the target
+                * field. Any required implicit source operand conversion is
+                * performed in the function below as necessary. Note, field
+                * objects must retain their original type permanently.
+                */
+               status = acpi_ex_write_data_to_field(source_desc, target_desc,
+                                                    &walk_state->result_obj);
+               break;
 
+       default:
                /*
                 * No conversions for all other types. Directly store a copy of
-                * the source object. NOTE: This is a departure from the ACPI
-                * spec, which states "If conversion is impossible, abort the
-                * running control method".
+                * the source object. This is the ACPI spec-defined behavior for
+                * the copy_object operator.
                 *
-                * This code implements "If conversion is impossible, treat the
-                * Store operation as a CopyObject".
+                * NOTE: For the Store operator, this is a departure from the
+                * ACPI spec, which states "If conversion is impossible, abort
+                * the running control method". Instead, this code implements
+                * "If conversion is impossible, treat the Store operation as
+                * a CopyObject".
                 */
-               status =
-                   acpi_ut_copy_iobject_to_iobject(source_desc, &new_desc,
-                                                   walk_state);
-               if (ACPI_FAILURE(status)) {
-                       return_ACPI_STATUS(status);
-               }
-
-               status =
-                   acpi_ns_attach_object(node, new_desc,
-                                         new_desc->common.type);
-               acpi_ut_remove_reference(new_desc);
+               status = acpi_ex_store_direct_to_node(source_desc, node,
+                                                     walk_state);
                break;
        }
 
        return_ACPI_STATUS(status);
 }
+
+/*******************************************************************************
+ *
+ * FUNCTION:    acpi_ex_store_direct_to_node
+ *
+ * PARAMETERS:  source_desc             - Value to be stored
+ *              node                    - Named object to receive the value
+ *              walk_state              - Current walk state
+ *
+ * RETURN:      Status
+ *
+ * DESCRIPTION: "Store" an object directly to a node. This involves a copy
+ *              and an attach.
+ *
+ ******************************************************************************/
+
+static acpi_status
+acpi_ex_store_direct_to_node(union acpi_operand_object *source_desc,
+                            struct acpi_namespace_node *node,
+                            struct acpi_walk_state *walk_state)
+{
+       acpi_status status;
+       union acpi_operand_object *new_desc;
+
+       ACPI_FUNCTION_TRACE(ex_store_direct_to_node);
+
+       ACPI_DEBUG_PRINT((ACPI_DB_EXEC,
+                         "Storing [%s] (%p) directly into node [%s] (%p)"
+                         " with no implicit conversion\n",
+                         acpi_ut_get_object_type_name(source_desc),
+                         source_desc, acpi_ut_get_type_name(node->type),
+                         node));
+
+       /* Copy the source object to a new object */
+
+       status =
+           acpi_ut_copy_iobject_to_iobject(source_desc, &new_desc, walk_state);
+       if (ACPI_FAILURE(status)) {
+               return_ACPI_STATUS(status);
+       }
+
+       /* Attach the new object to the node */
+
+       status = acpi_ns_attach_object(node, new_desc, new_desc->common.type);
+       acpi_ut_remove_reference(new_desc);
+       return_ACPI_STATUS(status);
+}
index 9467229..10f0f40 100644 (file)
@@ -79,6 +79,9 @@ static struct acpi_bus_type *acpi_get_bus_type(struct device *dev)
        return ret;
 }
 
+#define FIND_CHILD_MIN_SCORE   1
+#define FIND_CHILD_MAX_SCORE   2
+
 static acpi_status acpi_dev_present(acpi_handle handle, u32 lvl_not_used,
                                  void *not_used, void **ret_p)
 {
@@ -92,14 +95,17 @@ static acpi_status acpi_dev_present(acpi_handle handle, u32 lvl_not_used,
        return AE_OK;
 }
 
-static bool acpi_extra_checks_passed(acpi_handle handle, bool is_bridge)
+static int do_find_child_checks(acpi_handle handle, bool is_bridge)
 {
+       bool sta_present = true;
        unsigned long long sta;
        acpi_status status;
 
-       status = acpi_bus_get_status_handle(handle, &sta);
-       if (ACPI_FAILURE(status) || !(sta & ACPI_STA_DEVICE_ENABLED))
-               return false;
+       status = acpi_evaluate_integer(handle, "_STA", NULL, &sta);
+       if (status == AE_NOT_FOUND)
+               sta_present = false;
+       else if (ACPI_FAILURE(status) || !(sta & ACPI_STA_DEVICE_ENABLED))
+               return -ENODEV;
 
        if (is_bridge) {
                void *test = NULL;
@@ -107,16 +113,17 @@ static bool acpi_extra_checks_passed(acpi_handle handle, bool is_bridge)
                /* Check if this object has at least one child device. */
                acpi_walk_namespace(ACPI_TYPE_DEVICE, handle, 1,
                                    acpi_dev_present, NULL, NULL, &test);
-               return !!test;
+               if (!test)
+                       return -ENODEV;
        }
-       return true;
+       return sta_present ? FIND_CHILD_MAX_SCORE : FIND_CHILD_MIN_SCORE;
 }
 
 struct find_child_context {
        u64 addr;
        bool is_bridge;
        acpi_handle ret;
-       bool ret_checked;
+       int ret_score;
 };
 
 static acpi_status do_find_child(acpi_handle handle, u32 lvl_not_used,
@@ -125,6 +132,7 @@ static acpi_status do_find_child(acpi_handle handle, u32 lvl_not_used,
        struct find_child_context *context = data;
        unsigned long long addr;
        acpi_status status;
+       int score;
 
        status = acpi_evaluate_integer(handle, METHOD_NAME__ADR, NULL, &addr);
        if (ACPI_FAILURE(status) || addr != context->addr)
@@ -144,15 +152,20 @@ static acpi_status do_find_child(acpi_handle handle, u32 lvl_not_used,
         * its handle if so.  Second, check the same for the object that we've
         * just found.
         */
-       if (!context->ret_checked) {
-               if (acpi_extra_checks_passed(context->ret, context->is_bridge))
+       if (!context->ret_score) {
+               score = do_find_child_checks(context->ret, context->is_bridge);
+               if (score == FIND_CHILD_MAX_SCORE)
                        return AE_CTRL_TERMINATE;
                else
-                       context->ret_checked = true;
+                       context->ret_score = score;
        }
-       if (acpi_extra_checks_passed(handle, context->is_bridge)) {
+       score = do_find_child_checks(handle, context->is_bridge);
+       if (score == FIND_CHILD_MAX_SCORE) {
                context->ret = handle;
                return AE_CTRL_TERMINATE;
+       } else if (score > context->ret_score) {
+               context->ret = handle;
+               context->ret_score = score;
        }
        return AE_OK;
 }
index 61d090b..fbdb82e 100644 (file)
@@ -204,8 +204,6 @@ static int acpi_scan_hot_remove(struct acpi_device *device)
                return -EINVAL;
        }
 
-       lock_device_hotplug();
-
        /*
         * Carry out two passes here and ignore errors in the first pass,
         * because if the devices in question are memory blocks and
@@ -236,9 +234,6 @@ static int acpi_scan_hot_remove(struct acpi_device *device)
                                            ACPI_UINT32_MAX,
                                            acpi_bus_online_companions, NULL,
                                            NULL, NULL);
-
-                       unlock_device_hotplug();
-
                        put_device(&device->dev);
                        return -EBUSY;
                }
@@ -249,8 +244,6 @@ static int acpi_scan_hot_remove(struct acpi_device *device)
 
        acpi_bus_trim(device);
 
-       unlock_device_hotplug();
-
        /* Device node has been unregistered. */
        put_device(&device->dev);
        device = NULL;
@@ -289,6 +282,7 @@ static void acpi_bus_device_eject(void *context)
        u32 ost_code = ACPI_OST_SC_NON_SPECIFIC_FAILURE;
        int error;
 
+       lock_device_hotplug();
        mutex_lock(&acpi_scan_lock);
 
        acpi_bus_get_device(handle, &device);
@@ -312,6 +306,7 @@ static void acpi_bus_device_eject(void *context)
 
  out:
        mutex_unlock(&acpi_scan_lock);
+       unlock_device_hotplug();
        return;
 
  err_out:
@@ -326,8 +321,8 @@ static void acpi_scan_bus_device_check(acpi_handle handle, u32 ost_source)
        u32 ost_code = ACPI_OST_SC_NON_SPECIFIC_FAILURE;
        int error;
 
-       mutex_lock(&acpi_scan_lock);
        lock_device_hotplug();
+       mutex_lock(&acpi_scan_lock);
 
        if (ost_source != ACPI_NOTIFY_BUS_CHECK) {
                acpi_bus_get_device(handle, &device);
@@ -353,9 +348,9 @@ static void acpi_scan_bus_device_check(acpi_handle handle, u32 ost_source)
                kobject_uevent(&device->dev.kobj, KOBJ_ONLINE);
 
  out:
-       unlock_device_hotplug();
        acpi_evaluate_hotplug_ost(handle, ost_source, ost_code, NULL);
        mutex_unlock(&acpi_scan_lock);
+       unlock_device_hotplug();
 }
 
 static void acpi_scan_bus_check(void *context)
@@ -446,6 +441,7 @@ void acpi_bus_hot_remove_device(void *context)
        acpi_handle handle = device->handle;
        int error;
 
+       lock_device_hotplug();
        mutex_lock(&acpi_scan_lock);
 
        error = acpi_scan_hot_remove(device);
@@ -455,6 +451,7 @@ void acpi_bus_hot_remove_device(void *context)
                                          NULL);
 
        mutex_unlock(&acpi_scan_lock);
+       unlock_device_hotplug();
        kfree(context);
 }
 EXPORT_SYMBOL(acpi_bus_hot_remove_device);
index 1219ab7..1e16cbd 100644 (file)
@@ -77,9 +77,36 @@ static int dma_buf_mmap_internal(struct file *file, struct vm_area_struct *vma)
        return dmabuf->ops->mmap(dmabuf, vma);
 }
 
+static loff_t dma_buf_llseek(struct file *file, loff_t offset, int whence)
+{
+       struct dma_buf *dmabuf;
+       loff_t base;
+
+       if (!is_dma_buf_file(file))
+               return -EBADF;
+
+       dmabuf = file->private_data;
+
+       /* only support discovering the end of the buffer,
+          but also allow SEEK_SET to maintain the idiomatic
+          SEEK_END(0), SEEK_CUR(0) pattern */
+       if (whence == SEEK_END)
+               base = dmabuf->size;
+       else if (whence == SEEK_SET)
+               base = 0;
+       else
+               return -EINVAL;
+
+       if (offset != 0)
+               return -EINVAL;
+
+       return base + offset;
+}
+
 static const struct file_operations dma_buf_fops = {
        .release        = dma_buf_release,
        .mmap           = dma_buf_mmap_internal,
+       .llseek         = dma_buf_llseek,
 };
 
 /*
@@ -133,7 +160,12 @@ struct dma_buf *dma_buf_export_named(void *priv, const struct dma_buf_ops *ops,
        dmabuf->exp_name = exp_name;
 
        file = anon_inode_getfile("dmabuf", &dma_buf_fops, dmabuf, flags);
+       if (IS_ERR(file)) {
+               kfree(dmabuf);
+               return ERR_CAST(file);
+       }
 
+       file->f_mode |= FMODE_LSEEK;
        dmabuf->file = file;
 
        mutex_init(&dmabuf->lock);
index 7616a77..bc9f43b 100644 (file)
@@ -125,13 +125,7 @@ static ssize_t node_read_meminfo(struct device *dev,
                       nid, K(node_page_state(nid, NR_WRITEBACK)),
                       nid, K(node_page_state(nid, NR_FILE_PAGES)),
                       nid, K(node_page_state(nid, NR_FILE_MAPPED)),
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-                      nid, K(node_page_state(nid, NR_ANON_PAGES)
-                       + node_page_state(nid, NR_ANON_TRANSPARENT_HUGEPAGES) *
-                       HPAGE_PMD_NR),
-#else
                       nid, K(node_page_state(nid, NR_ANON_PAGES)),
-#endif
                       nid, K(node_page_state(nid, NR_SHMEM)),
                       nid, node_page_state(nid, NR_KERNEL_STACK) *
                                THREAD_SIZE / 1024,
index 025c41d..14a9d19 100644 (file)
@@ -1,5 +1,5 @@
 /* Copyright (c) 2013 Coraid, Inc.  See COPYING for GPL terms. */
-#define VERSION "83"
+#define VERSION "85"
 #define AOE_MAJOR 152
 #define DEVICE_NAME "aoe"
 
@@ -169,6 +169,7 @@ struct aoedev {
        ulong ref;
        struct work_struct work;/* disk create work struct */
        struct gendisk *gd;
+       struct dentry *debugfs;
        struct request_queue *blkq;
        struct hd_geometry geo;
        sector_t ssize;
@@ -206,6 +207,7 @@ struct ktstate {
 int aoeblk_init(void);
 void aoeblk_exit(void);
 void aoeblk_gdalloc(void *);
+void aoedisk_rm_debugfs(struct aoedev *d);
 void aoedisk_rm_sysfs(struct aoedev *d);
 
 int aoechr_init(void);
index 916d9ed..dd73e1f 100644 (file)
@@ -1,4 +1,4 @@
-/* Copyright (c) 2012 Coraid, Inc.  See COPYING for GPL terms. */
+/* Copyright (c) 2013 Coraid, Inc.  See COPYING for GPL terms. */
 /*
  * aoeblk.c
  * block device routines
 #include <linux/mutex.h>
 #include <linux/export.h>
 #include <linux/moduleparam.h>
+#include <linux/debugfs.h>
 #include <scsi/sg.h>
 #include "aoe.h"
 
 static DEFINE_MUTEX(aoeblk_mutex);
 static struct kmem_cache *buf_pool_cache;
+static struct dentry *aoe_debugfs_dir;
 
 /* GPFS needs a larger value than the default. */
 static int aoe_maxsectors;
@@ -108,6 +110,55 @@ static ssize_t aoedisk_show_payload(struct device *dev,
        return snprintf(page, PAGE_SIZE, "%lu\n", d->maxbcnt);
 }
 
+static int aoedisk_debugfs_show(struct seq_file *s, void *ignored)
+{
+       struct aoedev *d;
+       struct aoetgt **t, **te;
+       struct aoeif *ifp, *ife;
+       unsigned long flags;
+       char c;
+
+       d = s->private;
+       seq_printf(s, "rttavg: %d rttdev: %d\n",
+               d->rttavg >> RTTSCALE,
+               d->rttdev >> RTTDSCALE);
+       seq_printf(s, "nskbpool: %d\n", skb_queue_len(&d->skbpool));
+       seq_printf(s, "kicked: %ld\n", d->kicked);
+       seq_printf(s, "maxbcnt: %ld\n", d->maxbcnt);
+       seq_printf(s, "ref: %ld\n", d->ref);
+
+       spin_lock_irqsave(&d->lock, flags);
+       t = d->targets;
+       te = t + d->ntargets;
+       for (; t < te && *t; t++) {
+               c = '\t';
+               seq_printf(s, "falloc: %ld\n", (*t)->falloc);
+               seq_printf(s, "ffree: %p\n",
+                       list_empty(&(*t)->ffree) ? NULL : (*t)->ffree.next);
+               seq_printf(s, "%pm:%d:%d:%d\n", (*t)->addr, (*t)->nout,
+                       (*t)->maxout, (*t)->nframes);
+               seq_printf(s, "\tssthresh:%d\n", (*t)->ssthresh);
+               seq_printf(s, "\ttaint:%d\n", (*t)->taint);
+               seq_printf(s, "\tr:%d\n", (*t)->rpkts);
+               seq_printf(s, "\tw:%d\n", (*t)->wpkts);
+               ifp = (*t)->ifs;
+               ife = ifp + ARRAY_SIZE((*t)->ifs);
+               for (; ifp->nd && ifp < ife; ifp++) {
+                       seq_printf(s, "%c%s", c, ifp->nd->name);
+                       c = ',';
+               }
+               seq_puts(s, "\n");
+       }
+       spin_unlock_irqrestore(&d->lock, flags);
+
+       return 0;
+}
+
+static int aoe_debugfs_open(struct inode *inode, struct file *file)
+{
+       return single_open(file, aoedisk_debugfs_show, inode->i_private);
+}
+
 static DEVICE_ATTR(state, S_IRUGO, aoedisk_show_state, NULL);
 static DEVICE_ATTR(mac, S_IRUGO, aoedisk_show_mac, NULL);
 static DEVICE_ATTR(netif, S_IRUGO, aoedisk_show_netif, NULL);
@@ -130,6 +181,44 @@ static const struct attribute_group attr_group = {
        .attrs = aoe_attrs,
 };
 
+static const struct file_operations aoe_debugfs_fops = {
+       .open = aoe_debugfs_open,
+       .read = seq_read,
+       .llseek = seq_lseek,
+       .release = single_release,
+};
+
+static void
+aoedisk_add_debugfs(struct aoedev *d)
+{
+       struct dentry *entry;
+       char *p;
+
+       if (aoe_debugfs_dir == NULL)
+               return;
+       p = strchr(d->gd->disk_name, '/');
+       if (p == NULL)
+               p = d->gd->disk_name;
+       else
+               p++;
+       BUG_ON(*p == '\0');
+       entry = debugfs_create_file(p, 0444, aoe_debugfs_dir, d,
+                                   &aoe_debugfs_fops);
+       if (IS_ERR_OR_NULL(entry)) {
+               pr_info("aoe: cannot create debugfs file for %s\n",
+                       d->gd->disk_name);
+               return;
+       }
+       BUG_ON(d->debugfs);
+       d->debugfs = entry;
+}
+void
+aoedisk_rm_debugfs(struct aoedev *d)
+{
+       debugfs_remove(d->debugfs);
+       d->debugfs = NULL;
+}
+
 static int
 aoedisk_add_sysfs(struct aoedev *d)
 {
@@ -330,6 +419,7 @@ aoeblk_gdalloc(void *vp)
 
        add_disk(gd);
        aoedisk_add_sysfs(d);
+       aoedisk_add_debugfs(d);
 
        spin_lock_irqsave(&d->lock, flags);
        WARN_ON(!(d->flags & DEVFL_GD_NOW));
@@ -351,6 +441,8 @@ err:
 void
 aoeblk_exit(void)
 {
+       debugfs_remove_recursive(aoe_debugfs_dir);
+       aoe_debugfs_dir = NULL;
        kmem_cache_destroy(buf_pool_cache);
 }
 
@@ -362,7 +454,11 @@ aoeblk_init(void)
                                           0, 0, NULL);
        if (buf_pool_cache == NULL)
                return -ENOMEM;
-
+       aoe_debugfs_dir = debugfs_create_dir("aoe", NULL);
+       if (IS_ERR_OR_NULL(aoe_debugfs_dir)) {
+               pr_info("aoe: cannot create debugfs directory\n");
+               aoe_debugfs_dir = NULL;
+       }
        return 0;
 }
 
index 4d45dba..d251543 100644 (file)
@@ -380,7 +380,6 @@ aoecmd_ata_rw(struct aoedev *d)
 {
        struct frame *f;
        struct buf *buf;
-       struct aoetgt *t;
        struct sk_buff *skb;
        struct sk_buff_head queue;
        ulong bcnt, fbcnt;
@@ -391,7 +390,6 @@ aoecmd_ata_rw(struct aoedev *d)
        f = newframe(d);
        if (f == NULL)
                return 0;
-       t = *d->tgt;
        bcnt = d->maxbcnt;
        if (bcnt == 0)
                bcnt = DEFAULTBCNT;
@@ -485,7 +483,6 @@ resend(struct aoedev *d, struct frame *f)
        struct sk_buff *skb;
        struct sk_buff_head queue;
        struct aoe_hdr *h;
-       struct aoe_atahdr *ah;
        struct aoetgt *t;
        char buf[128];
        u32 n;
@@ -500,7 +497,6 @@ resend(struct aoedev *d, struct frame *f)
                return;
        }
        h = (struct aoe_hdr *) skb_mac_header(skb);
-       ah = (struct aoe_atahdr *) (h+1);
 
        if (!(f->flags & FFL_PROBE)) {
                snprintf(buf, sizeof(buf),
index 784c92e..e774c50 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/bitmap.h>
 #include <linux/kdev_t.h>
 #include <linux/moduleparam.h>
+#include <linux/string.h>
 #include "aoe.h"
 
 static void dummy_timer(ulong);
@@ -241,16 +242,12 @@ aoedev_downdev(struct aoedev *d)
 static int
 user_req(char *s, size_t slen, struct aoedev *d)
 {
-       char *p;
+       const char *p;
        size_t lim;
 
        if (!d->gd)
                return 0;
-       p = strrchr(d->gd->disk_name, '/');
-       if (!p)
-               p = d->gd->disk_name;
-       else
-               p += 1;
+       p = kbasename(d->gd->disk_name);
        lim = sizeof(d->gd->disk_name);
        lim -= p - d->gd->disk_name;
        if (slen < lim)
@@ -278,6 +275,7 @@ freedev(struct aoedev *d)
 
        del_timer_sync(&d->timer);
        if (d->gd) {
+               aoedisk_rm_debugfs(d);
                aoedisk_rm_sysfs(d);
                del_gendisk(d->gd);
                put_disk(d->gd);
index 62b6c2c..d2d95ff 100644 (file)
@@ -4257,6 +4257,13 @@ static void cciss_find_board_params(ctlr_info_t *h)
        cciss_get_max_perf_mode_cmds(h);
        h->nr_cmds = h->max_commands - 4 - cciss_tape_cmds;
        h->maxsgentries = readl(&(h->cfgtable->MaxSGElements));
+       /*
+        * The P600 may exhibit poor performance under some workloads
+        * if we use the value in the configuration table. Limit this
+        * controller to MAXSGENTRIES (32) instead.
+        */
+       if (h->board_id == 0x3225103C)
+               h->maxsgentries = MAXSGENTRIES;
        /*
         * Limit in-command s/g elements to 32 save dma'able memory.
         * Howvever spec says if 0, use 31
index a56cfcd..77a60be 100644 (file)
@@ -636,7 +636,7 @@ ok_to_write:
                mg_request(host->breq);
 }
 
-void mg_times_out(unsigned long data)
+static void mg_times_out(unsigned long data)
 {
        struct mg_host *host = (struct mg_host *)data;
        char *name;
index 1fca1f9..0ba837f 100644 (file)
@@ -4,6 +4,6 @@
 
 config BLK_DEV_PCIESSD_MTIP32XX
        tristate "Block Device Driver for Micron PCIe SSDs"
-       depends on PCI && GENERIC_HARDIRQS
+       depends on PCI
        help
           This enables the block driver for Micron PCIe SSDs.
index 1bbc681..79aa179 100644 (file)
@@ -598,7 +598,7 @@ static ssize_t class_osdblk_remove(struct class *c,
        unsigned long ul;
        struct list_head *tmp;
 
-       rc = strict_strtoul(buf, 10, &ul);
+       rc = kstrtoul(buf, 10, &ul);
        if (rc)
                return rc;
 
index f5d0ea1..5618847 100644 (file)
@@ -44,6 +44,8 @@
  *
  *************************************************************************/
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/pktcdvd.h>
 #include <linux/module.h>
 #include <linux/types.h>
 
 #define DRIVER_NAME    "pktcdvd"
 
-#if PACKET_DEBUG
-#define DPRINTK(fmt, args...) printk(KERN_NOTICE fmt, ##args)
-#else
-#define DPRINTK(fmt, args...)
-#endif
-
-#if PACKET_DEBUG > 1
-#define VPRINTK(fmt, args...) printk(KERN_NOTICE fmt, ##args)
-#else
-#define VPRINTK(fmt, args...)
-#endif
+#define pkt_err(pd, fmt, ...)                                          \
+       pr_err("%s: " fmt, pd->name, ##__VA_ARGS__)
+#define pkt_notice(pd, fmt, ...)                                       \
+       pr_notice("%s: " fmt, pd->name, ##__VA_ARGS__)
+#define pkt_info(pd, fmt, ...)                                         \
+       pr_info("%s: " fmt, pd->name, ##__VA_ARGS__)
+
+#define pkt_dbg(level, pd, fmt, ...)                                   \
+do {                                                                   \
+       if (level == 2 && PACKET_DEBUG >= 2)                            \
+               pr_notice("%s: %s():" fmt,                              \
+                         pd->name, __func__, ##__VA_ARGS__);           \
+       else if (level == 1 && PACKET_DEBUG >= 1)                       \
+               pr_notice("%s: " fmt, pd->name, ##__VA_ARGS__);         \
+} while (0)
 
 #define MAX_SPEED 0xffff
 
-#define ZONE(sector, pd) (((sector) + (pd)->offset) & \
-                       ~(sector_t)((pd)->settings.size - 1))
-
 static DEFINE_MUTEX(pktcdvd_mutex);
 static struct pktcdvd_device *pkt_devs[MAX_WRITERS];
 static struct proc_dir_entry *pkt_proc;
@@ -103,7 +106,10 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev);
 static int pkt_remove_dev(dev_t pkt_dev);
 static int pkt_seq_show(struct seq_file *m, void *p);
 
-
+static sector_t get_zone(sector_t sector, struct pktcdvd_device *pd)
+{
+       return (sector + pd->offset) & ~(sector_t)(pd->settings.size - 1);
+}
 
 /*
  * create and register a pktcdvd kernel object.
@@ -424,7 +430,7 @@ static int pkt_sysfs_init(void)
        if (ret) {
                kfree(class_pktcdvd);
                class_pktcdvd = NULL;
-               printk(DRIVER_NAME": failed to create class pktcdvd\n");
+               pr_err("failed to create class pktcdvd\n");
                return ret;
        }
        return 0;
@@ -517,7 +523,7 @@ static void pkt_bio_finished(struct pktcdvd_device *pd)
 {
        BUG_ON(atomic_read(&pd->cdrw.pending_bios) <= 0);
        if (atomic_dec_and_test(&pd->cdrw.pending_bios)) {
-               VPRINTK(DRIVER_NAME": queue empty\n");
+               pkt_dbg(2, pd, "queue empty\n");
                atomic_set(&pd->iosched.attention, 1);
                wake_up(&pd->wqueue);
        }
@@ -734,36 +740,33 @@ out:
        return ret;
 }
 
+static const char *sense_key_string(__u8 index)
+{
+       static const char * const info[] = {
+               "No sense", "Recovered error", "Not ready",
+               "Medium error", "Hardware error", "Illegal request",
+               "Unit attention", "Data protect", "Blank check",
+       };
+
+       return index < ARRAY_SIZE(info) ? info[index] : "INVALID";
+}
+
 /*
  * A generic sense dump / resolve mechanism should be implemented across
  * all ATAPI + SCSI devices.
  */
-static void pkt_dump_sense(struct packet_command *cgc)
+static void pkt_dump_sense(struct pktcdvd_device *pd,
+                          struct packet_command *cgc)
 {
-       static char *info[9] = { "No sense", "Recovered error", "Not ready",
-                                "Medium error", "Hardware error", "Illegal request",
-                                "Unit attention", "Data protect", "Blank check" };
-       int i;
        struct request_sense *sense = cgc->sense;
 
-       printk(DRIVER_NAME":");
-       for (i = 0; i < CDROM_PACKET_SIZE; i++)
-               printk(" %02x", cgc->cmd[i]);
-       printk(" - ");
-
-       if (sense == NULL) {
-               printk("no sense\n");
-               return;
-       }
-
-       printk("sense %02x.%02x.%02x", sense->sense_key, sense->asc, sense->ascq);
-
-       if (sense->sense_key > 8) {
-               printk(" (INVALID)\n");
-               return;
-       }
-
-       printk(" (%s)\n", info[sense->sense_key]);
+       if (sense)
+               pkt_err(pd, "%*ph - sense %02x.%02x.%02x (%s)\n",
+                       CDROM_PACKET_SIZE, cgc->cmd,
+                       sense->sense_key, sense->asc, sense->ascq,
+                       sense_key_string(sense->sense_key));
+       else
+               pkt_err(pd, "%*ph - no sense\n", CDROM_PACKET_SIZE, cgc->cmd);
 }
 
 /*
@@ -806,7 +809,7 @@ static noinline_for_stack int pkt_set_speed(struct pktcdvd_device *pd,
        cgc.cmd[5] = write_speed & 0xff;
 
        if ((ret = pkt_generic_packet(pd, &cgc)))
-               pkt_dump_sense(&cgc);
+               pkt_dump_sense(pd, &cgc);
 
        return ret;
 }
@@ -872,7 +875,7 @@ static void pkt_iosched_process_queue(struct pktcdvd_device *pd)
                                need_write_seek = 0;
                        if (need_write_seek && reads_queued) {
                                if (atomic_read(&pd->cdrw.pending_bios) > 0) {
-                                       VPRINTK(DRIVER_NAME": write, waiting\n");
+                                       pkt_dbg(2, pd, "write, waiting\n");
                                        break;
                                }
                                pkt_flush_cache(pd);
@@ -881,7 +884,7 @@ static void pkt_iosched_process_queue(struct pktcdvd_device *pd)
                } else {
                        if (!reads_queued && writes_queued) {
                                if (atomic_read(&pd->cdrw.pending_bios) > 0) {
-                                       VPRINTK(DRIVER_NAME": read, waiting\n");
+                                       pkt_dbg(2, pd, "read, waiting\n");
                                        break;
                                }
                                pd->iosched.writing = 1;
@@ -943,7 +946,7 @@ static int pkt_set_segment_merging(struct pktcdvd_device *pd, struct request_que
                set_bit(PACKET_MERGE_SEGS, &pd->flags);
                return 0;
        } else {
-               printk(DRIVER_NAME": cdrom max_phys_segments too small\n");
+               pkt_err(pd, "cdrom max_phys_segments too small\n");
                return -EIO;
        }
 }
@@ -987,8 +990,9 @@ static void pkt_end_io_read(struct bio *bio, int err)
        struct pktcdvd_device *pd = pkt->pd;
        BUG_ON(!pd);
 
-       VPRINTK("pkt_end_io_read: bio=%p sec0=%llx sec=%llx err=%d\n", bio,
-               (unsigned long long)pkt->sector, (unsigned long long)bio->bi_sector, err);
+       pkt_dbg(2, pd, "bio=%p sec0=%llx sec=%llx err=%d\n",
+               bio, (unsigned long long)pkt->sector,
+               (unsigned long long)bio->bi_sector, err);
 
        if (err)
                atomic_inc(&pkt->io_errors);
@@ -1005,7 +1009,7 @@ static void pkt_end_io_packet_write(struct bio *bio, int err)
        struct pktcdvd_device *pd = pkt->pd;
        BUG_ON(!pd);
 
-       VPRINTK("pkt_end_io_packet_write: id=%d, err=%d\n", pkt->id, err);
+       pkt_dbg(2, pd, "id=%d, err=%d\n", pkt->id, err);
 
        pd->stats.pkt_ended++;
 
@@ -1047,7 +1051,7 @@ static void pkt_gather_data(struct pktcdvd_device *pd, struct packet_data *pkt)
        spin_unlock(&pkt->lock);
 
        if (pkt->cache_valid) {
-               VPRINTK("pkt_gather_data: zone %llx cached\n",
+               pkt_dbg(2, pd, "zone %llx cached\n",
                        (unsigned long long)pkt->sector);
                goto out_account;
        }
@@ -1070,7 +1074,7 @@ static void pkt_gather_data(struct pktcdvd_device *pd, struct packet_data *pkt)
 
                p = (f * CD_FRAMESIZE) / PAGE_SIZE;
                offset = (f * CD_FRAMESIZE) % PAGE_SIZE;
-               VPRINTK("pkt_gather_data: Adding frame %d, page:%p offs:%d\n",
+               pkt_dbg(2, pd, "Adding frame %d, page:%p offs:%d\n",
                        f, pkt->pages[p], offset);
                if (!bio_add_page(bio, pkt->pages[p], CD_FRAMESIZE, offset))
                        BUG();
@@ -1082,7 +1086,7 @@ static void pkt_gather_data(struct pktcdvd_device *pd, struct packet_data *pkt)
        }
 
 out_account:
-       VPRINTK("pkt_gather_data: need %d frames for zone %llx\n",
+       pkt_dbg(2, pd, "need %d frames for zone %llx\n",
                frames_read, (unsigned long long)pkt->sector);
        pd->stats.pkt_started++;
        pd->stats.secs_rg += frames_read * (CD_FRAMESIZE >> 9);
@@ -1183,7 +1187,8 @@ static inline void pkt_set_state(struct packet_data *pkt, enum packet_data_state
                "IDLE", "WAITING", "READ_WAIT", "WRITE_WAIT", "RECOVERY", "FINISHED"
        };
        enum packet_data_state old_state = pkt->state;
-       VPRINTK("pkt %2d : s=%6llx %s -> %s\n", pkt->id, (unsigned long long)pkt->sector,
+       pkt_dbg(2, pd, "pkt %2d : s=%6llx %s -> %s\n",
+               pkt->id, (unsigned long long)pkt->sector,
                state_name[old_state], state_name[state]);
 #endif
        pkt->state = state;
@@ -1202,12 +1207,10 @@ static int pkt_handle_queue(struct pktcdvd_device *pd)
        struct rb_node *n;
        int wakeup;
 
-       VPRINTK("handle_queue\n");
-
        atomic_set(&pd->scan_queue, 0);
 
        if (list_empty(&pd->cdrw.pkt_free_list)) {
-               VPRINTK("handle_queue: no pkt\n");
+               pkt_dbg(2, pd, "no pkt\n");
                return 0;
        }
 
@@ -1224,7 +1227,7 @@ static int pkt_handle_queue(struct pktcdvd_device *pd)
        node = first_node;
        while (node) {
                bio = node->bio;
-               zone = ZONE(bio->bi_sector, pd);
+               zone = get_zone(bio->bi_sector, pd);
                list_for_each_entry(p, &pd->cdrw.pkt_active_list, list) {
                        if (p->sector == zone) {
                                bio = NULL;
@@ -1244,7 +1247,7 @@ try_next_bio:
        }
        spin_unlock(&pd->lock);
        if (!bio) {
-               VPRINTK("handle_queue: no bio\n");
+               pkt_dbg(2, pd, "no bio\n");
                return 0;
        }
 
@@ -1260,12 +1263,12 @@ try_next_bio:
         * to this packet.
         */
        spin_lock(&pd->lock);
-       VPRINTK("pkt_handle_queue: looking for zone %llx\n", (unsigned long long)zone);
+       pkt_dbg(2, pd, "looking for zone %llx\n", (unsigned long long)zone);
        while ((node = pkt_rbtree_find(pd, zone)) != NULL) {
                bio = node->bio;
-               VPRINTK("pkt_handle_queue: found zone=%llx\n",
-                       (unsigned long long)ZONE(bio->bi_sector, pd));
-               if (ZONE(bio->bi_sector, pd) != zone)
+               pkt_dbg(2, pd, "found zone=%llx\n",
+                       (unsigned long long)get_zone(bio->bi_sector, pd));
+               if (get_zone(bio->bi_sector, pd) != zone)
                        break;
                pkt_rbtree_erase(pd, node);
                spin_lock(&pkt->lock);
@@ -1316,7 +1319,7 @@ static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt)
                if (!bio_add_page(pkt->w_bio, bvec[f].bv_page, CD_FRAMESIZE, bvec[f].bv_offset))
                        BUG();
        }
-       VPRINTK(DRIVER_NAME": vcnt=%d\n", pkt->w_bio->bi_vcnt);
+       pkt_dbg(2, pd, "vcnt=%d\n", pkt->w_bio->bi_vcnt);
 
        /*
         * Fill-in bvec with data from orig_bios.
@@ -1327,7 +1330,7 @@ static void pkt_start_write(struct pktcdvd_device *pd, struct packet_data *pkt)
        pkt_set_state(pkt, PACKET_WRITE_WAIT_STATE);
        spin_unlock(&pkt->lock);
 
-       VPRINTK("pkt_start_write: Writing %d frames for zone %llx\n",
+       pkt_dbg(2, pd, "Writing %d frames for zone %llx\n",
                pkt->write_size, (unsigned long long)pkt->sector);
 
        if (test_bit(PACKET_MERGE_SEGS, &pd->flags) || (pkt->write_size < pkt->frames)) {
@@ -1359,7 +1362,7 @@ static void pkt_run_state_machine(struct pktcdvd_device *pd, struct packet_data
 {
        int uptodate;
 
-       VPRINTK("run_state_machine: pkt %d\n", pkt->id);
+       pkt_dbg(2, pd, "pkt %d\n", pkt->id);
 
        for (;;) {
                switch (pkt->state) {
@@ -1398,7 +1401,7 @@ static void pkt_run_state_machine(struct pktcdvd_device *pd, struct packet_data
                        if (pkt_start_recovery(pkt)) {
                                pkt_start_write(pd, pkt);
                        } else {
-                               VPRINTK("No recovery possible\n");
+                               pkt_dbg(2, pd, "No recovery possible\n");
                                pkt_set_state(pkt, PACKET_FINISHED_STATE);
                        }
                        break;
@@ -1419,8 +1422,6 @@ static void pkt_handle_packets(struct pktcdvd_device *pd)
 {
        struct packet_data *pkt, *next;
 
-       VPRINTK("pkt_handle_packets\n");
-
        /*
         * Run state machine for active packets
         */
@@ -1502,9 +1503,9 @@ static int kcdrwd(void *foobar)
                        if (PACKET_DEBUG > 1) {
                                int states[PACKET_NUM_STATES];
                                pkt_count_states(pd, states);
-                               VPRINTK("kcdrwd: i:%d ow:%d rw:%d ww:%d rec:%d fin:%d\n",
-                                       states[0], states[1], states[2], states[3],
-                                       states[4], states[5]);
+                               pkt_dbg(2, pd, "i:%d ow:%d rw:%d ww:%d rec:%d fin:%d\n",
+                                       states[0], states[1], states[2],
+                                       states[3], states[4], states[5]);
                        }
 
                        min_sleep_time = MAX_SCHEDULE_TIMEOUT;
@@ -1513,9 +1514,9 @@ static int kcdrwd(void *foobar)
                                        min_sleep_time = pkt->sleep_time;
                        }
 
-                       VPRINTK("kcdrwd: sleeping\n");
+                       pkt_dbg(2, pd, "sleeping\n");
                        residue = schedule_timeout(min_sleep_time);
-                       VPRINTK("kcdrwd: wake up\n");
+                       pkt_dbg(2, pd, "wake up\n");
 
                        /* make swsusp happy with our thread */
                        try_to_freeze();
@@ -1563,9 +1564,10 @@ work_to_do:
 
 static void pkt_print_settings(struct pktcdvd_device *pd)
 {
-       printk(DRIVER_NAME": %s packets, ", pd->settings.fp ? "Fixed" : "Variable");
-       printk("%u blocks, ", pd->settings.size >> 2);
-       printk("Mode-%c disc\n", pd->settings.block_mode == 8 ? '1' : '2');
+       pkt_info(pd, "%s packets, %u blocks, Mode-%c disc\n",
+                pd->settings.fp ? "Fixed" : "Variable",
+                pd->settings.size >> 2,
+                pd->settings.block_mode == 8 ? '1' : '2');
 }
 
 static int pkt_mode_sense(struct pktcdvd_device *pd, struct packet_command *cgc, int page_code, int page_control)
@@ -1699,7 +1701,7 @@ static noinline_for_stack int pkt_set_write_settings(struct pktcdvd_device *pd)
        init_cdrom_command(&cgc, buffer, sizeof(*wp), CGC_DATA_READ);
        cgc.sense = &sense;
        if ((ret = pkt_mode_sense(pd, &cgc, GPMODE_WRITE_PARMS_PAGE, 0))) {
-               pkt_dump_sense(&cgc);
+               pkt_dump_sense(pd, &cgc);
                return ret;
        }
 
@@ -1714,7 +1716,7 @@ static noinline_for_stack int pkt_set_write_settings(struct pktcdvd_device *pd)
        init_cdrom_command(&cgc, buffer, size, CGC_DATA_READ);
        cgc.sense = &sense;
        if ((ret = pkt_mode_sense(pd, &cgc, GPMODE_WRITE_PARMS_PAGE, 0))) {
-               pkt_dump_sense(&cgc);
+               pkt_dump_sense(pd, &cgc);
                return ret;
        }
 
@@ -1749,14 +1751,14 @@ static noinline_for_stack int pkt_set_write_settings(struct pktcdvd_device *pd)
                /*
                 * paranoia
                 */
-               printk(DRIVER_NAME": write mode wrong %d\n", wp->data_block_type);
+               pkt_err(pd, "write mode wrong %d\n", wp->data_block_type);
                return 1;
        }
        wp->packet_size = cpu_to_be32(pd->settings.size >> 2);
 
        cgc.buflen = cgc.cmd[8] = size;
        if ((ret = pkt_mode_select(pd, &cgc))) {
-               pkt_dump_sense(&cgc);
+               pkt_dump_sense(pd, &cgc);
                return ret;
        }
 
@@ -1793,7 +1795,7 @@ static int pkt_writable_track(struct pktcdvd_device *pd, track_information *ti)
        if (ti->rt == 1 && ti->blank == 0)
                return 1;
 
-       printk(DRIVER_NAME": bad state %d-%d-%d\n", ti->rt, ti->blank, ti->packet);
+       pkt_err(pd, "bad state %d-%d-%d\n", ti->rt, ti->blank, ti->packet);
        return 0;
 }
 
@@ -1811,7 +1813,8 @@ static int pkt_writable_disc(struct pktcdvd_device *pd, disc_information *di)
                case 0x12: /* DVD-RAM */
                        return 1;
                default:
-                       VPRINTK(DRIVER_NAME": Wrong disc profile (%x)\n", pd->mmc3_profile);
+                       pkt_dbg(2, pd, "Wrong disc profile (%x)\n",
+                               pd->mmc3_profile);
                        return 0;
        }
 
@@ -1820,22 +1823,22 @@ static int pkt_writable_disc(struct pktcdvd_device *pd, disc_information *di)
         * but i'm not sure, should we leave this to user apps? probably.
         */
        if (di->disc_type == 0xff) {
-               printk(DRIVER_NAME": Unknown disc. No track?\n");
+               pkt_notice(pd, "unknown disc - no track?\n");
                return 0;
        }
 
        if (di->disc_type != 0x20 && di->disc_type != 0) {
-               printk(DRIVER_NAME": Wrong disc type (%x)\n", di->disc_type);
+               pkt_err(pd, "wrong disc type (%x)\n", di->disc_type);
                return 0;
        }
 
        if (di->erasable == 0) {
-               printk(DRIVER_NAME": Disc not erasable\n");
+               pkt_notice(pd, "disc not erasable\n");
                return 0;
        }
 
        if (di->border_status == PACKET_SESSION_RESERVED) {
-               printk(DRIVER_NAME": Can't write to last track (reserved)\n");
+               pkt_err(pd, "can't write to last track (reserved)\n");
                return 0;
        }
 
@@ -1860,7 +1863,7 @@ static noinline_for_stack int pkt_probe_settings(struct pktcdvd_device *pd)
        memset(&ti, 0, sizeof(track_information));
 
        if ((ret = pkt_get_disc_info(pd, &di))) {
-               printk("failed get_disc\n");
+               pkt_err(pd, "failed get_disc\n");
                return ret;
        }
 
@@ -1871,12 +1874,12 @@ static noinline_for_stack int pkt_probe_settings(struct pktcdvd_device *pd)
 
        track = 1; /* (di.last_track_msb << 8) | di.last_track_lsb; */
        if ((ret = pkt_get_track_info(pd, track, 1, &ti))) {
-               printk(DRIVER_NAME": failed get_track\n");
+               pkt_err(pd, "failed get_track\n");
                return ret;
        }
 
        if (!pkt_writable_track(pd, &ti)) {
-               printk(DRIVER_NAME": can't write to this track\n");
+               pkt_err(pd, "can't write to this track\n");
                return -EROFS;
        }
 
@@ -1886,11 +1889,11 @@ static noinline_for_stack int pkt_probe_settings(struct pktcdvd_device *pd)
         */
        pd->settings.size = be32_to_cpu(ti.fixed_packet_size) << 2;
        if (pd->settings.size == 0) {
-               printk(DRIVER_NAME": detected zero packet size!\n");
+               pkt_notice(pd, "detected zero packet size!\n");
                return -ENXIO;
        }
        if (pd->settings.size > PACKET_MAX_SECTORS) {
-               printk(DRIVER_NAME": packet size is too big\n");
+               pkt_err(pd, "packet size is too big\n");
                return -EROFS;
        }
        pd->settings.fp = ti.fp;
@@ -1932,7 +1935,7 @@ static noinline_for_stack int pkt_probe_settings(struct pktcdvd_device *pd)
                        pd->settings.block_mode = PACKET_BLOCK_MODE2;
                        break;
                default:
-                       printk(DRIVER_NAME": unknown data mode\n");
+                       pkt_err(pd, "unknown data mode\n");
                        return -EROFS;
        }
        return 0;
@@ -1966,10 +1969,10 @@ static noinline_for_stack int pkt_write_caching(struct pktcdvd_device *pd,
        cgc.buflen = cgc.cmd[8] = 2 + ((buf[0] << 8) | (buf[1] & 0xff));
        ret = pkt_mode_select(pd, &cgc);
        if (ret) {
-               printk(DRIVER_NAME": write caching control failed\n");
-               pkt_dump_sense(&cgc);
+               pkt_err(pd, "write caching control failed\n");
+               pkt_dump_sense(pd, &cgc);
        } else if (!ret && set)
-               printk(DRIVER_NAME": enabled write caching on %s\n", pd->name);
+               pkt_notice(pd, "enabled write caching\n");
        return ret;
 }
 
@@ -2005,7 +2008,7 @@ static noinline_for_stack int pkt_get_max_speed(struct pktcdvd_device *pd,
                             sizeof(struct mode_page_header);
                ret = pkt_mode_sense(pd, &cgc, GPMODE_CAPABILITIES_PAGE, 0);
                if (ret) {
-                       pkt_dump_sense(&cgc);
+                       pkt_dump_sense(pd, &cgc);
                        return ret;
                }
        }
@@ -2064,7 +2067,7 @@ static noinline_for_stack int pkt_media_speed(struct pktcdvd_device *pd,
        cgc.cmd[8] = 2;
        ret = pkt_generic_packet(pd, &cgc);
        if (ret) {
-               pkt_dump_sense(&cgc);
+               pkt_dump_sense(pd, &cgc);
                return ret;
        }
        size = ((unsigned int) buf[0]<<8) + buf[1] + 2;
@@ -2079,16 +2082,16 @@ static noinline_for_stack int pkt_media_speed(struct pktcdvd_device *pd,
        cgc.cmd[8] = size;
        ret = pkt_generic_packet(pd, &cgc);
        if (ret) {
-               pkt_dump_sense(&cgc);
+               pkt_dump_sense(pd, &cgc);
                return ret;
        }
 
        if (!(buf[6] & 0x40)) {
-               printk(DRIVER_NAME": Disc type is not CD-RW\n");
+               pkt_notice(pd, "disc type is not CD-RW\n");
                return 1;
        }
        if (!(buf[6] & 0x4)) {
-               printk(DRIVER_NAME": A1 values on media are not valid, maybe not CDRW?\n");
+               pkt_notice(pd, "A1 values on media are not valid, maybe not CDRW?\n");
                return 1;
        }
 
@@ -2108,14 +2111,14 @@ static noinline_for_stack int pkt_media_speed(struct pktcdvd_device *pd,
                        *speed = us_clv_to_speed[sp];
                        break;
                default:
-                       printk(DRIVER_NAME": Unknown disc sub-type %d\n",st);
+                       pkt_notice(pd, "unknown disc sub-type %d\n", st);
                        return 1;
        }
        if (*speed) {
-               printk(DRIVER_NAME": Max. media speed: %d\n",*speed);
+               pkt_info(pd, "maximum media speed: %d\n", *speed);
                return 0;
        } else {
-               printk(DRIVER_NAME": Unknown speed %d for sub-type %d\n",sp,st);
+               pkt_notice(pd, "unknown speed %d for sub-type %d\n", sp, st);
                return 1;
        }
 }
@@ -2126,7 +2129,7 @@ static noinline_for_stack int pkt_perform_opc(struct pktcdvd_device *pd)
        struct request_sense sense;
        int ret;
 
-       VPRINTK(DRIVER_NAME": Performing OPC\n");
+       pkt_dbg(2, pd, "Performing OPC\n");
 
        init_cdrom_command(&cgc, NULL, 0, CGC_DATA_NONE);
        cgc.sense = &sense;
@@ -2134,7 +2137,7 @@ static noinline_for_stack int pkt_perform_opc(struct pktcdvd_device *pd)
        cgc.cmd[0] = GPCMD_SEND_OPC;
        cgc.cmd[1] = 1;
        if ((ret = pkt_generic_packet(pd, &cgc)))
-               pkt_dump_sense(&cgc);
+               pkt_dump_sense(pd, &cgc);
        return ret;
 }
 
@@ -2144,12 +2147,12 @@ static int pkt_open_write(struct pktcdvd_device *pd)
        unsigned int write_speed, media_write_speed, read_speed;
 
        if ((ret = pkt_probe_settings(pd))) {
-               VPRINTK(DRIVER_NAME": %s failed probe\n", pd->name);
+               pkt_dbg(2, pd, "failed probe\n");
                return ret;
        }
 
        if ((ret = pkt_set_write_settings(pd))) {
-               DPRINTK(DRIVER_NAME": %s failed saving write settings\n", pd->name);
+               pkt_dbg(1, pd, "failed saving write settings\n");
                return -EIO;
        }
 
@@ -2161,26 +2164,26 @@ static int pkt_open_write(struct pktcdvd_device *pd)
                case 0x13: /* DVD-RW */
                case 0x1a: /* DVD+RW */
                case 0x12: /* DVD-RAM */
-                       DPRINTK(DRIVER_NAME": write speed %ukB/s\n", write_speed);
+                       pkt_dbg(1, pd, "write speed %ukB/s\n", write_speed);
                        break;
                default:
                        if ((ret = pkt_media_speed(pd, &media_write_speed)))
                                media_write_speed = 16;
                        write_speed = min(write_speed, media_write_speed * 177);
-                       DPRINTK(DRIVER_NAME": write speed %ux\n", write_speed / 176);
+                       pkt_dbg(1, pd, "write speed %ux\n", write_speed / 176);
                        break;
        }
        read_speed = write_speed;
 
        if ((ret = pkt_set_speed(pd, write_speed, read_speed))) {
-               DPRINTK(DRIVER_NAME": %s couldn't set write speed\n", pd->name);
+               pkt_dbg(1, pd, "couldn't set write speed\n");
                return -EIO;
        }
        pd->write_speed = write_speed;
        pd->read_speed = read_speed;
 
        if ((ret = pkt_perform_opc(pd))) {
-               DPRINTK(DRIVER_NAME": %s Optimum Power Calibration failed\n", pd->name);
+               pkt_dbg(1, pd, "Optimum Power Calibration failed\n");
        }
 
        return 0;
@@ -2205,7 +2208,7 @@ static int pkt_open_dev(struct pktcdvd_device *pd, fmode_t write)
                goto out;
 
        if ((ret = pkt_get_last_written(pd, &lba))) {
-               printk(DRIVER_NAME": pkt_get_last_written failed\n");
+               pkt_err(pd, "pkt_get_last_written failed\n");
                goto out_putdev;
        }
 
@@ -2235,11 +2238,11 @@ static int pkt_open_dev(struct pktcdvd_device *pd, fmode_t write)
 
        if (write) {
                if (!pkt_grow_pktlist(pd, CONFIG_CDROM_PKTCDVD_BUFFERS)) {
-                       printk(DRIVER_NAME": not enough memory for buffers\n");
+                       pkt_err(pd, "not enough memory for buffers\n");
                        ret = -ENOMEM;
                        goto out_putdev;
                }
-               printk(DRIVER_NAME": %lukB available on disc\n", lba << 1);
+               pkt_info(pd, "%lukB available on disc\n", lba << 1);
        }
 
        return 0;
@@ -2257,7 +2260,7 @@ out:
 static void pkt_release_dev(struct pktcdvd_device *pd, int flush)
 {
        if (flush && pkt_flush_cache(pd))
-               DPRINTK(DRIVER_NAME": %s not flushing cache\n", pd->name);
+               pkt_dbg(1, pd, "not flushing cache\n");
 
        pkt_lock_door(pd, 0);
 
@@ -2279,8 +2282,6 @@ static int pkt_open(struct block_device *bdev, fmode_t mode)
        struct pktcdvd_device *pd = NULL;
        int ret;
 
-       VPRINTK(DRIVER_NAME": entering open\n");
-
        mutex_lock(&pktcdvd_mutex);
        mutex_lock(&ctl_mutex);
        pd = pkt_find_dev_from_minor(MINOR(bdev->bd_dev));
@@ -2315,7 +2316,6 @@ static int pkt_open(struct block_device *bdev, fmode_t mode)
 out_dec:
        pd->refcnt--;
 out:
-       VPRINTK(DRIVER_NAME": failed open (%d)\n", ret);
        mutex_unlock(&ctl_mutex);
        mutex_unlock(&pktcdvd_mutex);
        return ret;
@@ -2360,7 +2360,8 @@ static void pkt_make_request(struct request_queue *q, struct bio *bio)
 
        pd = q->queuedata;
        if (!pd) {
-               printk(DRIVER_NAME": %s incorrect request queue\n", bdevname(bio->bi_bdev, b));
+               pr_err("%s incorrect request queue\n",
+                      bdevname(bio->bi_bdev, b));
                goto end_io;
        }
 
@@ -2382,20 +2383,20 @@ static void pkt_make_request(struct request_queue *q, struct bio *bio)
        }
 
        if (!test_bit(PACKET_WRITABLE, &pd->flags)) {
-               printk(DRIVER_NAME": WRITE for ro device %s (%llu)\n",
-                       pd->name, (unsigned long long)bio->bi_sector);
+               pkt_notice(pd, "WRITE for ro device (%llu)\n",
+                          (unsigned long long)bio->bi_sector);
                goto end_io;
        }
 
        if (!bio->bi_size || (bio->bi_size % CD_FRAMESIZE)) {
-               printk(DRIVER_NAME": wrong bio size\n");
+               pkt_err(pd, "wrong bio size\n");
                goto end_io;
        }
 
        blk_queue_bounce(q, &bio);
 
-       zone = ZONE(bio->bi_sector, pd);
-       VPRINTK("pkt_make_request: start = %6llx stop = %6llx\n",
+       zone = get_zone(bio->bi_sector, pd);
+       pkt_dbg(2, pd, "start = %6llx stop = %6llx\n",
                (unsigned long long)bio->bi_sector,
                (unsigned long long)bio_end_sector(bio));
 
@@ -2405,7 +2406,7 @@ static void pkt_make_request(struct request_queue *q, struct bio *bio)
                sector_t last_zone;
                int first_sectors;
 
-               last_zone = ZONE(bio_end_sector(bio) - 1, pd);
+               last_zone = get_zone(bio_end_sector(bio) - 1, pd);
                if (last_zone != zone) {
                        BUG_ON(last_zone != zone + pd->settings.size);
                        first_sectors = last_zone - bio->bi_sector;
@@ -2500,7 +2501,7 @@ static int pkt_merge_bvec(struct request_queue *q, struct bvec_merge_data *bmd,
                          struct bio_vec *bvec)
 {
        struct pktcdvd_device *pd = q->queuedata;
-       sector_t zone = ZONE(bmd->bi_sector, pd);
+       sector_t zone = get_zone(bmd->bi_sector, pd);
        int used = ((bmd->bi_sector - zone) << 9) + bmd->bi_size;
        int remaining = (pd->settings.size << 9) - used;
        int remaining2;
@@ -2609,7 +2610,7 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev)
        struct block_device *bdev;
 
        if (pd->pkt_dev == dev) {
-               printk(DRIVER_NAME": Recursive setup not allowed\n");
+               pkt_err(pd, "recursive setup not allowed\n");
                return -EBUSY;
        }
        for (i = 0; i < MAX_WRITERS; i++) {
@@ -2617,11 +2618,12 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev)
                if (!pd2)
                        continue;
                if (pd2->bdev->bd_dev == dev) {
-                       printk(DRIVER_NAME": %s already setup\n", bdevname(pd2->bdev, b));
+                       pkt_err(pd, "%s already setup\n",
+                               bdevname(pd2->bdev, b));
                        return -EBUSY;
                }
                if (pd2->pkt_dev == dev) {
-                       printk(DRIVER_NAME": Can't chain pktcdvd devices\n");
+                       pkt_err(pd, "can't chain pktcdvd devices\n");
                        return -EBUSY;
                }
        }
@@ -2644,13 +2646,13 @@ static int pkt_new_dev(struct pktcdvd_device *pd, dev_t dev)
        atomic_set(&pd->cdrw.pending_bios, 0);
        pd->cdrw.thread = kthread_run(kcdrwd, pd, "%s", pd->name);
        if (IS_ERR(pd->cdrw.thread)) {
-               printk(DRIVER_NAME": can't start kernel thread\n");
+               pkt_err(pd, "can't start kernel thread\n");
                ret = -ENOMEM;
                goto out_mem;
        }
 
        proc_create_data(pd->name, 0, pkt_proc, &pkt_proc_fops, pd);
-       DPRINTK(DRIVER_NAME": writer %s mapped to %s\n", pd->name, bdevname(bdev, b));
+       pkt_dbg(1, pd, "writer mapped to %s\n", bdevname(bdev, b));
        return 0;
 
 out_mem:
@@ -2665,8 +2667,8 @@ static int pkt_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd,
        struct pktcdvd_device *pd = bdev->bd_disk->private_data;
        int ret;
 
-       VPRINTK("pkt_ioctl: cmd %x, dev %d:%d\n", cmd,
-               MAJOR(bdev->bd_dev), MINOR(bdev->bd_dev));
+       pkt_dbg(2, pd, "cmd %x, dev %d:%d\n",
+               cmd, MAJOR(bdev->bd_dev), MINOR(bdev->bd_dev));
 
        mutex_lock(&pktcdvd_mutex);
        switch (cmd) {
@@ -2690,7 +2692,7 @@ static int pkt_ioctl(struct block_device *bdev, fmode_t mode, unsigned int cmd,
                break;
 
        default:
-               VPRINTK(DRIVER_NAME": Unknown ioctl for %s (%x)\n", pd->name, cmd);
+               pkt_dbg(2, pd, "Unknown ioctl (%x)\n", cmd);
                ret = -ENOTTY;
        }
        mutex_unlock(&pktcdvd_mutex);
@@ -2743,7 +2745,7 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev)
                if (!pkt_devs[idx])
                        break;
        if (idx == MAX_WRITERS) {
-               printk(DRIVER_NAME": max %d writers supported\n", MAX_WRITERS);
+               pr_err("max %d writers supported\n", MAX_WRITERS);
                ret = -EBUSY;
                goto out_mutex;
        }
@@ -2818,7 +2820,7 @@ out_mem:
        kfree(pd);
 out_mutex:
        mutex_unlock(&ctl_mutex);
-       printk(DRIVER_NAME": setup of pktcdvd device failed\n");
+       pr_err("setup of pktcdvd device failed\n");
        return ret;
 }
 
@@ -2839,7 +2841,7 @@ static int pkt_remove_dev(dev_t pkt_dev)
                        break;
        }
        if (idx == MAX_WRITERS) {
-               DPRINTK(DRIVER_NAME": dev not setup\n");
+               pr_debug("dev not setup\n");
                ret = -ENXIO;
                goto out;
        }
@@ -2859,7 +2861,7 @@ static int pkt_remove_dev(dev_t pkt_dev)
        blkdev_put(pd->bdev, FMODE_READ | FMODE_NDELAY);
 
        remove_proc_entry(pd->name, pkt_proc);
-       DPRINTK(DRIVER_NAME": writer %s unmapped\n", pd->name);
+       pkt_dbg(1, pd, "writer unmapped\n");
 
        del_gendisk(pd->disk);
        blk_cleanup_queue(pd->disk->queue);
@@ -2969,7 +2971,7 @@ static int __init pkt_init(void)
 
        ret = register_blkdev(pktdev_major, DRIVER_NAME);
        if (ret < 0) {
-               printk(DRIVER_NAME": Unable to register block device\n");
+               pr_err("unable to register block device\n");
                goto out2;
        }
        if (!pktdev_major)
@@ -2983,7 +2985,7 @@ static int __init pkt_init(void)
 
        ret = misc_register(&pkt_misc);
        if (ret) {
-               printk(DRIVER_NAME": Unable to register misc device\n");
+               pr_err("unable to register misc device\n");
                goto out_misc;
        }
 
index 39c51cc..b22a7d0 100644 (file)
@@ -5132,7 +5132,7 @@ static ssize_t rbd_remove(struct bus_type *bus,
        bool already = false;
        int ret;
 
-       ret = strict_strtoul(buf, 10, &ul);
+       ret = kstrtoul(buf, 10, &ul);
        if (ret)
                return ret;
 
index 8ed6ccb..b02d53a 100644 (file)
@@ -924,7 +924,6 @@ static int swim_probe(struct platform_device *dev)
        return 0;
 
 out_kfree:
-       platform_set_drvdata(dev, NULL);
        kfree(swd);
 out_iounmap:
        iounmap(swim_base);
@@ -962,7 +961,6 @@ static int swim_remove(struct platform_device *dev)
        if (res)
                release_mem_region(res->start, resource_size(res));
 
-       platform_set_drvdata(dev, NULL);
        kfree(swd);
 
        return 0;
index fe5c3cd..c2014a0 100644 (file)
@@ -620,7 +620,7 @@ static void backend_changed(struct xenbus_watch *watch,
        }
 
        /* Front end dir is a number, which is used as the handle. */
-       err = strict_strtoul(strrchr(dev->otherend, '/') + 1, 0, &handle);
+       err = kstrtoul(strrchr(dev->otherend, '/') + 1, 0, &handle);
        if (err)
                return;
 
index 0d91fe5..7737b5b 100644 (file)
 #include <linux/fips.h>
 #include <linux/ptrace.h>
 #include <linux/kmemcheck.h>
-
-#ifdef CONFIG_GENERIC_HARDIRQS
-# include <linux/irq.h>
-#endif
+#include <linux/irq.h>
 
 #include <asm/processor.h>
 #include <asm/uaccess.h>
index 4519cb3..5796d01 100644 (file)
@@ -766,6 +766,25 @@ static void tpm_tis_reenable_interrupts(struct tpm_chip *chip)
 }
 #endif
 
+#ifdef CONFIG_PM_SLEEP
+static int tpm_tis_resume(struct device *dev)
+{
+       struct tpm_chip *chip = dev_get_drvdata(dev);
+       int ret;
+
+       if (chip->vendor.irq)
+               tpm_tis_reenable_interrupts(chip);
+
+       ret = tpm_pm_resume(dev);
+       if (!ret)
+               tpm_do_selftest(chip);
+
+       return ret;
+}
+#endif
+
+static SIMPLE_DEV_PM_OPS(tpm_tis_pm, tpm_pm_suspend, tpm_tis_resume);
+
 #ifdef CONFIG_PNP
 static int tpm_tis_pnp_init(struct pnp_dev *pnp_dev,
                                      const struct pnp_device_id *pnp_id)
@@ -787,26 +806,6 @@ static int tpm_tis_pnp_init(struct pnp_dev *pnp_dev,
        return tpm_tis_init(&pnp_dev->dev, start, len, irq);
 }
 
-static int tpm_tis_pnp_suspend(struct pnp_dev *dev, pm_message_t msg)
-{
-       return tpm_pm_suspend(&dev->dev);
-}
-
-static int tpm_tis_pnp_resume(struct pnp_dev *dev)
-{
-       struct tpm_chip *chip = pnp_get_drvdata(dev);
-       int ret;
-
-       if (chip->vendor.irq)
-               tpm_tis_reenable_interrupts(chip);
-
-       ret = tpm_pm_resume(&dev->dev);
-       if (!ret)
-               tpm_do_selftest(chip);
-
-       return ret;
-}
-
 static struct pnp_device_id tpm_pnp_tbl[] = {
        {"PNP0C31", 0},         /* TPM */
        {"ATM1200", 0},         /* Atmel */
@@ -835,9 +834,12 @@ static struct pnp_driver tis_pnp_driver = {
        .name = "tpm_tis",
        .id_table = tpm_pnp_tbl,
        .probe = tpm_tis_pnp_init,
-       .suspend = tpm_tis_pnp_suspend,
-       .resume = tpm_tis_pnp_resume,
        .remove = tpm_tis_pnp_remove,
+#ifdef CONFIG_PM_SLEEP
+       .driver = {
+               .pm = &tpm_tis_pm,
+       },
+#endif
 };
 
 #define TIS_HID_USR_IDX sizeof(tpm_pnp_tbl)/sizeof(struct pnp_device_id) -2
@@ -846,20 +848,6 @@ module_param_string(hid, tpm_pnp_tbl[TIS_HID_USR_IDX].id,
 MODULE_PARM_DESC(hid, "Set additional specific HID for this driver to probe");
 #endif
 
-#ifdef CONFIG_PM_SLEEP
-static int tpm_tis_resume(struct device *dev)
-{
-       struct tpm_chip *chip = dev_get_drvdata(dev);
-
-       if (chip->vendor.irq)
-               tpm_tis_reenable_interrupts(chip);
-
-       return tpm_pm_resume(dev);
-}
-#endif
-
-static SIMPLE_DEV_PM_OPS(tpm_tis_pm, tpm_pm_suspend, tpm_tis_resume);
-
 static struct platform_driver tis_drv = {
        .driver = {
                .name = "tpm_tis",
index 4329a29..b9c81b7 100644 (file)
@@ -315,68 +315,47 @@ static int em_sti_probe(struct platform_device *pdev)
 {
        struct em_sti_priv *p;
        struct resource *res;
-       int irq, ret;
+       int irq;
 
-       p = kzalloc(sizeof(*p), GFP_KERNEL);
+       p = devm_kzalloc(&pdev->dev, sizeof(*p), GFP_KERNEL);
        if (p == NULL) {
                dev_err(&pdev->dev, "failed to allocate driver data\n");
-               ret = -ENOMEM;
-               goto err0;
+               return -ENOMEM;
        }
 
        p->pdev = pdev;
        platform_set_drvdata(pdev, p);
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       if (!res) {
-               dev_err(&pdev->dev, "failed to get I/O memory\n");
-               ret = -EINVAL;
-               goto err0;
-       }
-
        irq = platform_get_irq(pdev, 0);
        if (irq < 0) {
                dev_err(&pdev->dev, "failed to get irq\n");
-               ret = -EINVAL;
-               goto err0;
+               return -EINVAL;
        }
 
        /* map memory, let base point to the STI instance */
-       p->base = ioremap_nocache(res->start, resource_size(res));
-       if (p->base == NULL) {
-               dev_err(&pdev->dev, "failed to remap I/O memory\n");
-               ret = -ENXIO;
-               goto err0;
-       }
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       p->base = devm_ioremap_resource(&pdev->dev, res);
+       if (IS_ERR(p->base))
+               return PTR_ERR(p->base);
 
        /* get hold of clock */
-       p->clk = clk_get(&pdev->dev, "sclk");
+       p->clk = devm_clk_get(&pdev->dev, "sclk");
        if (IS_ERR(p->clk)) {
                dev_err(&pdev->dev, "cannot get clock\n");
-               ret = PTR_ERR(p->clk);
-               goto err1;
+               return PTR_ERR(p->clk);
        }
 
-       if (request_irq(irq, em_sti_interrupt,
-                       IRQF_TIMER | IRQF_IRQPOLL | IRQF_NOBALANCING,
-                       dev_name(&pdev->dev), p)) {
+       if (devm_request_irq(&pdev->dev, irq, em_sti_interrupt,
+                            IRQF_TIMER | IRQF_IRQPOLL | IRQF_NOBALANCING,
+                            dev_name(&pdev->dev), p)) {
                dev_err(&pdev->dev, "failed to request low IRQ\n");
-               ret = -ENOENT;
-               goto err2;
+               return -ENOENT;
        }
 
        raw_spin_lock_init(&p->lock);
        em_sti_register_clockevent(p);
        em_sti_register_clocksource(p);
        return 0;
-
-err2:
-       clk_put(p->clk);
-err1:
-       iounmap(p->base);
-err0:
-       kfree(p);
-       return ret;
 }
 
 static int em_sti_remove(struct platform_device *pdev)
index 7d2c2c5..1b74bea 100644 (file)
@@ -165,7 +165,8 @@ static void nmdk_clkevt_resume(struct clock_event_device *cedev)
 
 static struct clock_event_device nmdk_clkevt = {
        .name           = "mtu_1",
-       .features       = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_PERIODIC,
+       .features       = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_PERIODIC |
+                         CLOCK_EVT_FEAT_DYNIRQ,
        .rating         = 200,
        .set_mode       = nmdk_clkevt_mode,
        .set_next_event = nmdk_clkevt_next,
index 08d0c41..0965e98 100644 (file)
@@ -37,6 +37,7 @@
 
 struct sh_cmt_priv {
        void __iomem *mapbase;
+       void __iomem *mapbase_str;
        struct clk *clk;
        unsigned long width; /* 16 or 32 bit version of hardware block */
        unsigned long overflow_bit;
@@ -79,6 +80,12 @@ struct sh_cmt_priv {
  * CMCSR 0xffca0060 16-bit
  * CMCNT 0xffca0064 32-bit
  * CMCOR 0xffca0068 32-bit
+ *
+ * "32-bit counter and 32-bit control" as found on r8a73a4 and r8a7790:
+ * CMSTR 0xffca0500 32-bit
+ * CMCSR 0xffca0510 32-bit
+ * CMCNT 0xffca0514 32-bit
+ * CMCOR 0xffca0518 32-bit
  */
 
 static unsigned long sh_cmt_read16(void __iomem *base, unsigned long offs)
@@ -109,9 +116,7 @@ static void sh_cmt_write32(void __iomem *base, unsigned long offs,
 
 static inline unsigned long sh_cmt_read_cmstr(struct sh_cmt_priv *p)
 {
-       struct sh_timer_config *cfg = p->pdev->dev.platform_data;
-
-       return p->read_control(p->mapbase - cfg->channel_offset, 0);
+       return p->read_control(p->mapbase_str, 0);
 }
 
 static inline unsigned long sh_cmt_read_cmcsr(struct sh_cmt_priv *p)
@@ -127,9 +132,7 @@ static inline unsigned long sh_cmt_read_cmcnt(struct sh_cmt_priv *p)
 static inline void sh_cmt_write_cmstr(struct sh_cmt_priv *p,
                                      unsigned long value)
 {
-       struct sh_timer_config *cfg = p->pdev->dev.platform_data;
-
-       p->write_control(p->mapbase - cfg->channel_offset, 0, value);
+       p->write_control(p->mapbase_str, 0, value);
 }
 
 static inline void sh_cmt_write_cmcsr(struct sh_cmt_priv *p,
@@ -676,7 +679,7 @@ static int sh_cmt_register(struct sh_cmt_priv *p, char *name,
 static int sh_cmt_setup(struct sh_cmt_priv *p, struct platform_device *pdev)
 {
        struct sh_timer_config *cfg = pdev->dev.platform_data;
-       struct resource *res;
+       struct resource *res, *res2;
        int irq, ret;
        ret = -ENXIO;
 
@@ -694,6 +697,9 @@ static int sh_cmt_setup(struct sh_cmt_priv *p, struct platform_device *pdev)
                goto err0;
        }
 
+       /* optional resource for the shared timer start/stop register */
+       res2 = platform_get_resource(p->pdev, IORESOURCE_MEM, 1);
+
        irq = platform_get_irq(p->pdev, 0);
        if (irq < 0) {
                dev_err(&p->pdev->dev, "failed to get irq\n");
@@ -707,6 +713,15 @@ static int sh_cmt_setup(struct sh_cmt_priv *p, struct platform_device *pdev)
                goto err0;
        }
 
+       /* map second resource for CMSTR */
+       p->mapbase_str = ioremap_nocache(res2 ? res2->start :
+                                        res->start - cfg->channel_offset,
+                                        res2 ? resource_size(res2) : 2);
+       if (p->mapbase_str == NULL) {
+               dev_err(&p->pdev->dev, "failed to remap I/O second memory\n");
+               goto err1;
+       }
+
        /* request irq using setup_irq() (too early for request_irq()) */
        p->irqaction.name = dev_name(&p->pdev->dev);
        p->irqaction.handler = sh_cmt_interrupt;
@@ -719,11 +734,17 @@ static int sh_cmt_setup(struct sh_cmt_priv *p, struct platform_device *pdev)
        if (IS_ERR(p->clk)) {
                dev_err(&p->pdev->dev, "cannot get clock\n");
                ret = PTR_ERR(p->clk);
-               goto err1;
+               goto err2;
        }
 
-       p->read_control = sh_cmt_read16;
-       p->write_control = sh_cmt_write16;
+       if (res2 && (resource_size(res2) == 4)) {
+               /* assume both CMSTR and CMCSR to be 32-bit */
+               p->read_control = sh_cmt_read32;
+               p->write_control = sh_cmt_write32;
+       } else {
+               p->read_control = sh_cmt_read16;
+               p->write_control = sh_cmt_write16;
+       }
 
        if (resource_size(res) == 6) {
                p->width = 16;
@@ -752,22 +773,23 @@ static int sh_cmt_setup(struct sh_cmt_priv *p, struct platform_device *pdev)
                              cfg->clocksource_rating);
        if (ret) {
                dev_err(&p->pdev->dev, "registration failed\n");
-               goto err2;
+               goto err3;
        }
        p->cs_enabled = false;
 
        ret = setup_irq(irq, &p->irqaction);
        if (ret) {
                dev_err(&p->pdev->dev, "failed to request irq %d\n", irq);
-               goto err2;
+               goto err3;
        }
 
        platform_set_drvdata(pdev, p);
 
        return 0;
-err2:
+err3:
        clk_put(p->clk);
-
+err2:
+       iounmap(p->mapbase_str);
 err1:
        iounmap(p->mapbase);
 err0:
index 847cab6..0198504 100644 (file)
  *
  * Timer 0 is used as free-running clocksource, while timer 1 is
  * used as clock_event_device.
+ *
+ * ---
+ * Clocksource driver for Armada 370 and Armada XP SoC.
+ * This driver implements one compatible string for each SoC, given
+ * each has its own characteristics:
+ *
+ *   * Armada 370 has no 25 MHz fixed timer.
+ *
+ *   * Armada XP cannot work properly without such 25 MHz fixed timer as
+ *     doing otherwise leads to using a clocksource whose frequency varies
+ *     when doing cpufreq frequency changes.
+ *
+ * See Documentation/devicetree/bindings/timer/marvell,armada-370-xp-timer.txt
  */
 
 #include <linux/init.h>
 #include <linux/module.h>
 #include <linux/sched_clock.h>
 #include <linux/percpu.h>
-#include <linux/time-armada-370-xp.h>
 
 /*
  * Timer block registers.
  */
 #define TIMER_CTRL_OFF         0x0000
-#define  TIMER0_EN              0x0001
-#define  TIMER0_RELOAD_EN       0x0002
-#define  TIMER0_25MHZ            0x0800
+#define  TIMER0_EN              BIT(0)
+#define  TIMER0_RELOAD_EN       BIT(1)
+#define  TIMER0_25MHZ            BIT(11)
 #define  TIMER0_DIV(div)         ((div) << 19)
-#define  TIMER1_EN              0x0004
-#define  TIMER1_RELOAD_EN       0x0008
-#define  TIMER1_25MHZ            0x1000
+#define  TIMER1_EN              BIT(2)
+#define  TIMER1_RELOAD_EN       BIT(3)
+#define  TIMER1_25MHZ            BIT(12)
 #define  TIMER1_DIV(div)         ((div) << 22)
 #define TIMER_EVENTS_STATUS    0x0004
 #define  TIMER0_CLR_MASK         (~0x1)
@@ -72,6 +84,18 @@ static u32 ticks_per_jiffy;
 
 static struct clock_event_device __percpu *armada_370_xp_evt;
 
+static void timer_ctrl_clrset(u32 clr, u32 set)
+{
+       writel((readl(timer_base + TIMER_CTRL_OFF) & ~clr) | set,
+               timer_base + TIMER_CTRL_OFF);
+}
+
+static void local_timer_ctrl_clrset(u32 clr, u32 set)
+{
+       writel((readl(local_base + TIMER_CTRL_OFF) & ~clr) | set,
+               local_base + TIMER_CTRL_OFF);
+}
+
 static u32 notrace armada_370_xp_read_sched_clock(void)
 {
        return ~readl(timer_base + TIMER0_VAL_OFF);
@@ -84,7 +108,6 @@ static int
 armada_370_xp_clkevt_next_event(unsigned long delta,
                                struct clock_event_device *dev)
 {
-       u32 u;
        /*
         * Clear clockevent timer interrupt.
         */
@@ -98,11 +121,8 @@ armada_370_xp_clkevt_next_event(unsigned long delta,
        /*
         * Enable the timer.
         */
-       u = readl(local_base + TIMER_CTRL_OFF);
-       u = ((u & ~TIMER0_RELOAD_EN) | TIMER0_EN |
-            TIMER0_DIV(TIMER_DIVIDER_SHIFT));
-       writel(u, local_base + TIMER_CTRL_OFF);
-
+       local_timer_ctrl_clrset(TIMER0_RELOAD_EN,
+                               TIMER0_EN | TIMER0_DIV(TIMER_DIVIDER_SHIFT));
        return 0;
 }
 
@@ -110,8 +130,6 @@ static void
 armada_370_xp_clkevt_mode(enum clock_event_mode mode,
                          struct clock_event_device *dev)
 {
-       u32 u;
-
        if (mode == CLOCK_EVT_MODE_PERIODIC) {
 
                /*
@@ -123,18 +141,14 @@ armada_370_xp_clkevt_mode(enum clock_event_mode mode,
                /*
                 * Enable timer.
                 */
-
-               u = readl(local_base + TIMER_CTRL_OFF);
-
-               writel((u | TIMER0_EN | TIMER0_RELOAD_EN |
-                       TIMER0_DIV(TIMER_DIVIDER_SHIFT)),
-                       local_base + TIMER_CTRL_OFF);
+               local_timer_ctrl_clrset(0, TIMER0_RELOAD_EN |
+                                          TIMER0_EN |
+                                          TIMER0_DIV(TIMER_DIVIDER_SHIFT));
        } else {
                /*
                 * Disable timer.
                 */
-               u = readl(local_base + TIMER_CTRL_OFF);
-               writel(u & ~TIMER0_EN, local_base + TIMER_CTRL_OFF);
+               local_timer_ctrl_clrset(TIMER0_EN, 0);
 
                /*
                 * ACK pending timer interrupt.
@@ -163,14 +177,14 @@ static irqreturn_t armada_370_xp_timer_interrupt(int irq, void *dev_id)
  */
 static int armada_370_xp_timer_setup(struct clock_event_device *evt)
 {
-       u32 u;
+       u32 clr = 0, set = 0;
        int cpu = smp_processor_id();
 
-       u = readl(local_base + TIMER_CTRL_OFF);
        if (timer25Mhz)
-               writel(u | TIMER0_25MHZ, local_base + TIMER_CTRL_OFF);
+               set = TIMER0_25MHZ;
        else
-               writel(u & ~TIMER0_25MHZ, local_base + TIMER_CTRL_OFF);
+               clr = TIMER0_25MHZ;
+       local_timer_ctrl_clrset(clr, set);
 
        evt->name               = "armada_370_xp_per_cpu_tick",
        evt->features           = CLOCK_EVT_FEAT_ONESHOT |
@@ -217,36 +231,21 @@ static struct notifier_block armada_370_xp_timer_cpu_nb = {
        .notifier_call = armada_370_xp_timer_cpu_notify,
 };
 
-void __init armada_370_xp_timer_init(void)
+static void __init armada_370_xp_timer_common_init(struct device_node *np)
 {
-       u32 u;
-       struct device_node *np;
+       u32 clr = 0, set = 0;
        int res;
 
-       np = of_find_compatible_node(NULL, NULL, "marvell,armada-370-xp-timer");
        timer_base = of_iomap(np, 0);
        WARN_ON(!timer_base);
        local_base = of_iomap(np, 1);
 
-       if (of_find_property(np, "marvell,timer-25Mhz", NULL)) {
-               /* The fixed 25MHz timer is available so let's use it */
-               u = readl(timer_base + TIMER_CTRL_OFF);
-               writel(u | TIMER0_25MHZ,
-                      timer_base + TIMER_CTRL_OFF);
-               timer_clk = 25000000;
-       } else {
-               unsigned long rate = 0;
-               struct clk *clk = of_clk_get(np, 0);
-               WARN_ON(IS_ERR(clk));
-               rate =  clk_get_rate(clk);
-
-               u = readl(timer_base + TIMER_CTRL_OFF);
-               writel(u & ~(TIMER0_25MHZ),
-                      timer_base + TIMER_CTRL_OFF);
-
-               timer_clk = rate / TIMER_DIVIDER;
-               timer25Mhz = false;
-       }
+       if (timer25Mhz)
+               set = TIMER0_25MHZ;
+       else
+               clr = TIMER0_25MHZ;
+       timer_ctrl_clrset(clr, set);
+       local_timer_ctrl_clrset(clr, set);
 
        /*
         * We use timer 0 as clocksource, and private(local) timer 0
@@ -268,10 +267,8 @@ void __init armada_370_xp_timer_init(void)
        writel(0xffffffff, timer_base + TIMER0_VAL_OFF);
        writel(0xffffffff, timer_base + TIMER0_RELOAD_OFF);
 
-       u = readl(timer_base + TIMER_CTRL_OFF);
-
-       writel((u | TIMER0_EN | TIMER0_RELOAD_EN |
-               TIMER0_DIV(TIMER_DIVIDER_SHIFT)), timer_base + TIMER_CTRL_OFF);
+       timer_ctrl_clrset(0, TIMER0_EN | TIMER0_RELOAD_EN |
+                            TIMER0_DIV(TIMER_DIVIDER_SHIFT));
 
        clocksource_mmio_init(timer_base + TIMER0_VAL_OFF,
                              "armada_370_xp_clocksource",
@@ -293,3 +290,29 @@ void __init armada_370_xp_timer_init(void)
        if (!res)
                armada_370_xp_timer_setup(this_cpu_ptr(armada_370_xp_evt));
 }
+
+static void __init armada_xp_timer_init(struct device_node *np)
+{
+       struct clk *clk = of_clk_get_by_name(np, "fixed");
+
+       /* The 25Mhz fixed clock is mandatory, and must always be available */
+       BUG_ON(IS_ERR(clk));
+       timer_clk = clk_get_rate(clk);
+
+       armada_370_xp_timer_common_init(np);
+}
+CLOCKSOURCE_OF_DECLARE(armada_xp, "marvell,armada-xp-timer",
+                      armada_xp_timer_init);
+
+static void __init armada_370_timer_init(struct device_node *np)
+{
+       struct clk *clk = of_clk_get(np, 0);
+
+       BUG_ON(IS_ERR(clk));
+       timer_clk = clk_get_rate(clk) / TIMER_DIVIDER;
+       timer25Mhz = false;
+
+       armada_370_xp_timer_common_init(np);
+}
+CLOCKSOURCE_OF_DECLARE(armada_370, "marvell,armada-370-timer",
+                      armada_370_timer_init);
index 5c75e31..43c24aa 100644 (file)
@@ -280,13 +280,6 @@ static void __cpufreq_notify_transition(struct cpufreq_policy *policy,
        switch (state) {
 
        case CPUFREQ_PRECHANGE:
-               if (WARN(policy->transition_ongoing ==
-                                       cpumask_weight(policy->cpus),
-                               "In middle of another frequency transition\n"))
-                       return;
-
-               policy->transition_ongoing++;
-
                /* detect if the driver reported a value as "old frequency"
                 * which is not equal to what the cpufreq core thinks is
                 * "old frequency".
@@ -306,12 +299,6 @@ static void __cpufreq_notify_transition(struct cpufreq_policy *policy,
                break;
 
        case CPUFREQ_POSTCHANGE:
-               if (WARN(!policy->transition_ongoing,
-                               "No frequency transition in progress\n"))
-                       return;
-
-               policy->transition_ongoing--;
-
                adjust_jiffies(CPUFREQ_POSTCHANGE, freqs);
                pr_debug("FREQ: %lu - CPU: %lu", (unsigned long)freqs->new,
                        (unsigned long)freqs->cpu);
@@ -437,7 +424,7 @@ static int __cpufreq_set_policy(struct cpufreq_policy *policy,
 static ssize_t store_##file_name                                       \
 (struct cpufreq_policy *policy, const char *buf, size_t count)         \
 {                                                                      \
-       unsigned int ret;                                               \
+       int ret;                                                        \
        struct cpufreq_policy new_policy;                               \
                                                                        \
        ret = cpufreq_get_policy(&new_policy, policy->cpu);             \
@@ -490,7 +477,7 @@ static ssize_t show_scaling_governor(struct cpufreq_policy *policy, char *buf)
 static ssize_t store_scaling_governor(struct cpufreq_policy *policy,
                                        const char *buf, size_t count)
 {
-       unsigned int ret;
+       int ret;
        char    str_governor[16];
        struct cpufreq_policy new_policy;
 
@@ -694,8 +681,13 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr,
        struct freq_attr *fattr = to_attr(attr);
        ssize_t ret = -EINVAL;
 
+       get_online_cpus();
+
+       if (!cpu_online(policy->cpu))
+               goto unlock;
+
        if (!down_read_trylock(&cpufreq_rwsem))
-               goto exit;
+               goto unlock;
 
        if (lock_policy_rwsem_write(policy->cpu) < 0)
                goto up_read;
@@ -709,7 +701,9 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr,
 
 up_read:
        up_read(&cpufreq_rwsem);
-exit:
+unlock:
+       put_online_cpus();
+
        return ret;
 }
 
@@ -912,11 +906,11 @@ static struct cpufreq_policy *cpufreq_policy_restore(unsigned int cpu)
        struct cpufreq_policy *policy;
        unsigned long flags;
 
-       write_lock_irqsave(&cpufreq_driver_lock, flags);
+       read_lock_irqsave(&cpufreq_driver_lock, flags);
 
        policy = per_cpu(cpufreq_cpu_data_fallback, cpu);
 
-       write_unlock_irqrestore(&cpufreq_driver_lock, flags);
+       read_unlock_irqrestore(&cpufreq_driver_lock, flags);
 
        return policy;
 }
@@ -953,6 +947,21 @@ static void cpufreq_policy_free(struct cpufreq_policy *policy)
        kfree(policy);
 }
 
+static void update_policy_cpu(struct cpufreq_policy *policy, unsigned int cpu)
+{
+       if (cpu == policy->cpu)
+               return;
+
+       policy->last_cpu = policy->cpu;
+       policy->cpu = cpu;
+
+#ifdef CONFIG_CPU_FREQ_TABLE
+       cpufreq_frequency_table_update_policy_cpu(policy);
+#endif
+       blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
+                       CPUFREQ_UPDATE_POLICY_CPU, policy);
+}
+
 static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif,
                             bool frozen)
 {
@@ -1006,7 +1015,18 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif,
        if (!policy)
                goto nomem_out;
 
-       policy->cpu = cpu;
+
+       /*
+        * In the resume path, since we restore a saved policy, the assignment
+        * to policy->cpu is like an update of the existing policy, rather than
+        * the creation of a brand new one. So we need to perform this update
+        * by invoking update_policy_cpu().
+        */
+       if (frozen && cpu != policy->cpu)
+               update_policy_cpu(policy, cpu);
+       else
+               policy->cpu = cpu;
+
        policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
        cpumask_copy(policy->cpus, cpumask_of(cpu));
 
@@ -1098,18 +1118,6 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
        return __cpufreq_add_dev(dev, sif, false);
 }
 
-static void update_policy_cpu(struct cpufreq_policy *policy, unsigned int cpu)
-{
-       policy->last_cpu = policy->cpu;
-       policy->cpu = cpu;
-
-#ifdef CONFIG_CPU_FREQ_TABLE
-       cpufreq_frequency_table_update_policy_cpu(policy);
-#endif
-       blocking_notifier_call_chain(&cpufreq_policy_notifier_list,
-                       CPUFREQ_UPDATE_POLICY_CPU, policy);
-}
-
 static int cpufreq_nominate_new_policy_cpu(struct cpufreq_policy *policy,
                                           unsigned int old_cpu, bool frozen)
 {
@@ -1141,22 +1149,14 @@ static int cpufreq_nominate_new_policy_cpu(struct cpufreq_policy *policy,
        return cpu_dev->id;
 }
 
-/**
- * __cpufreq_remove_dev - remove a CPU device
- *
- * Removes the cpufreq interface for a CPU device.
- * Caller should already have policy_rwsem in write mode for this CPU.
- * This routine frees the rwsem before returning.
- */
-static int __cpufreq_remove_dev(struct device *dev,
-                               struct subsys_interface *sif, bool frozen)
+static int __cpufreq_remove_dev_prepare(struct device *dev,
+                                       struct subsys_interface *sif,
+                                       bool frozen)
 {
        unsigned int cpu = dev->id, cpus;
        int new_cpu, ret;
        unsigned long flags;
        struct cpufreq_policy *policy;
-       struct kobject *kobj;
-       struct completion *cmp;
 
        pr_debug("%s: unregistering CPU %u\n", __func__, cpu);
 
@@ -1196,8 +1196,9 @@ static int __cpufreq_remove_dev(struct device *dev,
                cpumask_clear_cpu(cpu, policy->cpus);
        unlock_policy_rwsem_write(cpu);
 
-       if (cpu != policy->cpu && !frozen) {
-               sysfs_remove_link(&dev->kobj, "cpufreq");
+       if (cpu != policy->cpu) {
+               if (!frozen)
+                       sysfs_remove_link(&dev->kobj, "cpufreq");
        } else if (cpus > 1) {
 
                new_cpu = cpufreq_nominate_new_policy_cpu(policy, cpu, frozen);
@@ -1213,6 +1214,33 @@ static int __cpufreq_remove_dev(struct device *dev,
                }
        }
 
+       return 0;
+}
+
+static int __cpufreq_remove_dev_finish(struct device *dev,
+                                      struct subsys_interface *sif,
+                                      bool frozen)
+{
+       unsigned int cpu = dev->id, cpus;
+       int ret;
+       unsigned long flags;
+       struct cpufreq_policy *policy;
+       struct kobject *kobj;
+       struct completion *cmp;
+
+       read_lock_irqsave(&cpufreq_driver_lock, flags);
+       policy = per_cpu(cpufreq_cpu_data, cpu);
+       read_unlock_irqrestore(&cpufreq_driver_lock, flags);
+
+       if (!policy) {
+               pr_debug("%s: No cpu_data found\n", __func__);
+               return -EINVAL;
+       }
+
+       lock_policy_rwsem_read(cpu);
+       cpus = cpumask_weight(policy->cpus);
+       unlock_policy_rwsem_read(cpu);
+
        /* If cpu is last user of policy, free policy */
        if (cpus == 1) {
                if (cpufreq_driver->target) {
@@ -1272,6 +1300,27 @@ static int __cpufreq_remove_dev(struct device *dev,
        return 0;
 }
 
+/**
+ * __cpufreq_remove_dev - remove a CPU device
+ *
+ * Removes the cpufreq interface for a CPU device.
+ * Caller should already have policy_rwsem in write mode for this CPU.
+ * This routine frees the rwsem before returning.
+ */
+static inline int __cpufreq_remove_dev(struct device *dev,
+                                      struct subsys_interface *sif,
+                                      bool frozen)
+{
+       int ret;
+
+       ret = __cpufreq_remove_dev_prepare(dev, sif, frozen);
+
+       if (!ret)
+               ret = __cpufreq_remove_dev_finish(dev, sif, frozen);
+
+       return ret;
+}
+
 static int cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif)
 {
        unsigned int cpu = dev->id;
@@ -1610,8 +1659,6 @@ int __cpufreq_driver_target(struct cpufreq_policy *policy,
 
        if (cpufreq_disabled())
                return -ENODEV;
-       if (policy->transition_ongoing)
-               return -EBUSY;
 
        /* Make sure that target_freq is within supported range */
        if (target_freq > policy->max)
@@ -1692,8 +1739,9 @@ static int __cpufreq_governor(struct cpufreq_policy *policy,
                                                policy->cpu, event);
 
        mutex_lock(&cpufreq_governor_lock);
-       if ((!policy->governor_enabled && (event == CPUFREQ_GOV_STOP)) ||
-           (policy->governor_enabled && (event == CPUFREQ_GOV_START))) {
+       if ((policy->governor_enabled && event == CPUFREQ_GOV_START)
+           || (!policy->governor_enabled
+           && (event == CPUFREQ_GOV_LIMITS || event == CPUFREQ_GOV_STOP))) {
                mutex_unlock(&cpufreq_governor_lock);
                return -EBUSY;
        }
@@ -1994,7 +2042,11 @@ static int cpufreq_cpu_callback(struct notifier_block *nfb,
                        break;
 
                case CPU_DOWN_PREPARE:
-                       __cpufreq_remove_dev(dev, NULL, frozen);
+                       __cpufreq_remove_dev_prepare(dev, NULL, frozen);
+                       break;
+
+               case CPU_POST_DEAD:
+                       __cpufreq_remove_dev_finish(dev, NULL, frozen);
                        break;
 
                case CPU_DOWN_FAILED:
index 04452f0..4cf0d28 100644 (file)
@@ -74,7 +74,7 @@ static ssize_t show_time_in_state(struct cpufreq_policy *policy, char *buf)
        for (i = 0; i < stat->state_num; i++) {
                len += sprintf(buf + len, "%u %llu\n", stat->freq_table[i],
                        (unsigned long long)
-                       cputime64_to_clock_t(stat->time_in_state[i]));
+                       jiffies_64_to_clock_t(stat->time_in_state[i]));
        }
        return len;
 }
index 6efd96c..9733f29 100644 (file)
@@ -522,6 +522,11 @@ static const struct x86_cpu_id intel_pstate_cpu_ids[] = {
        ICPU(0x2a, default_policy),
        ICPU(0x2d, default_policy),
        ICPU(0x3a, default_policy),
+       ICPU(0x3c, default_policy),
+       ICPU(0x3e, default_policy),
+       ICPU(0x3f, default_policy),
+       ICPU(0x45, default_policy),
+       ICPU(0x46, default_policy),
        {}
 };
 MODULE_DEVICE_TABLE(x86cpu, intel_pstate_cpu_ids);
index 3ac499d..6e11701 100644 (file)
@@ -331,7 +331,8 @@ struct cpuidle_driver *cpuidle_driver_ref(void)
        spin_lock(&cpuidle_driver_lock);
 
        drv = cpuidle_get_driver();
-       drv->refcnt++;
+       if (drv)
+               drv->refcnt++;
 
        spin_unlock(&cpuidle_driver_lock);
        return drv;
index dde1324..dcfe964 100644 (file)
@@ -4,7 +4,6 @@
 
 config DW_DMAC_CORE
        tristate "Synopsys DesignWare AHB DMA support"
-       depends on GENERIC_HARDIRQS
        select DMA_ENGINE
 
 config DW_DMAC
index 232fa8f..fa0affb 100644 (file)
@@ -14,7 +14,7 @@
  * of and an antecedent to, SMBIOS, which stands for System
  * Management BIOS.  See further: http://www.dmtf.org/standards
  */
-static char dmi_empty_string[] = "        ";
+static const char dmi_empty_string[] = "        ";
 
 static u16 __initdata dmi_ver;
 /*
@@ -49,7 +49,7 @@ static const char * __init dmi_string_nosave(const struct dmi_header *dm, u8 s)
        return "";
 }
 
-static char * __init dmi_string(const struct dmi_header *dm, u8 s)
+static const char * __init dmi_string(const struct dmi_header *dm, u8 s)
 {
        const char *bp = dmi_string_nosave(dm, s);
        char *str;
@@ -62,8 +62,6 @@ static char * __init dmi_string(const struct dmi_header *dm, u8 s)
        str = dmi_alloc(len);
        if (str != NULL)
                strcpy(str, bp);
-       else
-               printk(KERN_ERR "dmi_string: cannot allocate %Zu bytes.\n", len);
 
        return str;
 }
@@ -133,17 +131,18 @@ static int __init dmi_checksum(const u8 *buf, u8 len)
        return sum == 0;
 }
 
-static char *dmi_ident[DMI_STRING_MAX];
+static const char *dmi_ident[DMI_STRING_MAX];
 static LIST_HEAD(dmi_devices);
 int dmi_available;
 
 /*
  *     Save a DMI string
  */
-static void __init dmi_save_ident(const struct dmi_header *dm, int slot, int string)
+static void __init dmi_save_ident(const struct dmi_header *dm, int slot,
+               int string)
 {
-       const char *d = (const char*) dm;
-       char *p;
+       const char *d = (const char *) dm;
+       const char *p;
 
        if (dmi_ident[slot])
                return;
@@ -155,9 +154,10 @@ static void __init dmi_save_ident(const struct dmi_header *dm, int slot, int str
        dmi_ident[slot] = p;
 }
 
-static void __init dmi_save_uuid(const struct dmi_header *dm, int slot, int index)
+static void __init dmi_save_uuid(const struct dmi_header *dm, int slot,
+               int index)
 {
-       const u8 *d = (u8*) dm + index;
+       const u8 *d = (u8 *) dm + index;
        char *s;
        int is_ff = 1, is_00 = 1, i;
 
@@ -188,12 +188,13 @@ static void __init dmi_save_uuid(const struct dmi_header *dm, int slot, int inde
        else
                sprintf(s, "%pUB", d);
 
-        dmi_ident[slot] = s;
+       dmi_ident[slot] = s;
 }
 
-static void __init dmi_save_type(const struct dmi_header *dm, int slot, int index)
+static void __init dmi_save_type(const struct dmi_header *dm, int slot,
+               int index)
 {
-       const u8 *d = (u8*) dm + index;
+       const u8 *d = (u8 *) dm + index;
        char *s;
 
        if (dmi_ident[slot])
@@ -216,10 +217,8 @@ static void __init dmi_save_one_device(int type, const char *name)
                return;
 
        dev = dmi_alloc(sizeof(*dev) + strlen(name) + 1);
-       if (!dev) {
-               printk(KERN_ERR "dmi_save_one_device: out of memory.\n");
+       if (!dev)
                return;
-       }
 
        dev->type = type;
        strcpy((char *)(dev + 1), name);
@@ -249,17 +248,14 @@ static void __init dmi_save_oem_strings_devices(const struct dmi_header *dm)
        struct dmi_device *dev;
 
        for (i = 1; i <= count; i++) {
-               char *devname = dmi_string(dm, i);
+               const char *devname = dmi_string(dm, i);
 
                if (devname == dmi_empty_string)
                        continue;
 
                dev = dmi_alloc(sizeof(*dev));
-               if (!dev) {
-                       printk(KERN_ERR
-                          "dmi_save_oem_strings_devices: out of memory.\n");
+               if (!dev)
                        break;
-               }
 
                dev->type = DMI_DEV_TYPE_OEM_STRING;
                dev->name = devname;
@@ -272,21 +268,17 @@ static void __init dmi_save_oem_strings_devices(const struct dmi_header *dm)
 static void __init dmi_save_ipmi_device(const struct dmi_header *dm)
 {
        struct dmi_device *dev;
-       void * data;
+       void *data;
 
        data = dmi_alloc(dm->length);
-       if (data == NULL) {
-               printk(KERN_ERR "dmi_save_ipmi_device: out of memory.\n");
+       if (data == NULL)
                return;
-       }
 
        memcpy(data, dm, dm->length);
 
        dev = dmi_alloc(sizeof(*dev));
-       if (!dev) {
-               printk(KERN_ERR "dmi_save_ipmi_device: out of memory.\n");
+       if (!dev)
                return;
-       }
 
        dev->type = DMI_DEV_TYPE_IPMI;
        dev->name = "IPMI controller";
@@ -301,10 +293,9 @@ static void __init dmi_save_dev_onboard(int instance, int segment, int bus,
        struct dmi_dev_onboard *onboard_dev;
 
        onboard_dev = dmi_alloc(sizeof(*onboard_dev) + strlen(name) + 1);
-       if (!onboard_dev) {
-               printk(KERN_ERR "dmi_save_dev_onboard: out of memory.\n");
+       if (!onboard_dev)
                return;
-       }
+
        onboard_dev->instance = instance;
        onboard_dev->segment = segment;
        onboard_dev->bus = bus;
@@ -320,7 +311,7 @@ static void __init dmi_save_dev_onboard(int instance, int segment, int bus,
 
 static void __init dmi_save_extended_devices(const struct dmi_header *dm)
 {
-       const u8 *d = (u8*) dm + 5;
+       const u8 *d = (u8 *) dm + 5;
 
        /* Skip disabled device */
        if ((*d & 0x80) == 0)
@@ -338,7 +329,7 @@ static void __init dmi_save_extended_devices(const struct dmi_header *dm)
  */
 static void __init dmi_decode(const struct dmi_header *dm, void *dummy)
 {
-       switch(dm->type) {
+       switch (dm->type) {
        case 0:         /* BIOS Information */
                dmi_save_ident(dm, DMI_BIOS_VENDOR, 4);
                dmi_save_ident(dm, DMI_BIOS_VERSION, 5);
@@ -502,13 +493,7 @@ void __init dmi_scan_machine(void)
                        dmi_available = 1;
                        goto out;
                }
-       }
-       else {
-               /*
-                * no iounmap() for that ioremap(); it would be a no-op, but
-                * it's so early in setup that sucker gets confused into doing
-                * what it shouldn't if we actually call it.
-                */
+       } else {
                p = dmi_ioremap(0xF0000, 0x10000);
                if (p == NULL)
                        goto error;
@@ -533,7 +518,7 @@ void __init dmi_scan_machine(void)
                dmi_iounmap(p, 0x10000);
        }
  error:
-       printk(KERN_INFO "DMI not present or invalid.\n");
+       pr_info("DMI not present or invalid.\n");
  out:
        dmi_initialized = 1;
 }
@@ -669,7 +654,7 @@ int dmi_name_in_serial(const char *str)
 
 /**
  *     dmi_name_in_vendors - Check if string is in the DMI system or board vendor name
- *     @str:   Case sensitive Name
+ *     @str: Case sensitive Name
  */
 int dmi_name_in_vendors(const char *str)
 {
@@ -696,13 +681,13 @@ EXPORT_SYMBOL(dmi_name_in_vendors);
  *     A new search is initiated by passing %NULL as the @from argument.
  *     If @from is not %NULL, searches continue from next device.
  */
-const struct dmi_device * dmi_find_device(int type, const char *name,
+const struct dmi_device *dmi_find_device(int type, const char *name,
                                    const struct dmi_device *from)
 {
        const struct list_head *head = from ? &from->list : &dmi_devices;
        struct list_head *d;
 
-       for(d = head->next; d != &dmi_devices; d = d->next) {
+       for (d = head->next; d != &dmi_devices; d = d->next) {
                const struct dmi_device *dev =
                        list_entry(d, struct dmi_device, list);
 
index acba0b9..6eb535f 100644 (file)
@@ -525,7 +525,7 @@ static ssize_t gsmi_clear_eventlog_store(struct kobject *kobj,
                u32 data_type;
        } param;
 
-       rc = strict_strtoul(buf, 0, &val);
+       rc = kstrtoul(buf, 0, &val);
        if (rc)
                return rc;
 
index 349b161..b6ed304 100644 (file)
@@ -203,6 +203,14 @@ config GPIO_MXS
        select GPIO_GENERIC
        select GENERIC_IRQ_CHIP
 
+config GPIO_OCTEON
+       tristate "Cavium OCTEON GPIO"
+       depends on GPIOLIB && CAVIUM_OCTEON_SOC
+       default y
+       help
+         Say yes here to support the on-chip GPIO lines on the OCTEON
+         family of SOCs.
+
 config GPIO_PL061
        bool "PrimeCell PL061 GPIO support"
        depends on ARM && ARM_AMBA
@@ -314,7 +322,7 @@ config GPIO_ICH
 
 config GPIO_VX855
        tristate "VIA VX855/VX875 GPIO"
-       depends on PCI && GENERIC_HARDIRQS
+       depends on PCI
        select MFD_CORE
        select MFD_VX855
        help
@@ -388,7 +396,7 @@ config GPIO_MAX732X
 
 config GPIO_MAX732X_IRQ
        bool "Interrupt controller support for MAX732x"
-       depends on GPIO_MAX732X=y && GENERIC_HARDIRQS
+       depends on GPIO_MAX732X=y
        help
          Say yes here to enable the max732x to be used as an interrupt
          controller. It requires the driver to be built in the kernel.
@@ -653,7 +661,7 @@ config GPIO_TIMBERDALE
 
 config GPIO_RDC321X
        tristate "RDC R-321x GPIO support"
-       depends on PCI && GENERIC_HARDIRQS
+       depends on PCI
        select MFD_CORE
        select MFD_RDC321X
        help
index 97438bf..98e23eb 100644 (file)
@@ -52,6 +52,7 @@ obj-$(CONFIG_GPIO_MSM_V2)     += gpio-msm-v2.o
 obj-$(CONFIG_GPIO_MVEBU)        += gpio-mvebu.o
 obj-$(CONFIG_GPIO_MXC)         += gpio-mxc.o
 obj-$(CONFIG_GPIO_MXS)         += gpio-mxs.o
+obj-$(CONFIG_GPIO_OCTEON)      += gpio-octeon.o
 obj-$(CONFIG_ARCH_OMAP)                += gpio-omap.o
 obj-$(CONFIG_GPIO_PCA953X)     += gpio-pca953x.o
 obj-$(CONFIG_GPIO_PCF857X)     += gpio-pcf857x.o
diff --git a/drivers/gpio/gpio-octeon.c b/drivers/gpio/gpio-octeon.c
new file mode 100644 (file)
index 0000000..71a4a31
--- /dev/null
@@ -0,0 +1,157 @@
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2011, 2012 Cavium Inc.
+ */
+
+#include <linux/platform_device.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/gpio.h>
+#include <linux/io.h>
+
+#include <asm/octeon/octeon.h>
+#include <asm/octeon/cvmx-gpio-defs.h>
+
+#define RX_DAT 0x80
+#define TX_SET 0x88
+#define TX_CLEAR 0x90
+/*
+ * The address offset of the GPIO configuration register for a given
+ * line.
+ */
+static unsigned int bit_cfg_reg(unsigned int offset)
+{
+       /*
+        * The register stride is 8, with a discontinuity after the
+        * first 16.
+        */
+       if (offset < 16)
+               return 8 * offset;
+       else
+               return 8 * (offset - 16) + 0x100;
+}
+
+struct octeon_gpio {
+       struct gpio_chip chip;
+       u64 register_base;
+};
+
+static int octeon_gpio_dir_in(struct gpio_chip *chip, unsigned offset)
+{
+       struct octeon_gpio *gpio = container_of(chip, struct octeon_gpio, chip);
+
+       cvmx_write_csr(gpio->register_base + bit_cfg_reg(offset), 0);
+       return 0;
+}
+
+static void octeon_gpio_set(struct gpio_chip *chip, unsigned offset, int value)
+{
+       struct octeon_gpio *gpio = container_of(chip, struct octeon_gpio, chip);
+       u64 mask = 1ull << offset;
+       u64 reg = gpio->register_base + (value ? TX_SET : TX_CLEAR);
+       cvmx_write_csr(reg, mask);
+}
+
+static int octeon_gpio_dir_out(struct gpio_chip *chip, unsigned offset,
+                              int value)
+{
+       struct octeon_gpio *gpio = container_of(chip, struct octeon_gpio, chip);
+       union cvmx_gpio_bit_cfgx cfgx;
+
+       octeon_gpio_set(chip, offset, value);
+
+       cfgx.u64 = 0;
+       cfgx.s.tx_oe = 1;
+
+       cvmx_write_csr(gpio->register_base + bit_cfg_reg(offset), cfgx.u64);
+       return 0;
+}
+
+static int octeon_gpio_get(struct gpio_chip *chip, unsigned offset)
+{
+       struct octeon_gpio *gpio = container_of(chip, struct octeon_gpio, chip);
+       u64 read_bits = cvmx_read_csr(gpio->register_base + RX_DAT);
+
+       return ((1ull << offset) & read_bits) != 0;
+}
+
+static int octeon_gpio_probe(struct platform_device *pdev)
+{
+       struct octeon_gpio *gpio;
+       struct gpio_chip *chip;
+       struct resource *res_mem;
+       int err = 0;
+
+       gpio = devm_kzalloc(&pdev->dev, sizeof(*gpio), GFP_KERNEL);
+       if (!gpio)
+               return -ENOMEM;
+       chip = &gpio->chip;
+
+       res_mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       if (res_mem == NULL) {
+               dev_err(&pdev->dev, "found no memory resource\n");
+               err = -ENXIO;
+               goto out;
+       }
+       if (!devm_request_mem_region(&pdev->dev, res_mem->start,
+                                       resource_size(res_mem),
+                                    res_mem->name)) {
+               dev_err(&pdev->dev, "request_mem_region failed\n");
+               err = -ENXIO;
+               goto out;
+       }
+       gpio->register_base = (u64)devm_ioremap(&pdev->dev, res_mem->start,
+                                               resource_size(res_mem));
+
+       pdev->dev.platform_data = chip;
+       chip->label = "octeon-gpio";
+       chip->dev = &pdev->dev;
+       chip->owner = THIS_MODULE;
+       chip->base = 0;
+       chip->can_sleep = 0;
+       chip->ngpio = 20;
+       chip->direction_input = octeon_gpio_dir_in;
+       chip->get = octeon_gpio_get;
+       chip->direction_output = octeon_gpio_dir_out;
+       chip->set = octeon_gpio_set;
+       err = gpiochip_add(chip);
+       if (err)
+               goto out;
+
+       dev_info(&pdev->dev, "OCTEON GPIO driver probed.\n");
+out:
+       return err;
+}
+
+static int octeon_gpio_remove(struct platform_device *pdev)
+{
+       struct gpio_chip *chip = pdev->dev.platform_data;
+       return gpiochip_remove(chip);
+}
+
+static struct of_device_id octeon_gpio_match[] = {
+       {
+               .compatible = "cavium,octeon-3860-gpio",
+       },
+       {},
+};
+MODULE_DEVICE_TABLE(of, octeon_gpio_match);
+
+static struct platform_driver octeon_gpio_driver = {
+       .driver = {
+               .name           = "octeon_gpio",
+               .owner          = THIS_MODULE,
+               .of_match_table = octeon_gpio_match,
+       },
+       .probe          = octeon_gpio_probe,
+       .remove         = octeon_gpio_remove,
+};
+
+module_platform_driver(octeon_gpio_driver);
+
+MODULE_DESCRIPTION("Cavium Inc. OCTEON GPIO Driver");
+MODULE_AUTHOR("David Daney");
+MODULE_LICENSE("GPL");
index 9b265a4..c27a210 100644 (file)
@@ -1676,7 +1676,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
        return 0;
 
 out_gem_unload:
-       if (dev_priv->mm.inactive_shrinker.shrink)
+       if (dev_priv->mm.inactive_shrinker.scan_objects)
                unregister_shrinker(&dev_priv->mm.inactive_shrinker);
 
        if (dev->pdev->msi_enabled)
@@ -1715,7 +1715,7 @@ int i915_driver_unload(struct drm_device *dev)
 
        i915_teardown_sysfs(dev);
 
-       if (dev_priv->mm.inactive_shrinker.shrink)
+       if (dev_priv->mm.inactive_shrinker.scan_objects)
                unregister_shrinker(&dev_priv->mm.inactive_shrinker);
 
        mutex_lock(&dev->struct_mutex);
index d9e337f..8507c6d 100644 (file)
@@ -57,10 +57,12 @@ static void i915_gem_object_update_fence(struct drm_i915_gem_object *obj,
                                         struct drm_i915_fence_reg *fence,
                                         bool enable);
 
-static int i915_gem_inactive_shrink(struct shrinker *shrinker,
-                                   struct shrink_control *sc);
+static unsigned long i915_gem_inactive_count(struct shrinker *shrinker,
+                                            struct shrink_control *sc);
+static unsigned long i915_gem_inactive_scan(struct shrinker *shrinker,
+                                           struct shrink_control *sc);
 static long i915_gem_purge(struct drm_i915_private *dev_priv, long target);
-static void i915_gem_shrink_all(struct drm_i915_private *dev_priv);
+static long i915_gem_shrink_all(struct drm_i915_private *dev_priv);
 static void i915_gem_object_truncate(struct drm_i915_gem_object *obj);
 
 static bool cpu_cache_is_coherent(struct drm_device *dev,
@@ -1769,16 +1771,21 @@ i915_gem_purge(struct drm_i915_private *dev_priv, long target)
        return __i915_gem_shrink(dev_priv, target, true);
 }
 
-static void
+static long
 i915_gem_shrink_all(struct drm_i915_private *dev_priv)
 {
        struct drm_i915_gem_object *obj, *next;
+       long freed = 0;
 
        i915_gem_evict_everything(dev_priv->dev);
 
        list_for_each_entry_safe(obj, next, &dev_priv->mm.unbound_list,
-                                global_list)
+                                global_list) {
+               if (obj->pages_pin_count == 0)
+                       freed += obj->base.size >> PAGE_SHIFT;
                i915_gem_object_put_pages(obj);
+       }
+       return freed;
 }
 
 static int
@@ -4558,7 +4565,8 @@ i915_gem_load(struct drm_device *dev)
 
        dev_priv->mm.interruptible = true;
 
-       dev_priv->mm.inactive_shrinker.shrink = i915_gem_inactive_shrink;
+       dev_priv->mm.inactive_shrinker.scan_objects = i915_gem_inactive_scan;
+       dev_priv->mm.inactive_shrinker.count_objects = i915_gem_inactive_count;
        dev_priv->mm.inactive_shrinker.seeks = DEFAULT_SEEKS;
        register_shrinker(&dev_priv->mm.inactive_shrinker);
 }
@@ -4781,8 +4789,8 @@ static bool mutex_is_locked_by(struct mutex *mutex, struct task_struct *task)
 #endif
 }
 
-static int
-i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc)
+static unsigned long
+i915_gem_inactive_count(struct shrinker *shrinker, struct shrink_control *sc)
 {
        struct drm_i915_private *dev_priv =
                container_of(shrinker,
@@ -4790,45 +4798,35 @@ i915_gem_inactive_shrink(struct shrinker *shrinker, struct shrink_control *sc)
                             mm.inactive_shrinker);
        struct drm_device *dev = dev_priv->dev;
        struct drm_i915_gem_object *obj;
-       int nr_to_scan = sc->nr_to_scan;
        bool unlock = true;
-       int cnt;
+       unsigned long count;
 
        if (!mutex_trylock(&dev->struct_mutex)) {
                if (!mutex_is_locked_by(&dev->struct_mutex, current))
-                       return 0;
+                       return SHRINK_STOP;
 
                if (dev_priv->mm.shrinker_no_lock_stealing)
-                       return 0;
+                       return SHRINK_STOP;
 
                unlock = false;
        }
 
-       if (nr_to_scan) {
-               nr_to_scan -= i915_gem_purge(dev_priv, nr_to_scan);
-               if (nr_to_scan > 0)
-                       nr_to_scan -= __i915_gem_shrink(dev_priv, nr_to_scan,
-                                                       false);
-               if (nr_to_scan > 0)
-                       i915_gem_shrink_all(dev_priv);
-       }
-
-       cnt = 0;
+       count = 0;
        list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_list)
                if (obj->pages_pin_count == 0)
-                       cnt += obj->base.size >> PAGE_SHIFT;
+                       count += obj->base.size >> PAGE_SHIFT;
 
        list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
                if (obj->active)
                        continue;
 
                if (obj->pin_count == 0 && obj->pages_pin_count == 0)
-                       cnt += obj->base.size >> PAGE_SHIFT;
+                       count += obj->base.size >> PAGE_SHIFT;
        }
 
        if (unlock)
                mutex_unlock(&dev->struct_mutex);
-       return cnt;
+       return count;
 }
 
 /* All the new VM stuff */
@@ -4892,6 +4890,40 @@ unsigned long i915_gem_obj_size(struct drm_i915_gem_object *o,
        return 0;
 }
 
+static unsigned long
+i915_gem_inactive_scan(struct shrinker *shrinker, struct shrink_control *sc)
+{
+       struct drm_i915_private *dev_priv =
+               container_of(shrinker,
+                            struct drm_i915_private,
+                            mm.inactive_shrinker);
+       struct drm_device *dev = dev_priv->dev;
+       int nr_to_scan = sc->nr_to_scan;
+       unsigned long freed;
+       bool unlock = true;
+
+       if (!mutex_trylock(&dev->struct_mutex)) {
+               if (!mutex_is_locked_by(&dev->struct_mutex, current))
+                       return 0;
+
+               if (dev_priv->mm.shrinker_no_lock_stealing)
+                       return 0;
+
+               unlock = false;
+       }
+
+       freed = i915_gem_purge(dev_priv, nr_to_scan);
+       if (freed < nr_to_scan)
+               freed += __i915_gem_shrink(dev_priv, nr_to_scan,
+                                                       false);
+       if (freed < nr_to_scan)
+               freed += i915_gem_shrink_all(dev_priv);
+
+       if (unlock)
+               mutex_unlock(&dev->struct_mutex);
+       return freed;
+}
+
 struct i915_vma *i915_gem_obj_to_vma(struct drm_i915_gem_object *obj,
                                     struct i915_address_space *vm)
 {
index bd2a3b4..863bef9 100644 (file)
@@ -377,28 +377,26 @@ out:
        return nr_free;
 }
 
-/* Get good estimation how many pages are free in pools */
-static int ttm_pool_get_num_unused_pages(void)
-{
-       unsigned i;
-       int total = 0;
-       for (i = 0; i < NUM_POOLS; ++i)
-               total += _manager->pools[i].npages;
-
-       return total;
-}
-
 /**
  * Callback for mm to request pool to reduce number of page held.
+ *
+ * XXX: (dchinner) Deadlock warning!
+ *
+ * ttm_page_pool_free() does memory allocation using GFP_KERNEL.  That means
+ * this can deadlock when called with a sc->gfp_mask that is not equal to
+ * GFP_KERNEL.
+ *
+ * This code is crying out for a shrinker per pool....
  */
-static int ttm_pool_mm_shrink(struct shrinker *shrink,
-                             struct shrink_control *sc)
+static unsigned long
+ttm_pool_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
 {
        static atomic_t start_pool = ATOMIC_INIT(0);
        unsigned i;
        unsigned pool_offset = atomic_add_return(1, &start_pool);
        struct ttm_page_pool *pool;
        int shrink_pages = sc->nr_to_scan;
+       unsigned long freed = 0;
 
        pool_offset = pool_offset % NUM_POOLS;
        /* select start pool in round robin fashion */
@@ -408,14 +406,28 @@ static int ttm_pool_mm_shrink(struct shrinker *shrink,
                        break;
                pool = &_manager->pools[(i + pool_offset)%NUM_POOLS];
                shrink_pages = ttm_page_pool_free(pool, nr_free);
+               freed += nr_free - shrink_pages;
        }
-       /* return estimated number of unused pages in pool */
-       return ttm_pool_get_num_unused_pages();
+       return freed;
+}
+
+
+static unsigned long
+ttm_pool_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
+{
+       unsigned i;
+       unsigned long count = 0;
+
+       for (i = 0; i < NUM_POOLS; ++i)
+               count += _manager->pools[i].npages;
+
+       return count;
 }
 
 static void ttm_pool_mm_shrink_init(struct ttm_pool_manager *manager)
 {
-       manager->mm_shrink.shrink = &ttm_pool_mm_shrink;
+       manager->mm_shrink.count_objects = ttm_pool_shrink_count;
+       manager->mm_shrink.scan_objects = ttm_pool_shrink_scan;
        manager->mm_shrink.seeks = 1;
        register_shrinker(&manager->mm_shrink);
 }
index b8b3943..7957bee 100644 (file)
@@ -918,19 +918,6 @@ int ttm_dma_populate(struct ttm_dma_tt *ttm_dma, struct device *dev)
 }
 EXPORT_SYMBOL_GPL(ttm_dma_populate);
 
-/* Get good estimation how many pages are free in pools */
-static int ttm_dma_pool_get_num_unused_pages(void)
-{
-       struct device_pools *p;
-       unsigned total = 0;
-
-       mutex_lock(&_manager->lock);
-       list_for_each_entry(p, &_manager->pools, pools)
-               total += p->pool->npages_free;
-       mutex_unlock(&_manager->lock);
-       return total;
-}
-
 /* Put all pages in pages list to correct pool to wait for reuse */
 void ttm_dma_unpopulate(struct ttm_dma_tt *ttm_dma, struct device *dev)
 {
@@ -1002,18 +989,29 @@ EXPORT_SYMBOL_GPL(ttm_dma_unpopulate);
 
 /**
  * Callback for mm to request pool to reduce number of page held.
+ *
+ * XXX: (dchinner) Deadlock warning!
+ *
+ * ttm_dma_page_pool_free() does GFP_KERNEL memory allocation, and so attention
+ * needs to be paid to sc->gfp_mask to determine if this can be done or not.
+ * GFP_KERNEL memory allocation in a GFP_ATOMIC reclaim context would be really
+ * bad.
+ *
+ * I'm getting sadder as I hear more pathetic whimpers about needing per-pool
+ * shrinkers
  */
-static int ttm_dma_pool_mm_shrink(struct shrinker *shrink,
-                                 struct shrink_control *sc)
+static unsigned long
+ttm_dma_pool_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
 {
        static atomic_t start_pool = ATOMIC_INIT(0);
        unsigned idx = 0;
        unsigned pool_offset = atomic_add_return(1, &start_pool);
        unsigned shrink_pages = sc->nr_to_scan;
        struct device_pools *p;
+       unsigned long freed = 0;
 
        if (list_empty(&_manager->pools))
-               return 0;
+               return SHRINK_STOP;
 
        mutex_lock(&_manager->lock);
        pool_offset = pool_offset % _manager->npools;
@@ -1029,18 +1027,33 @@ static int ttm_dma_pool_mm_shrink(struct shrinker *shrink,
                        continue;
                nr_free = shrink_pages;
                shrink_pages = ttm_dma_page_pool_free(p->pool, nr_free);
+               freed += nr_free - shrink_pages;
+
                pr_debug("%s: (%s:%d) Asked to shrink %d, have %d more to go\n",
                         p->pool->dev_name, p->pool->name, current->pid,
                         nr_free, shrink_pages);
        }
        mutex_unlock(&_manager->lock);
-       /* return estimated number of unused pages in pool */
-       return ttm_dma_pool_get_num_unused_pages();
+       return freed;
+}
+
+static unsigned long
+ttm_dma_pool_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
+{
+       struct device_pools *p;
+       unsigned long count = 0;
+
+       mutex_lock(&_manager->lock);
+       list_for_each_entry(p, &_manager->pools, pools)
+               count += p->pool->npages_free;
+       mutex_unlock(&_manager->lock);
+       return count;
 }
 
 static void ttm_dma_pool_mm_shrink_init(struct ttm_pool_manager *manager)
 {
-       manager->mm_shrink.shrink = &ttm_dma_pool_mm_shrink;
+       manager->mm_shrink.count_objects = ttm_dma_pool_shrink_count;
+       manager->mm_shrink.scan_objects = &ttm_dma_pool_shrink_scan;
        manager->mm_shrink.seeks = 1;
        register_shrinker(&manager->mm_shrink);
 }
index 3d7c9f6..71b70e3 100644 (file)
@@ -773,7 +773,7 @@ config HID_ZYDACRON
 
 config HID_SENSOR_HUB
        tristate "HID Sensors framework support"
-       depends on HID && GENERIC_HARDIRQS
+       depends on HID
        select MFD_CORE
        default n
        ---help---
index ae88a97..b8470b1 100644 (file)
@@ -94,7 +94,6 @@ EXPORT_SYMBOL_GPL(hid_register_report);
 static struct hid_field *hid_register_field(struct hid_report *report, unsigned usages, unsigned values)
 {
        struct hid_field *field;
-       int i;
 
        if (report->maxfield == HID_MAX_FIELDS) {
                hid_err(report->device, "too many fields in report\n");
@@ -113,9 +112,6 @@ static struct hid_field *hid_register_field(struct hid_report *report, unsigned
        field->value = (s32 *)(field->usage + usages);
        field->report = report;
 
-       for (i = 0; i < usages; i++)
-               field->usage[i].usage_index = i;
-
        return field;
 }
 
@@ -226,9 +222,9 @@ static int hid_add_field(struct hid_parser *parser, unsigned report_type, unsign
 {
        struct hid_report *report;
        struct hid_field *field;
-       int usages;
+       unsigned usages;
        unsigned offset;
-       int i;
+       unsigned i;
 
        report = hid_register_report(parser->device, report_type, parser->global.report_id);
        if (!report) {
@@ -255,7 +251,8 @@ static int hid_add_field(struct hid_parser *parser, unsigned report_type, unsign
        if (!parser->local.usage_index) /* Ignore padding fields */
                return 0;
 
-       usages = max_t(int, parser->local.usage_index, parser->global.report_count);
+       usages = max_t(unsigned, parser->local.usage_index,
+                                parser->global.report_count);
 
        field = hid_register_field(report, usages, parser->global.report_count);
        if (!field)
@@ -266,13 +263,14 @@ static int hid_add_field(struct hid_parser *parser, unsigned report_type, unsign
        field->application = hid_lookup_collection(parser, HID_COLLECTION_APPLICATION);
 
        for (i = 0; i < usages; i++) {
-               int j = i;
+               unsigned j = i;
                /* Duplicate the last usage we parsed if we have excess values */
                if (i >= parser->local.usage_index)
                        j = parser->local.usage_index - 1;
                field->usage[i].hid = parser->local.usage[j];
                field->usage[i].collection_index =
                        parser->local.collection_index[j];
+               field->usage[i].usage_index = i;
        }
 
        field->maxusage = usages;
@@ -801,6 +799,64 @@ int hid_parse_report(struct hid_device *hid, __u8 *start, unsigned size)
 }
 EXPORT_SYMBOL_GPL(hid_parse_report);
 
+static const char * const hid_report_names[] = {
+       "HID_INPUT_REPORT",
+       "HID_OUTPUT_REPORT",
+       "HID_FEATURE_REPORT",
+};
+/**
+ * hid_validate_values - validate existing device report's value indexes
+ *
+ * @hid: hid device
+ * @type: which report type to examine
+ * @id: which report ID to examine (0 for first)
+ * @field_index: which report field to examine
+ * @report_counts: expected number of values
+ *
+ * Validate the number of values in a given field of a given report, after
+ * parsing.
+ */
+struct hid_report *hid_validate_values(struct hid_device *hid,
+                                      unsigned int type, unsigned int id,
+                                      unsigned int field_index,
+                                      unsigned int report_counts)
+{
+       struct hid_report *report;
+
+       if (type > HID_FEATURE_REPORT) {
+               hid_err(hid, "invalid HID report type %u\n", type);
+               return NULL;
+       }
+
+       if (id >= HID_MAX_IDS) {
+               hid_err(hid, "invalid HID report id %u\n", id);
+               return NULL;
+       }
+
+       /*
+        * Explicitly not using hid_get_report() here since it depends on
+        * ->numbered being checked, which may not always be the case when
+        * drivers go to access report values.
+        */
+       report = hid->report_enum[type].report_id_hash[id];
+       if (!report) {
+               hid_err(hid, "missing %s %u\n", hid_report_names[type], id);
+               return NULL;
+       }
+       if (report->maxfield <= field_index) {
+               hid_err(hid, "not enough fields in %s %u\n",
+                       hid_report_names[type], id);
+               return NULL;
+       }
+       if (report->field[field_index]->report_count < report_counts) {
+               hid_err(hid, "not enough values in %s %u field %u\n",
+                       hid_report_names[type], id, field_index);
+               return NULL;
+       }
+       return report;
+}
+EXPORT_SYMBOL_GPL(hid_validate_values);
+
 /**
  * hid_open_report - open a driver-specific device report
  *
@@ -1296,7 +1352,7 @@ int hid_report_raw_event(struct hid_device *hid, int type, u8 *data, int size,
                        goto out;
        }
 
-       if (hid->claimed != HID_CLAIMED_HIDRAW) {
+       if (hid->claimed != HID_CLAIMED_HIDRAW && report->maxfield) {
                for (a = 0; a < report->maxfield; a++)
                        hid_input_field(hid, report->field[a], cdata, interrupt);
                hdrv = hid->driver;
index b420f4a..8741d95 100644 (file)
@@ -485,6 +485,10 @@ static void hidinput_configure_usage(struct hid_input *hidinput, struct hid_fiel
        if (field->flags & HID_MAIN_ITEM_CONSTANT)
                goto ignore;
 
+       /* Ignore if report count is out of bounds. */
+       if (field->report_count < 1)
+               goto ignore;
+
        /* only LED usages are supported in output fields */
        if (field->report_type == HID_OUTPUT_REPORT &&
                        (usage->hid & HID_USAGE_PAGE) != HID_UP_LED) {
@@ -1236,7 +1240,11 @@ static void report_features(struct hid_device *hid)
 
        rep_enum = &hid->report_enum[HID_FEATURE_REPORT];
        list_for_each_entry(rep, &rep_enum->report_list, list)
-               for (i = 0; i < rep->maxfield; i++)
+               for (i = 0; i < rep->maxfield; i++) {
+                       /* Ignore if report count is out of bounds. */
+                       if (rep->field[i]->report_count < 1)
+                               continue;
+
                        for (j = 0; j < rep->field[i]->maxusage; j++) {
                                /* Verify if Battery Strength feature is available */
                                hidinput_setup_battery(hid, HID_FEATURE_REPORT, rep->field[i]);
@@ -1245,6 +1253,7 @@ static void report_features(struct hid_device *hid)
                                        drv->feature_mapping(hid, rep->field[i],
                                                             rep->field[i]->usage + j);
                        }
+               }
 }
 
 static struct hid_input *hidinput_allocate(struct hid_device *hid)
index 07837f5..31cf29a 100644 (file)
@@ -339,7 +339,15 @@ static int tpkbd_probe_tp(struct hid_device *hdev)
        struct tpkbd_data_pointer *data_pointer;
        size_t name_sz = strlen(dev_name(dev)) + 16;
        char *name_mute, *name_micmute;
-       int ret;
+       int i, ret;
+
+       /* Validate required reports. */
+       for (i = 0; i < 4; i++) {
+               if (!hid_validate_values(hdev, HID_FEATURE_REPORT, 4, i, 1))
+                       return -ENODEV;
+       }
+       if (!hid_validate_values(hdev, HID_OUTPUT_REPORT, 3, 0, 2))
+               return -ENODEV;
 
        if (sysfs_create_group(&hdev->dev.kobj,
                                &tpkbd_attr_group_pointer)) {
@@ -406,22 +414,27 @@ static int tpkbd_probe(struct hid_device *hdev,
        ret = hid_parse(hdev);
        if (ret) {
                hid_err(hdev, "hid_parse failed\n");
-               goto err_free;
+               goto err;
        }
 
        ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT);
        if (ret) {
                hid_err(hdev, "hid_hw_start failed\n");
-               goto err_free;
+               goto err;
        }
 
        uhdev = (struct usbhid_device *) hdev->driver_data;
 
-       if (uhdev->ifnum == 1)
-               return tpkbd_probe_tp(hdev);
+       if (uhdev->ifnum == 1) {
+               ret = tpkbd_probe_tp(hdev);
+               if (ret)
+                       goto err_hid;
+       }
 
        return 0;
-err_free:
+err_hid:
+       hid_hw_stop(hdev);
+err:
        return ret;
 }
 
index b3cd150..1a42eaa 100644 (file)
@@ -64,26 +64,13 @@ int lg2ff_init(struct hid_device *hid)
        struct hid_report *report;
        struct hid_input *hidinput = list_entry(hid->inputs.next,
                                                struct hid_input, list);
-       struct list_head *report_list =
-                       &hid->report_enum[HID_OUTPUT_REPORT].report_list;
        struct input_dev *dev = hidinput->input;
        int error;
 
-       if (list_empty(report_list)) {
-               hid_err(hid, "no output report found\n");
+       /* Check that the report looks ok */
+       report = hid_validate_values(hid, HID_OUTPUT_REPORT, 0, 0, 7);
+       if (!report)
                return -ENODEV;
-       }
-
-       report = list_entry(report_list->next, struct hid_report, list);
-
-       if (report->maxfield < 1) {
-               hid_err(hid, "output report is empty\n");
-               return -ENODEV;
-       }
-       if (report->field[0]->report_count < 7) {
-               hid_err(hid, "not enough values in the field\n");
-               return -ENODEV;
-       }
 
        lg2ff = kmalloc(sizeof(struct lg2ff_device), GFP_KERNEL);
        if (!lg2ff)
index e52f181..8c2da18 100644 (file)
@@ -66,10 +66,11 @@ static int hid_lg3ff_play(struct input_dev *dev, void *data,
        int x, y;
 
 /*
- * Maxusage should always be 63 (maximum fields)
- * likely a better way to ensure this data is clean
+ * Available values in the field should always be 63, but we only use up to
+ * 35. Instead, clear the entire area, however big it is.
  */
-       memset(report->field[0]->value, 0, sizeof(__s32)*report->field[0]->maxusage);
+       memset(report->field[0]->value, 0,
+              sizeof(__s32) * report->field[0]->report_count);
 
        switch (effect->type) {
        case FF_CONSTANT:
@@ -129,32 +130,14 @@ static const signed short ff3_joystick_ac[] = {
 int lg3ff_init(struct hid_device *hid)
 {
        struct hid_input *hidinput = list_entry(hid->inputs.next, struct hid_input, list);
-       struct list_head *report_list = &hid->report_enum[HID_OUTPUT_REPORT].report_list;
        struct input_dev *dev = hidinput->input;
-       struct hid_report *report;
-       struct hid_field *field;
        const signed short *ff_bits = ff3_joystick_ac;
        int error;
        int i;
 
-       /* Find the report to use */
-       if (list_empty(report_list)) {
-               hid_err(hid, "No output report found\n");
-               return -1;
-       }
-
        /* Check that the report looks ok */
-       report = list_entry(report_list->next, struct hid_report, list);
-       if (!report) {
-               hid_err(hid, "NULL output report\n");
-               return -1;
-       }
-
-       field = report->field[0];
-       if (!field) {
-               hid_err(hid, "NULL field\n");
-               return -1;
-       }
+       if (!hid_validate_values(hid, HID_OUTPUT_REPORT, 0, 0, 35))
+               return -ENODEV;
 
        /* Assume single fixed device G940 */
        for (i = 0; ff_bits[i] >= 0; i++)
index 0ddae2a..8782fe1 100644 (file)
@@ -484,34 +484,16 @@ static enum led_brightness lg4ff_led_get_brightness(struct led_classdev *led_cde
 int lg4ff_init(struct hid_device *hid)
 {
        struct hid_input *hidinput = list_entry(hid->inputs.next, struct hid_input, list);
-       struct list_head *report_list = &hid->report_enum[HID_OUTPUT_REPORT].report_list;
        struct input_dev *dev = hidinput->input;
-       struct hid_report *report;
-       struct hid_field *field;
        struct lg4ff_device_entry *entry;
        struct lg_drv_data *drv_data;
        struct usb_device_descriptor *udesc;
        int error, i, j;
        __u16 bcdDevice, rev_maj, rev_min;
 
-       /* Find the report to use */
-       if (list_empty(report_list)) {
-               hid_err(hid, "No output report found\n");
-               return -1;
-       }
-
        /* Check that the report looks ok */
-       report = list_entry(report_list->next, struct hid_report, list);
-       if (!report) {
-               hid_err(hid, "NULL output report\n");
+       if (!hid_validate_values(hid, HID_OUTPUT_REPORT, 0, 0, 7))
                return -1;
-       }
-
-       field = report->field[0];
-       if (!field) {
-               hid_err(hid, "NULL field\n");
-               return -1;
-       }
 
        /* Check what wheel has been connected */
        for (i = 0; i < ARRAY_SIZE(lg4ff_devices); i++) {
index d7ea8c8..e1394af 100644 (file)
@@ -128,27 +128,14 @@ static void hid_lgff_set_autocenter(struct input_dev *dev, u16 magnitude)
 int lgff_init(struct hid_device* hid)
 {
        struct hid_input *hidinput = list_entry(hid->inputs.next, struct hid_input, list);
-       struct list_head *report_list = &hid->report_enum[HID_OUTPUT_REPORT].report_list;
        struct input_dev *dev = hidinput->input;
-       struct hid_report *report;
-       struct hid_field *field;
        const signed short *ff_bits = ff_joystick;
        int error;
        int i;
 
-       /* Find the report to use */
-       if (list_empty(report_list)) {
-               hid_err(hid, "No output report found\n");
-               return -1;
-       }
-
        /* Check that the report looks ok */
-       report = list_entry(report_list->next, struct hid_report, list);
-       field = report->field[0];
-       if (!field) {
-               hid_err(hid, "NULL field\n");
-               return -1;
-       }
+       if (!hid_validate_values(hid, HID_OUTPUT_REPORT, 0, 0, 7))
+               return -ENODEV;
 
        for (i = 0; i < ARRAY_SIZE(devices); i++) {
                if (dev->id.vendor == devices[i].idVendor &&
index 7800b14..2e53024 100644 (file)
@@ -461,7 +461,7 @@ static int logi_dj_recv_send_report(struct dj_receiver_dev *djrcv_dev,
        struct hid_report *report;
        struct hid_report_enum *output_report_enum;
        u8 *data = (u8 *)(&dj_report->device_index);
-       int i;
+       unsigned int i;
 
        output_report_enum = &hdev->report_enum[HID_OUTPUT_REPORT];
        report = output_report_enum->report_id_hash[REPORT_ID_DJ_SHORT];
@@ -471,7 +471,7 @@ static int logi_dj_recv_send_report(struct dj_receiver_dev *djrcv_dev,
                return -ENODEV;
        }
 
-       for (i = 0; i < report->field[0]->report_count; i++)
+       for (i = 0; i < DJREPORT_SHORT_LENGTH - 1; i++)
                report->field[0]->value[i] = data[i];
 
        hid_hw_request(hdev, report, HID_REQ_SET_REPORT);
@@ -791,6 +791,12 @@ static int logi_dj_probe(struct hid_device *hdev,
                goto hid_parse_fail;
        }
 
+       if (!hid_validate_values(hdev, HID_OUTPUT_REPORT, REPORT_ID_DJ_SHORT,
+                                0, DJREPORT_SHORT_LENGTH - 1)) {
+               retval = -ENODEV;
+               goto hid_parse_fail;
+       }
+
        /* Starts the usb device and connects to upper interfaces hiddev and
         * hidraw */
        retval = hid_hw_start(hdev, HID_CONNECT_DEFAULT);
index ac28f08..5e5fe1b 100644 (file)
@@ -101,9 +101,9 @@ struct mt_device {
        unsigned last_slot_field;       /* the last field of a slot */
        unsigned mt_report_id;  /* the report ID of the multitouch device */
        unsigned pen_report_id; /* the report ID of the pen device */
-       __s8 inputmode;         /* InputMode HID feature, -1 if non-existent */
-       __s8 inputmode_index;   /* InputMode HID feature index in the report */
-       __s8 maxcontact_report_id;      /* Maximum Contact Number HID feature,
+       __s16 inputmode;        /* InputMode HID feature, -1 if non-existent */
+       __s16 inputmode_index;  /* InputMode HID feature index in the report */
+       __s16 maxcontact_report_id;     /* Maximum Contact Number HID feature,
                                   -1 if non-existent */
        __u8 num_received;      /* how many contacts we received */
        __u8 num_expected;      /* expected last contact index */
@@ -312,20 +312,18 @@ static void mt_feature_mapping(struct hid_device *hdev,
                struct hid_field *field, struct hid_usage *usage)
 {
        struct mt_device *td = hid_get_drvdata(hdev);
-       int i;
 
        switch (usage->hid) {
        case HID_DG_INPUTMODE:
-               td->inputmode = field->report->id;
-               td->inputmode_index = 0; /* has to be updated below */
-
-               for (i=0; i < field->maxusage; i++) {
-                       if (field->usage[i].hid == usage->hid) {
-                               td->inputmode_index = i;
-                               break;
-                       }
+               /* Ignore if value index is out of bounds. */
+               if (usage->usage_index >= field->report_count) {
+                       dev_err(&hdev->dev, "HID_DG_INPUTMODE out of range\n");
+                       break;
                }
 
+               td->inputmode = field->report->id;
+               td->inputmode_index = usage->usage_index;
+
                break;
        case HID_DG_CONTACTMAX:
                td->maxcontact_report_id = field->report->id;
@@ -511,6 +509,10 @@ static int mt_touch_input_mapping(struct hid_device *hdev, struct hid_input *hi,
                        mt_store_field(usage, td, hi);
                        return 1;
                case HID_DG_CONTACTCOUNT:
+                       /* Ignore if indexes are out of bounds. */
+                       if (field->index >= field->report->maxfield ||
+                           usage->usage_index >= field->report_count)
+                               return 1;
                        td->cc_index = field->index;
                        td->cc_value_index = usage->usage_index;
                        return 1;
index 30dbb6b..b18320d 100644 (file)
@@ -537,6 +537,10 @@ static int buzz_init(struct hid_device *hdev)
        drv_data = hid_get_drvdata(hdev);
        BUG_ON(!(drv_data->quirks & BUZZ_CONTROLLER));
 
+       /* Validate expected report characteristics. */
+       if (!hid_validate_values(hdev, HID_OUTPUT_REPORT, 0, 0, 7))
+               return -ENODEV;
+
        buzz = kzalloc(sizeof(*buzz), GFP_KERNEL);
        if (!buzz) {
                hid_err(hdev, "Insufficient memory, cannot allocate driver data\n");
index d164911..29f328f 100644 (file)
@@ -249,6 +249,11 @@ static int steelseries_srws1_probe(struct hid_device *hdev,
                goto err_free;
        }
 
+       if (!hid_validate_values(hdev, HID_OUTPUT_REPORT, 0, 0, 16)) {
+               ret = -ENODEV;
+               goto err_free;
+       }
+
        ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT);
        if (ret) {
                hid_err(hdev, "hw start failed\n");
index 6ec28a3..a29756c 100644 (file)
@@ -68,21 +68,13 @@ static int zpff_init(struct hid_device *hid)
        struct hid_report *report;
        struct hid_input *hidinput = list_entry(hid->inputs.next,
                                                struct hid_input, list);
-       struct list_head *report_list =
-                       &hid->report_enum[HID_OUTPUT_REPORT].report_list;
        struct input_dev *dev = hidinput->input;
-       int error;
+       int i, error;
 
-       if (list_empty(report_list)) {
-               hid_err(hid, "no output report found\n");
-               return -ENODEV;
-       }
-
-       report = list_entry(report_list->next, struct hid_report, list);
-
-       if (report->maxfield < 4) {
-               hid_err(hid, "not enough fields in report\n");
-               return -ENODEV;
+       for (i = 0; i < 4; i++) {
+               report = hid_validate_values(hid, HID_OUTPUT_REPORT, 0, i, 1);
+               if (!report)
+                       return -ENODEV;
        }
 
        zpff = kzalloc(sizeof(struct zpff_device), GFP_KERNEL);
index 4fe49d2..eea8172 100644 (file)
@@ -364,7 +364,7 @@ static ssize_t set_pwm1_enable(
        if (config < 0) {
                        dev_err(&client->dev,
                        "Error reading configuration register, aborting.\n");
-                       return -EIO;
+                       return config;
        }
 
        switch (val) {
@@ -416,11 +416,9 @@ static ssize_t get_temp_auto_point_temp(
        case 1:
                return sprintf(buf, "%d\n",
                        data->temp1_auto_point_temp[ix] * 1000);
-               break;
        case 2:
                return sprintf(buf, "%d\n",
                        data->temp2_auto_point_temp[ix] * 1000);
-               break;
        default:
                dev_dbg(dev, "Unknown attr->nr (%d).\n", nr);
                return -EINVAL;
@@ -513,7 +511,6 @@ static ssize_t set_temp_auto_point_temp(
                                count = -EIO;
                }
                goto EXIT;
-               break;
        case 1:
                ptemp[1] = clamp_val(val / 1000, (ptemp[0] & 0x7C) + 4, 124);
                ptemp[1] &= 0x7C;
@@ -665,7 +662,7 @@ static ssize_t set_fan1_div(
        if (config < 0) {
                dev_err(&client->dev,
                        "Error reading configuration register, aborting.\n");
-               return -EIO;
+               return config;
        }
        mutex_lock(&data->update_lock);
        switch (val) {
index b073056..2c137b2 100644 (file)
@@ -248,7 +248,7 @@ static ssize_t set_temp_min(struct device *dev, struct device_attribute *da,
 
        int result = kstrtol(buf, 10, &val);
        if (result < 0)
-               return -EINVAL;
+               return result;
 
        val = DIV_ROUND_CLOSEST(val, 1000);
        if ((val < -63) || (val > 127))
@@ -272,7 +272,7 @@ static ssize_t set_temp_max(struct device *dev, struct device_attribute *da,
 
        int result = kstrtol(buf, 10, &val);
        if (result < 0)
-               return -EINVAL;
+               return result;
 
        val = DIV_ROUND_CLOSEST(val, 1000);
        if ((val < -63) || (val > 127))
@@ -320,7 +320,7 @@ static ssize_t set_fan_div(struct device *dev, struct device_attribute *da,
 
        int status = kstrtol(buf, 10, &new_div);
        if (status < 0)
-               return -EINVAL;
+               return status;
 
        if (new_div == old_div) /* No change */
                return count;
@@ -394,7 +394,7 @@ static ssize_t set_fan_target(struct device *dev, struct device_attribute *da,
 
        int result = kstrtol(buf, 10, &rpm_target);
        if (result < 0)
-               return -EINVAL;
+               return result;
 
        /* Datasheet states 16384 as maximum RPM target (table 3.2) */
        if ((rpm_target < 0) || (rpm_target > 16384))
@@ -440,7 +440,7 @@ static ssize_t set_pwm_enable(struct device *dev, struct device_attribute *da,
 
        int result = kstrtol(buf, 10, &new_value);
        if (result < 0)
-               return -EINVAL;
+               return result;
 
        mutex_lock(&data->update_lock);
        switch (new_value) {
index e2b56a2..632f1dc 100644 (file)
@@ -292,7 +292,7 @@ static int aem_init_ipmi_data(struct aem_ipmi_data *data, int iface,
                dev_err(bmc,
                        "Unable to register user with IPMI interface %d\n",
                        data->interface);
-               return -EACCES;
+               return err;
        }
 
        return 0;
index e633856..d65f3fd 100644 (file)
@@ -202,7 +202,6 @@ static void k10temp_remove(struct pci_dev *pdev)
                           &sensor_dev_attr_temp1_crit.dev_attr);
        device_remove_file(&pdev->dev,
                           &sensor_dev_attr_temp1_crit_hyst.dev_attr);
-       pci_set_drvdata(pdev, NULL);
 }
 
 static DEFINE_PCI_DEVICE_TABLE(k10temp_id_table) = {
index 964c1d6..ae26b06 100644 (file)
@@ -210,7 +210,7 @@ static int tmp421_init_client(struct i2c_client *client)
        if (config < 0) {
                dev_err(&client->dev,
                        "Could not read configuration register (%d)\n", config);
-               return -ENODEV;
+               return config;
        }
 
        config_orig = config;
index e380c6e..7b7ea32 100644 (file)
@@ -75,7 +75,6 @@ config I2C_HELPER_AUTO
 
 config I2C_SMBUS
        tristate "SMBus-specific protocols" if !I2C_HELPER_AUTO
-       depends on GENERIC_HARDIRQS
        help
          Say Y here if you want support for SMBus extensions to the I2C
          specification. At the moment, the only supported extension is
index fcdd321..cdcbd83 100644 (file)
@@ -115,7 +115,7 @@ config I2C_I801
 
 config I2C_ISCH
        tristate "Intel SCH SMBus 1.0"
-       depends on PCI && GENERIC_HARDIRQS
+       depends on PCI
        select LPC_SCH
        help
          Say Y here if you want to use SMBus controller on the Intel SCH
@@ -546,7 +546,6 @@ config I2C_NUC900
 
 config I2C_OCORES
        tristate "OpenCores I2C Controller"
-       depends on GENERIC_HARDIRQS
        help
          If you say yes to this option, support will be included for the
          OpenCores I2C controller. For details see
@@ -791,7 +790,7 @@ config I2C_DIOLAN_U2C
 
 config I2C_PARPORT
        tristate "Parallel port adapter"
-       depends on PARPORT && GENERIC_HARDIRQS
+       depends on PARPORT
        select I2C_ALGOBIT
        select I2C_SMBUS
        help
@@ -816,7 +815,6 @@ config I2C_PARPORT
 
 config I2C_PARPORT_LIGHT
        tristate "Parallel port adapter (light)"
-       depends on GENERIC_HARDIRQS
        select I2C_ALGOBIT
        select I2C_SMBUS
        help
index 5747341..132369f 100644 (file)
@@ -662,7 +662,7 @@ static int davinci_i2c_probe(struct platform_device *pdev)
 #endif
        dev->dev = &pdev->dev;
        dev->irq = irq->start;
-       dev->pdata = dev_get_platdata(&dev->dev);
+       dev->pdata = dev_get_platdata(&pdev->dev);
        platform_set_drvdata(pdev, dev);
 
        if (!dev->pdata && pdev->dev.of_node) {
index cbea327..90cf0cd 100644 (file)
@@ -4,7 +4,6 @@
 
 menuconfig IIO
        tristate "Industrial I/O support"
-       depends on GENERIC_HARDIRQS
        help
          The industrial I/O subsystem provides a unified framework for
          drivers for many different types of embedded sensors using a
index d03ca4c..495be09 100644 (file)
@@ -8,7 +8,7 @@ config INFINIBAND_QIB
 
 config INFINIBAND_QIB_DCA
        bool "QIB DCA support"
-       depends on INFINIBAND_QIB && DCA && SMP && GENERIC_HARDIRQS && !(INFINIBAND_QIB=y && DCA=m)
+       depends on INFINIBAND_QIB && DCA && SMP && !(INFINIBAND_QIB=y && DCA=m)
        default y
        ---help---
        Setting this enables DCA support on some Intel chip sets
index 3f62041..3591855 100644 (file)
@@ -1,7 +1,7 @@
 /*******************************************************************************
  * This file contains iSCSI extentions for RDMA (iSER) Verbs
  *
- * (c) Copyright 2013 RisingTide Systems LLC.
+ * (c) Copyright 2013 Datera, Inc.
  *
  * Nicholas A. Bellinger <nab@linux-iscsi.org>
  *
@@ -39,7 +39,17 @@ static DEFINE_MUTEX(device_list_mutex);
 static LIST_HEAD(device_list);
 static struct workqueue_struct *isert_rx_wq;
 static struct workqueue_struct *isert_comp_wq;
-static struct kmem_cache *isert_cmd_cache;
+
+static void
+isert_unmap_cmd(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn);
+static int
+isert_map_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
+              struct isert_rdma_wr *wr);
+static void
+isert_unreg_rdma_frwr(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn);
+static int
+isert_reg_rdma_frwr(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
+                   struct isert_rdma_wr *wr);
 
 static void
 isert_qp_event_callback(struct ib_event *e, void *context)
@@ -80,14 +90,8 @@ isert_conn_setup_qp(struct isert_conn *isert_conn, struct rdma_cm_id *cma_id)
 {
        struct isert_device *device = isert_conn->conn_device;
        struct ib_qp_init_attr attr;
-       struct ib_device_attr devattr;
        int ret, index, min_index = 0;
 
-       memset(&devattr, 0, sizeof(struct ib_device_attr));
-       ret = isert_query_device(cma_id->device, &devattr);
-       if (ret)
-               return ret;
-
        mutex_lock(&device_list_mutex);
        for (index = 0; index < device->cqs_used; index++)
                if (device->cq_active_qps[index] <
@@ -108,7 +112,7 @@ isert_conn_setup_qp(struct isert_conn *isert_conn, struct rdma_cm_id *cma_id)
         * FIXME: Use devattr.max_sge - 2 for max_send_sge as
         * work-around for RDMA_READ..
         */
-       attr.cap.max_send_sge = devattr.max_sge - 2;
+       attr.cap.max_send_sge = device->dev_attr.max_sge - 2;
        isert_conn->max_sge = attr.cap.max_send_sge;
 
        attr.cap.max_recv_sge = 1;
@@ -210,14 +214,31 @@ isert_create_device_ib_res(struct isert_device *device)
 {
        struct ib_device *ib_dev = device->ib_device;
        struct isert_cq_desc *cq_desc;
+       struct ib_device_attr *dev_attr;
        int ret = 0, i, j;
 
+       dev_attr = &device->dev_attr;
+       ret = isert_query_device(ib_dev, dev_attr);
+       if (ret)
+               return ret;
+
+       /* assign function handlers */
+       if (dev_attr->device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS) {
+               device->use_frwr = 1;
+               device->reg_rdma_mem = isert_reg_rdma_frwr;
+               device->unreg_rdma_mem = isert_unreg_rdma_frwr;
+       } else {
+               device->use_frwr = 0;
+               device->reg_rdma_mem = isert_map_rdma;
+               device->unreg_rdma_mem = isert_unmap_cmd;
+       }
+
        device->cqs_used = min_t(int, num_online_cpus(),
                                 device->ib_device->num_comp_vectors);
        device->cqs_used = min(ISERT_MAX_CQ, device->cqs_used);
-       pr_debug("Using %d CQs, device %s supports %d vectors\n",
+       pr_debug("Using %d CQs, device %s supports %d vectors support FRWR %d\n",
                 device->cqs_used, device->ib_device->name,
-                device->ib_device->num_comp_vectors);
+                device->ib_device->num_comp_vectors, device->use_frwr);
        device->cq_desc = kzalloc(sizeof(struct isert_cq_desc) *
                                device->cqs_used, GFP_KERNEL);
        if (!device->cq_desc) {
@@ -363,6 +384,85 @@ isert_device_find_by_ib_dev(struct rdma_cm_id *cma_id)
        return device;
 }
 
+static void
+isert_conn_free_frwr_pool(struct isert_conn *isert_conn)
+{
+       struct fast_reg_descriptor *fr_desc, *tmp;
+       int i = 0;
+
+       if (list_empty(&isert_conn->conn_frwr_pool))
+               return;
+
+       pr_debug("Freeing conn %p frwr pool", isert_conn);
+
+       list_for_each_entry_safe(fr_desc, tmp,
+                                &isert_conn->conn_frwr_pool, list) {
+               list_del(&fr_desc->list);
+               ib_free_fast_reg_page_list(fr_desc->data_frpl);
+               ib_dereg_mr(fr_desc->data_mr);
+               kfree(fr_desc);
+               ++i;
+       }
+
+       if (i < isert_conn->conn_frwr_pool_size)
+               pr_warn("Pool still has %d regions registered\n",
+                       isert_conn->conn_frwr_pool_size - i);
+}
+
+static int
+isert_conn_create_frwr_pool(struct isert_conn *isert_conn)
+{
+       struct fast_reg_descriptor *fr_desc;
+       struct isert_device *device = isert_conn->conn_device;
+       int i, ret;
+
+       INIT_LIST_HEAD(&isert_conn->conn_frwr_pool);
+       isert_conn->conn_frwr_pool_size = 0;
+       for (i = 0; i < ISCSI_DEF_XMIT_CMDS_MAX; i++) {
+               fr_desc = kzalloc(sizeof(*fr_desc), GFP_KERNEL);
+               if (!fr_desc) {
+                       pr_err("Failed to allocate fast_reg descriptor\n");
+                       ret = -ENOMEM;
+                       goto err;
+               }
+
+               fr_desc->data_frpl =
+                       ib_alloc_fast_reg_page_list(device->ib_device,
+                                                   ISCSI_ISER_SG_TABLESIZE);
+               if (IS_ERR(fr_desc->data_frpl)) {
+                       pr_err("Failed to allocate fr_pg_list err=%ld\n",
+                              PTR_ERR(fr_desc->data_frpl));
+                       ret = PTR_ERR(fr_desc->data_frpl);
+                       goto err;
+               }
+
+               fr_desc->data_mr = ib_alloc_fast_reg_mr(device->dev_pd,
+                                       ISCSI_ISER_SG_TABLESIZE);
+               if (IS_ERR(fr_desc->data_mr)) {
+                       pr_err("Failed to allocate frmr err=%ld\n",
+                              PTR_ERR(fr_desc->data_mr));
+                       ret = PTR_ERR(fr_desc->data_mr);
+                       ib_free_fast_reg_page_list(fr_desc->data_frpl);
+                       goto err;
+               }
+               pr_debug("Create fr_desc %p page_list %p\n",
+                        fr_desc, fr_desc->data_frpl->page_list);
+
+               fr_desc->valid = true;
+               list_add_tail(&fr_desc->list, &isert_conn->conn_frwr_pool);
+               isert_conn->conn_frwr_pool_size++;
+       }
+
+       pr_debug("Creating conn %p frwr pool size=%d",
+                isert_conn, isert_conn->conn_frwr_pool_size);
+
+       return 0;
+
+err:
+       isert_conn_free_frwr_pool(isert_conn);
+       return ret;
+}
+
 static int
 isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
 {
@@ -389,6 +489,7 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
        kref_init(&isert_conn->conn_kref);
        kref_get(&isert_conn->conn_kref);
        mutex_init(&isert_conn->conn_mutex);
+       spin_lock_init(&isert_conn->conn_lock);
 
        cma_id->context = isert_conn;
        isert_conn->conn_cm_id = cma_id;
@@ -446,6 +547,14 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
        isert_conn->conn_pd = device->dev_pd;
        isert_conn->conn_mr = device->dev_mr;
 
+       if (device->use_frwr) {
+               ret = isert_conn_create_frwr_pool(isert_conn);
+               if (ret) {
+                       pr_err("Conn: %p failed to create frwr_pool\n", isert_conn);
+                       goto out_frwr;
+               }
+       }
+
        ret = isert_conn_setup_qp(isert_conn, cma_id);
        if (ret)
                goto out_conn_dev;
@@ -459,6 +568,9 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event)
        return 0;
 
 out_conn_dev:
+       if (device->use_frwr)
+               isert_conn_free_frwr_pool(isert_conn);
+out_frwr:
        isert_device_try_release(device);
 out_rsp_dma_map:
        ib_dma_unmap_single(ib_dev, isert_conn->login_rsp_dma,
@@ -482,6 +594,9 @@ isert_connect_release(struct isert_conn *isert_conn)
 
        pr_debug("Entering isert_connect_release(): >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n");
 
+       if (device->use_frwr)
+               isert_conn_free_frwr_pool(isert_conn);
+
        if (isert_conn->conn_qp) {
                cq_index = ((struct isert_cq_desc *)
                        isert_conn->conn_qp->recv_cq->cq_context)->cq_index;
@@ -869,46 +984,37 @@ isert_rx_login_req(struct iser_rx_desc *rx_desc, int rx_buflen,
                 size, rx_buflen, MAX_KEY_VALUE_PAIRS);
        memcpy(login->req_buf, &rx_desc->data[0], size);
 
-       complete(&isert_conn->conn_login_comp);
-}
-
-static void
-isert_release_cmd(struct iscsi_cmd *cmd)
-{
-       struct isert_cmd *isert_cmd = container_of(cmd, struct isert_cmd,
-                                                  iscsi_cmd);
-
-       pr_debug("Entering isert_release_cmd %p >>>>>>>>>>>>>>>.\n", isert_cmd);
-
-       kfree(cmd->buf_ptr);
-       kfree(cmd->tmr_req);
-
-       kmem_cache_free(isert_cmd_cache, isert_cmd);
+       if (login->first_request) {
+               complete(&isert_conn->conn_login_comp);
+               return;
+       }
+       schedule_delayed_work(&conn->login_work, 0);
 }
 
 static struct iscsi_cmd
-*isert_alloc_cmd(struct iscsi_conn *conn, gfp_t gfp)
+*isert_allocate_cmd(struct iscsi_conn *conn, gfp_t gfp)
 {
        struct isert_conn *isert_conn = (struct isert_conn *)conn->context;
        struct isert_cmd *isert_cmd;
+       struct iscsi_cmd *cmd;
 
-       isert_cmd = kmem_cache_zalloc(isert_cmd_cache, gfp);
-       if (!isert_cmd) {
-               pr_err("Unable to allocate isert_cmd\n");
+       cmd = iscsit_allocate_cmd(conn, gfp);
+       if (!cmd) {
+               pr_err("Unable to allocate iscsi_cmd + isert_cmd\n");
                return NULL;
        }
+       isert_cmd = iscsit_priv_cmd(cmd);
        isert_cmd->conn = isert_conn;
-       isert_cmd->iscsi_cmd.release_cmd = &isert_release_cmd;
+       isert_cmd->iscsi_cmd = cmd;
 
-       return &isert_cmd->iscsi_cmd;
+       return cmd;
 }
 
 static int
 isert_handle_scsi_cmd(struct isert_conn *isert_conn,
-                     struct isert_cmd *isert_cmd, struct iser_rx_desc *rx_desc,
-                     unsigned char *buf)
+                     struct isert_cmd *isert_cmd, struct iscsi_cmd *cmd,
+                     struct iser_rx_desc *rx_desc, unsigned char *buf)
 {
-       struct iscsi_cmd *cmd = &isert_cmd->iscsi_cmd;
        struct iscsi_conn *conn = isert_conn->conn;
        struct iscsi_scsi_req *hdr = (struct iscsi_scsi_req *)buf;
        struct scatterlist *sg;
@@ -1015,9 +1121,9 @@ isert_handle_iscsi_dataout(struct isert_conn *isert_conn,
 
 static int
 isert_handle_nop_out(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
-                    struct iser_rx_desc *rx_desc, unsigned char *buf)
+                    struct iscsi_cmd *cmd, struct iser_rx_desc *rx_desc,
+                    unsigned char *buf)
 {
-       struct iscsi_cmd *cmd = &isert_cmd->iscsi_cmd;
        struct iscsi_conn *conn = isert_conn->conn;
        struct iscsi_nopout *hdr = (struct iscsi_nopout *)buf;
        int rc;
@@ -1034,9 +1140,9 @@ isert_handle_nop_out(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
 
 static int
 isert_handle_text_cmd(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
-                     struct iser_rx_desc *rx_desc, struct iscsi_text *hdr)
+                     struct iscsi_cmd *cmd, struct iser_rx_desc *rx_desc,
+                     struct iscsi_text *hdr)
 {
-       struct iscsi_cmd *cmd = &isert_cmd->iscsi_cmd;
        struct iscsi_conn *conn = isert_conn->conn;
        u32 payload_length = ntoh24(hdr->dlength);
        int rc;
@@ -1081,26 +1187,26 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc,
 
        switch (opcode) {
        case ISCSI_OP_SCSI_CMD:
-               cmd = iscsit_allocate_cmd(conn, GFP_KERNEL);
+               cmd = isert_allocate_cmd(conn, GFP_KERNEL);
                if (!cmd)
                        break;
 
-               isert_cmd = container_of(cmd, struct isert_cmd, iscsi_cmd);
+               isert_cmd = iscsit_priv_cmd(cmd);
                isert_cmd->read_stag = read_stag;
                isert_cmd->read_va = read_va;
                isert_cmd->write_stag = write_stag;
                isert_cmd->write_va = write_va;
 
-               ret = isert_handle_scsi_cmd(isert_conn, isert_cmd,
+               ret = isert_handle_scsi_cmd(isert_conn, isert_cmd, cmd,
                                        rx_desc, (unsigned char *)hdr);
                break;
        case ISCSI_OP_NOOP_OUT:
-               cmd = iscsit_allocate_cmd(conn, GFP_KERNEL);
+               cmd = isert_allocate_cmd(conn, GFP_KERNEL);
                if (!cmd)
                        break;
 
-               isert_cmd = container_of(cmd, struct isert_cmd, iscsi_cmd);
-               ret = isert_handle_nop_out(isert_conn, isert_cmd,
+               isert_cmd = iscsit_priv_cmd(cmd);
+               ret = isert_handle_nop_out(isert_conn, isert_cmd, cmd,
                                           rx_desc, (unsigned char *)hdr);
                break;
        case ISCSI_OP_SCSI_DATA_OUT:
@@ -1108,7 +1214,7 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc,
                                                (unsigned char *)hdr);
                break;
        case ISCSI_OP_SCSI_TMFUNC:
-               cmd = iscsit_allocate_cmd(conn, GFP_KERNEL);
+               cmd = isert_allocate_cmd(conn, GFP_KERNEL);
                if (!cmd)
                        break;
 
@@ -1116,7 +1222,7 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc,
                                                (unsigned char *)hdr);
                break;
        case ISCSI_OP_LOGOUT:
-               cmd = iscsit_allocate_cmd(conn, GFP_KERNEL);
+               cmd = isert_allocate_cmd(conn, GFP_KERNEL);
                if (!cmd)
                        break;
 
@@ -1127,12 +1233,12 @@ isert_rx_opcode(struct isert_conn *isert_conn, struct iser_rx_desc *rx_desc,
                                                    HZ);
                break;
        case ISCSI_OP_TEXT:
-               cmd = iscsit_allocate_cmd(conn, GFP_KERNEL);
+               cmd = isert_allocate_cmd(conn, GFP_KERNEL);
                if (!cmd)
                        break;
 
-               isert_cmd = container_of(cmd, struct isert_cmd, iscsi_cmd);
-               ret = isert_handle_text_cmd(isert_conn, isert_cmd,
+               isert_cmd = iscsit_priv_cmd(cmd);
+               ret = isert_handle_text_cmd(isert_conn, isert_cmd, cmd,
                                            rx_desc, (struct iscsi_text *)hdr);
                break;
        default:
@@ -1243,26 +1349,65 @@ isert_unmap_cmd(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn)
        struct isert_rdma_wr *wr = &isert_cmd->rdma_wr;
        struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
 
-       pr_debug("isert_unmap_cmd >>>>>>>>>>>>>>>>>>>>>>>\n");
+       pr_debug("isert_unmap_cmd: %p\n", isert_cmd);
+       if (wr->sge) {
+               pr_debug("isert_unmap_cmd: %p unmap_sg op\n", isert_cmd);
+               ib_dma_unmap_sg(ib_dev, wr->sge, wr->num_sge,
+                               (wr->iser_ib_op == ISER_IB_RDMA_WRITE) ?
+                               DMA_TO_DEVICE : DMA_FROM_DEVICE);
+               wr->sge = NULL;
+       }
+
+       if (wr->send_wr) {
+               pr_debug("isert_unmap_cmd: %p free send_wr\n", isert_cmd);
+               kfree(wr->send_wr);
+               wr->send_wr = NULL;
+       }
+
+       if (wr->ib_sge) {
+               pr_debug("isert_unmap_cmd: %p free ib_sge\n", isert_cmd);
+               kfree(wr->ib_sge);
+               wr->ib_sge = NULL;
+       }
+}
+
+static void
+isert_unreg_rdma_frwr(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn)
+{
+       struct isert_rdma_wr *wr = &isert_cmd->rdma_wr;
+       struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
+       LIST_HEAD(unmap_list);
+
+       pr_debug("unreg_frwr_cmd: %p\n", isert_cmd);
+
+       if (wr->fr_desc) {
+               pr_debug("unreg_frwr_cmd: %p free fr_desc %p\n",
+                        isert_cmd, wr->fr_desc);
+               spin_lock_bh(&isert_conn->conn_lock);
+               list_add_tail(&wr->fr_desc->list, &isert_conn->conn_frwr_pool);
+               spin_unlock_bh(&isert_conn->conn_lock);
+               wr->fr_desc = NULL;
+       }
 
        if (wr->sge) {
-               ib_dma_unmap_sg(ib_dev, wr->sge, wr->num_sge, DMA_TO_DEVICE);
+               pr_debug("unreg_frwr_cmd: %p unmap_sg op\n", isert_cmd);
+               ib_dma_unmap_sg(ib_dev, wr->sge, wr->num_sge,
+                               (wr->iser_ib_op == ISER_IB_RDMA_WRITE) ?
+                               DMA_TO_DEVICE : DMA_FROM_DEVICE);
                wr->sge = NULL;
        }
 
-       kfree(wr->send_wr);
+       wr->ib_sge = NULL;
        wr->send_wr = NULL;
-
-       kfree(isert_cmd->ib_sge);
-       isert_cmd->ib_sge = NULL;
 }
 
 static void
 isert_put_cmd(struct isert_cmd *isert_cmd)
 {
-       struct iscsi_cmd *cmd = &isert_cmd->iscsi_cmd;
+       struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd;
        struct isert_conn *isert_conn = isert_cmd->conn;
        struct iscsi_conn *conn = isert_conn->conn;
+       struct isert_device *device = isert_conn->conn_device;
 
        pr_debug("Entering isert_put_cmd: %p\n", isert_cmd);
 
@@ -1276,7 +1421,7 @@ isert_put_cmd(struct isert_cmd *isert_cmd)
                if (cmd->data_direction == DMA_TO_DEVICE)
                        iscsit_stop_dataout_timer(cmd);
 
-               isert_unmap_cmd(isert_cmd, isert_conn);
+               device->unreg_rdma_mem(isert_cmd, isert_conn);
                transport_generic_free_cmd(&cmd->se_cmd, 0);
                break;
        case ISCSI_OP_SCSI_TMFUNC:
@@ -1311,7 +1456,7 @@ isert_put_cmd(struct isert_cmd *isert_cmd)
                 * Fall-through
                 */
        default:
-               isert_release_cmd(cmd);
+               iscsit_release_cmd(cmd);
                break;
        }
 }
@@ -1347,27 +1492,16 @@ isert_completion_rdma_read(struct iser_tx_desc *tx_desc,
                           struct isert_cmd *isert_cmd)
 {
        struct isert_rdma_wr *wr = &isert_cmd->rdma_wr;
-       struct iscsi_cmd *cmd = &isert_cmd->iscsi_cmd;
+       struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd;
        struct se_cmd *se_cmd = &cmd->se_cmd;
-       struct ib_device *ib_dev = isert_cmd->conn->conn_cm_id->device;
+       struct isert_conn *isert_conn = isert_cmd->conn;
+       struct isert_device *device = isert_conn->conn_device;
 
        iscsit_stop_dataout_timer(cmd);
+       device->unreg_rdma_mem(isert_cmd, isert_conn);
+       cmd->write_data_done = wr->cur_rdma_length;
 
-       if (wr->sge) {
-               pr_debug("isert_do_rdma_read_comp: Unmapping wr->sge from t_data_sg\n");
-               ib_dma_unmap_sg(ib_dev, wr->sge, wr->num_sge, DMA_TO_DEVICE);
-               wr->sge = NULL;
-       }
-
-       if (isert_cmd->ib_sge) {
-               pr_debug("isert_do_rdma_read_comp: Freeing isert_cmd->ib_sge\n");
-               kfree(isert_cmd->ib_sge);
-               isert_cmd->ib_sge = NULL;
-       }
-
-       cmd->write_data_done = se_cmd->data_length;
-
-       pr_debug("isert_do_rdma_read_comp, calling target_execute_cmd\n");
+       pr_debug("Cmd: %p RDMA_READ comp calling execute_cmd\n", isert_cmd);
        spin_lock_bh(&cmd->istate_lock);
        cmd->cmd_flags |= ICF_GOT_LAST_DATAOUT;
        cmd->i_state = ISTATE_RECEIVED_LAST_DATAOUT;
@@ -1383,7 +1517,7 @@ isert_do_control_comp(struct work_struct *work)
                        struct isert_cmd, comp_work);
        struct isert_conn *isert_conn = isert_cmd->conn;
        struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
-       struct iscsi_cmd *cmd = &isert_cmd->iscsi_cmd;
+       struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd;
 
        switch (cmd->i_state) {
        case ISTATE_SEND_TASKMGTRSP:
@@ -1429,7 +1563,7 @@ isert_response_completion(struct iser_tx_desc *tx_desc,
                          struct isert_conn *isert_conn,
                          struct ib_device *ib_dev)
 {
-       struct iscsi_cmd *cmd = &isert_cmd->iscsi_cmd;
+       struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd;
 
        if (cmd->i_state == ISTATE_SEND_TASKMGTRSP ||
            cmd->i_state == ISTATE_SEND_LOGOUTRSP ||
@@ -1621,8 +1755,7 @@ isert_post_response(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd)
 static int
 isert_put_response(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
 {
-       struct isert_cmd *isert_cmd = container_of(cmd,
-                                       struct isert_cmd, iscsi_cmd);
+       struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
        struct isert_conn *isert_conn = (struct isert_conn *)conn->context;
        struct ib_send_wr *send_wr = &isert_cmd->tx_desc.send_wr;
        struct iscsi_scsi_rsp *hdr = (struct iscsi_scsi_rsp *)
@@ -1671,8 +1804,7 @@ static int
 isert_put_nopin(struct iscsi_cmd *cmd, struct iscsi_conn *conn,
                bool nopout_response)
 {
-       struct isert_cmd *isert_cmd = container_of(cmd,
-                               struct isert_cmd, iscsi_cmd);
+       struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
        struct isert_conn *isert_conn = (struct isert_conn *)conn->context;
        struct ib_send_wr *send_wr = &isert_cmd->tx_desc.send_wr;
 
@@ -1691,8 +1823,7 @@ isert_put_nopin(struct iscsi_cmd *cmd, struct iscsi_conn *conn,
 static int
 isert_put_logout_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
 {
-       struct isert_cmd *isert_cmd = container_of(cmd,
-                               struct isert_cmd, iscsi_cmd);
+       struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
        struct isert_conn *isert_conn = (struct isert_conn *)conn->context;
        struct ib_send_wr *send_wr = &isert_cmd->tx_desc.send_wr;
 
@@ -1710,8 +1841,7 @@ isert_put_logout_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
 static int
 isert_put_tm_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
 {
-       struct isert_cmd *isert_cmd = container_of(cmd,
-                               struct isert_cmd, iscsi_cmd);
+       struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
        struct isert_conn *isert_conn = (struct isert_conn *)conn->context;
        struct ib_send_wr *send_wr = &isert_cmd->tx_desc.send_wr;
 
@@ -1729,8 +1859,7 @@ isert_put_tm_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
 static int
 isert_put_reject(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
 {
-       struct isert_cmd *isert_cmd = container_of(cmd,
-                               struct isert_cmd, iscsi_cmd);
+       struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
        struct isert_conn *isert_conn = (struct isert_conn *)conn->context;
        struct ib_send_wr *send_wr = &isert_cmd->tx_desc.send_wr;
        struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
@@ -1762,8 +1891,7 @@ isert_put_reject(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
 static int
 isert_put_text_rsp(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
 {
-       struct isert_cmd *isert_cmd = container_of(cmd,
-                               struct isert_cmd, iscsi_cmd);
+       struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
        struct isert_conn *isert_conn = (struct isert_conn *)conn->context;
        struct ib_send_wr *send_wr = &isert_cmd->tx_desc.send_wr;
        struct iscsi_text_rsp *hdr =
@@ -1805,7 +1933,7 @@ isert_build_rdma_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
                    struct ib_sge *ib_sge, struct ib_send_wr *send_wr,
                    u32 data_left, u32 offset)
 {
-       struct iscsi_cmd *cmd = &isert_cmd->iscsi_cmd;
+       struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd;
        struct scatterlist *sg_start, *tmp_sg;
        struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
        u32 sg_off, page_off;
@@ -1832,8 +1960,8 @@ isert_build_rdma_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
                                ib_sg_dma_len(ib_dev, tmp_sg) - page_off);
                ib_sge->lkey = isert_conn->conn_mr->lkey;
 
-               pr_debug("RDMA ib_sge: addr: 0x%16llx  length: %u\n",
-                        ib_sge->addr, ib_sge->length);
+               pr_debug("RDMA ib_sge: addr: 0x%16llx  length: %u lkey: %08x\n",
+                        ib_sge->addr, ib_sge->length, ib_sge->lkey);
                page_off = 0;
                data_left -= ib_sge->length;
                ib_sge++;
@@ -1847,200 +1975,373 @@ isert_build_rdma_wr(struct isert_conn *isert_conn, struct isert_cmd *isert_cmd,
 }
 
 static int
-isert_put_datain(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
+isert_map_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
+              struct isert_rdma_wr *wr)
 {
        struct se_cmd *se_cmd = &cmd->se_cmd;
-       struct isert_cmd *isert_cmd = container_of(cmd,
-                                       struct isert_cmd, iscsi_cmd);
-       struct isert_rdma_wr *wr = &isert_cmd->rdma_wr;
+       struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
        struct isert_conn *isert_conn = (struct isert_conn *)conn->context;
-       struct ib_send_wr *wr_failed, *send_wr;
        struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
+       struct ib_send_wr *send_wr;
        struct ib_sge *ib_sge;
-       struct scatterlist *sg;
-       u32 offset = 0, data_len, data_left, rdma_write_max;
-       int rc, ret = 0, count, sg_nents, i, ib_sge_cnt;
-
-       pr_debug("RDMA_WRITE: data_length: %u\n", se_cmd->data_length);
+       struct scatterlist *sg_start;
+       u32 sg_off = 0, sg_nents;
+       u32 offset = 0, data_len, data_left, rdma_write_max, va_offset = 0;
+       int ret = 0, count, i, ib_sge_cnt;
+
+       if (wr->iser_ib_op == ISER_IB_RDMA_WRITE) {
+               data_left = se_cmd->data_length;
+               iscsit_increment_maxcmdsn(cmd, conn->sess);
+               cmd->stat_sn = conn->stat_sn++;
+       } else {
+               sg_off = cmd->write_data_done / PAGE_SIZE;
+               data_left = se_cmd->data_length - cmd->write_data_done;
+               offset = cmd->write_data_done;
+               isert_cmd->tx_desc.isert_cmd = isert_cmd;
+       }
 
-       sg = &se_cmd->t_data_sg[0];
-       sg_nents = se_cmd->t_data_nents;
+       sg_start = &cmd->se_cmd.t_data_sg[sg_off];
+       sg_nents = se_cmd->t_data_nents - sg_off;
 
-       count = ib_dma_map_sg(ib_dev, sg, sg_nents, DMA_TO_DEVICE);
+       count = ib_dma_map_sg(ib_dev, sg_start, sg_nents,
+                             (wr->iser_ib_op == ISER_IB_RDMA_WRITE) ?
+                             DMA_TO_DEVICE : DMA_FROM_DEVICE);
        if (unlikely(!count)) {
-               pr_err("Unable to map put_datain SGs\n");
+               pr_err("Cmd: %p unrable to map SGs\n", isert_cmd);
                return -EINVAL;
        }
-       wr->sge = sg;
+       wr->sge = sg_start;
        wr->num_sge = sg_nents;
-       pr_debug("Mapped IB count: %u sg: %p sg_nents: %u for RDMA_WRITE\n",
-                count, sg, sg_nents);
+       wr->cur_rdma_length = data_left;
+       pr_debug("Mapped cmd: %p count: %u sg: %p sg_nents: %u rdma_len %d\n",
+                isert_cmd, count, sg_start, sg_nents, data_left);
 
        ib_sge = kzalloc(sizeof(struct ib_sge) * sg_nents, GFP_KERNEL);
        if (!ib_sge) {
-               pr_warn("Unable to allocate datain ib_sge\n");
+               pr_warn("Unable to allocate ib_sge\n");
                ret = -ENOMEM;
                goto unmap_sg;
        }
-       isert_cmd->ib_sge = ib_sge;
-
-       pr_debug("Allocated ib_sge: %p from t_data_ents: %d for RDMA_WRITE\n",
-                ib_sge, se_cmd->t_data_nents);
+       wr->ib_sge = ib_sge;
 
        wr->send_wr_num = DIV_ROUND_UP(sg_nents, isert_conn->max_sge);
        wr->send_wr = kzalloc(sizeof(struct ib_send_wr) * wr->send_wr_num,
                                GFP_KERNEL);
        if (!wr->send_wr) {
-               pr_err("Unable to allocate wr->send_wr\n");
+               pr_debug("Unable to allocate wr->send_wr\n");
                ret = -ENOMEM;
                goto unmap_sg;
        }
-       pr_debug("Allocated wr->send_wr: %p wr->send_wr_num: %u\n",
-                wr->send_wr, wr->send_wr_num);
-
-       iscsit_increment_maxcmdsn(cmd, conn->sess);
-       cmd->stat_sn = conn->stat_sn++;
 
        wr->isert_cmd = isert_cmd;
        rdma_write_max = isert_conn->max_sge * PAGE_SIZE;
-       data_left = se_cmd->data_length;
 
        for (i = 0; i < wr->send_wr_num; i++) {
                send_wr = &isert_cmd->rdma_wr.send_wr[i];
                data_len = min(data_left, rdma_write_max);
 
-               send_wr->opcode = IB_WR_RDMA_WRITE;
                send_wr->send_flags = 0;
-               send_wr->wr.rdma.remote_addr = isert_cmd->read_va + offset;
-               send_wr->wr.rdma.rkey = isert_cmd->read_stag;
+               if (wr->iser_ib_op == ISER_IB_RDMA_WRITE) {
+                       send_wr->opcode = IB_WR_RDMA_WRITE;
+                       send_wr->wr.rdma.remote_addr = isert_cmd->read_va + offset;
+                       send_wr->wr.rdma.rkey = isert_cmd->read_stag;
+                       if (i + 1 == wr->send_wr_num)
+                               send_wr->next = &isert_cmd->tx_desc.send_wr;
+                       else
+                               send_wr->next = &wr->send_wr[i + 1];
+               } else {
+                       send_wr->opcode = IB_WR_RDMA_READ;
+                       send_wr->wr.rdma.remote_addr = isert_cmd->write_va + va_offset;
+                       send_wr->wr.rdma.rkey = isert_cmd->write_stag;
+                       if (i + 1 == wr->send_wr_num)
+                               send_wr->send_flags = IB_SEND_SIGNALED;
+                       else
+                               send_wr->next = &wr->send_wr[i + 1];
+               }
 
                ib_sge_cnt = isert_build_rdma_wr(isert_conn, isert_cmd, ib_sge,
                                        send_wr, data_len, offset);
                ib_sge += ib_sge_cnt;
 
-               if (i + 1 == wr->send_wr_num)
-                       send_wr->next = &isert_cmd->tx_desc.send_wr;
-               else
-                       send_wr->next = &wr->send_wr[i + 1];
-
                offset += data_len;
+               va_offset += data_len;
                data_left -= data_len;
        }
-       /*
-        * Build isert_conn->tx_desc for iSCSI response PDU and attach
-        */
-       isert_create_send_desc(isert_conn, isert_cmd, &isert_cmd->tx_desc);
-       iscsit_build_rsp_pdu(cmd, conn, false, (struct iscsi_scsi_rsp *)
-                            &isert_cmd->tx_desc.iscsi_header);
-       isert_init_tx_hdrs(isert_conn, &isert_cmd->tx_desc);
-       isert_init_send_wr(isert_cmd, &isert_cmd->tx_desc.send_wr);
 
-       atomic_inc(&isert_conn->post_send_buf_count);
+       return 0;
+unmap_sg:
+       ib_dma_unmap_sg(ib_dev, sg_start, sg_nents,
+                       (wr->iser_ib_op == ISER_IB_RDMA_WRITE) ?
+                       DMA_TO_DEVICE : DMA_FROM_DEVICE);
+       return ret;
+}
 
-       rc = ib_post_send(isert_conn->conn_qp, wr->send_wr, &wr_failed);
-       if (rc) {
-               pr_warn("ib_post_send() failed for IB_WR_RDMA_WRITE\n");
-               atomic_dec(&isert_conn->post_send_buf_count);
+static int
+isert_map_fr_pagelist(struct ib_device *ib_dev,
+                     struct scatterlist *sg_start, int sg_nents, u64 *fr_pl)
+{
+       u64 start_addr, end_addr, page, chunk_start = 0;
+       struct scatterlist *tmp_sg;
+       int i = 0, new_chunk, last_ent, n_pages;
+
+       n_pages = 0;
+       new_chunk = 1;
+       last_ent = sg_nents - 1;
+       for_each_sg(sg_start, tmp_sg, sg_nents, i) {
+               start_addr = ib_sg_dma_address(ib_dev, tmp_sg);
+               if (new_chunk)
+                       chunk_start = start_addr;
+               end_addr = start_addr + ib_sg_dma_len(ib_dev, tmp_sg);
+
+               pr_debug("SGL[%d] dma_addr: 0x%16llx len: %u\n",
+                        i, (unsigned long long)tmp_sg->dma_address,
+                        tmp_sg->length);
+
+               if ((end_addr & ~PAGE_MASK) && i < last_ent) {
+                       new_chunk = 0;
+                       continue;
+               }
+               new_chunk = 1;
+
+               page = chunk_start & PAGE_MASK;
+               do {
+                       fr_pl[n_pages++] = page;
+                       pr_debug("Mapped page_list[%d] page_addr: 0x%16llx\n",
+                                n_pages - 1, page);
+                       page += PAGE_SIZE;
+               } while (page < end_addr);
        }
-       pr_debug("Posted RDMA_WRITE + Response for iSER Data READ\n");
-       return 1;
 
-unmap_sg:
-       ib_dma_unmap_sg(ib_dev, sg, sg_nents, DMA_TO_DEVICE);
+       return n_pages;
+}
+
+static int
+isert_fast_reg_mr(struct fast_reg_descriptor *fr_desc,
+                 struct isert_cmd *isert_cmd, struct isert_conn *isert_conn,
+                 struct ib_sge *ib_sge, u32 offset, unsigned int data_len)
+{
+       struct iscsi_cmd *cmd = isert_cmd->iscsi_cmd;
+       struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
+       struct scatterlist *sg_start;
+       u32 sg_off, page_off;
+       struct ib_send_wr fr_wr, inv_wr;
+       struct ib_send_wr *bad_wr, *wr = NULL;
+       u8 key;
+       int ret, sg_nents, pagelist_len;
+
+       sg_off = offset / PAGE_SIZE;
+       sg_start = &cmd->se_cmd.t_data_sg[sg_off];
+       sg_nents = min_t(unsigned int, cmd->se_cmd.t_data_nents - sg_off,
+                        ISCSI_ISER_SG_TABLESIZE);
+       page_off = offset % PAGE_SIZE;
+
+       pr_debug("Cmd: %p use fr_desc %p sg_nents %d sg_off %d offset %u\n",
+                isert_cmd, fr_desc, sg_nents, sg_off, offset);
+
+       pagelist_len = isert_map_fr_pagelist(ib_dev, sg_start, sg_nents,
+                                            &fr_desc->data_frpl->page_list[0]);
+
+       if (!fr_desc->valid) {
+               memset(&inv_wr, 0, sizeof(inv_wr));
+               inv_wr.opcode = IB_WR_LOCAL_INV;
+               inv_wr.ex.invalidate_rkey = fr_desc->data_mr->rkey;
+               wr = &inv_wr;
+               /* Bump the key */
+               key = (u8)(fr_desc->data_mr->rkey & 0x000000FF);
+               ib_update_fast_reg_key(fr_desc->data_mr, ++key);
+       }
+
+       /* Prepare FASTREG WR */
+       memset(&fr_wr, 0, sizeof(fr_wr));
+       fr_wr.opcode = IB_WR_FAST_REG_MR;
+       fr_wr.wr.fast_reg.iova_start =
+               fr_desc->data_frpl->page_list[0] + page_off;
+       fr_wr.wr.fast_reg.page_list = fr_desc->data_frpl;
+       fr_wr.wr.fast_reg.page_list_len = pagelist_len;
+       fr_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
+       fr_wr.wr.fast_reg.length = data_len;
+       fr_wr.wr.fast_reg.rkey = fr_desc->data_mr->rkey;
+       fr_wr.wr.fast_reg.access_flags = IB_ACCESS_LOCAL_WRITE;
+
+       if (!wr)
+               wr = &fr_wr;
+       else
+               wr->next = &fr_wr;
+
+       ret = ib_post_send(isert_conn->conn_qp, wr, &bad_wr);
+       if (ret) {
+               pr_err("fast registration failed, ret:%d\n", ret);
+               return ret;
+       }
+       fr_desc->valid = false;
+
+       ib_sge->lkey = fr_desc->data_mr->lkey;
+       ib_sge->addr = fr_desc->data_frpl->page_list[0] + page_off;
+       ib_sge->length = data_len;
+
+       pr_debug("RDMA ib_sge: addr: 0x%16llx  length: %u lkey: %08x\n",
+                ib_sge->addr, ib_sge->length, ib_sge->lkey);
+
        return ret;
 }
 
 static int
-isert_get_dataout(struct iscsi_conn *conn, struct iscsi_cmd *cmd, bool recovery)
+isert_reg_rdma_frwr(struct iscsi_conn *conn, struct iscsi_cmd *cmd,
+                   struct isert_rdma_wr *wr)
 {
        struct se_cmd *se_cmd = &cmd->se_cmd;
-       struct isert_cmd *isert_cmd = container_of(cmd,
-                                       struct isert_cmd, iscsi_cmd);
-       struct isert_rdma_wr *wr = &isert_cmd->rdma_wr;
+       struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
        struct isert_conn *isert_conn = (struct isert_conn *)conn->context;
-       struct ib_send_wr *wr_failed, *send_wr;
-       struct ib_sge *ib_sge;
        struct ib_device *ib_dev = isert_conn->conn_cm_id->device;
+       struct ib_send_wr *send_wr;
+       struct ib_sge *ib_sge;
        struct scatterlist *sg_start;
-       u32 sg_off, sg_nents, page_off, va_offset = 0;
+       struct fast_reg_descriptor *fr_desc;
+       u32 sg_off = 0, sg_nents;
        u32 offset = 0, data_len, data_left, rdma_write_max;
-       int rc, ret = 0, count, i, ib_sge_cnt;
+       int ret = 0, count;
+       unsigned long flags;
 
-       pr_debug("RDMA_READ: data_length: %u write_data_done: %u\n",
-                se_cmd->data_length, cmd->write_data_done);
+       if (wr->iser_ib_op == ISER_IB_RDMA_WRITE) {
+               data_left = se_cmd->data_length;
+               iscsit_increment_maxcmdsn(cmd, conn->sess);
+               cmd->stat_sn = conn->stat_sn++;
+       } else {
+               sg_off = cmd->write_data_done / PAGE_SIZE;
+               data_left = se_cmd->data_length - cmd->write_data_done;
+               offset = cmd->write_data_done;
+               isert_cmd->tx_desc.isert_cmd = isert_cmd;
+       }
 
-       sg_off = cmd->write_data_done / PAGE_SIZE;
        sg_start = &cmd->se_cmd.t_data_sg[sg_off];
-       page_off = cmd->write_data_done % PAGE_SIZE;
-
-       pr_debug("RDMA_READ: sg_off: %d, sg_start: %p page_off: %d\n",
-                sg_off, sg_start, page_off);
-
-       data_left = se_cmd->data_length - cmd->write_data_done;
        sg_nents = se_cmd->t_data_nents - sg_off;
 
-       pr_debug("RDMA_READ: data_left: %d, sg_nents: %d\n",
-                data_left, sg_nents);
-
-       count = ib_dma_map_sg(ib_dev, sg_start, sg_nents, DMA_FROM_DEVICE);
+       count = ib_dma_map_sg(ib_dev, sg_start, sg_nents,
+                             (wr->iser_ib_op == ISER_IB_RDMA_WRITE) ?
+                             DMA_TO_DEVICE : DMA_FROM_DEVICE);
        if (unlikely(!count)) {
-               pr_err("Unable to map get_dataout SGs\n");
+               pr_err("Cmd: %p unrable to map SGs\n", isert_cmd);
                return -EINVAL;
        }
        wr->sge = sg_start;
        wr->num_sge = sg_nents;
-       pr_debug("Mapped IB count: %u sg_start: %p sg_nents: %u for RDMA_READ\n",
-                count, sg_start, sg_nents);
+       pr_debug("Mapped cmd: %p count: %u sg: %p sg_nents: %u rdma_len %d\n",
+                isert_cmd, count, sg_start, sg_nents, data_left);
 
-       ib_sge = kzalloc(sizeof(struct ib_sge) * sg_nents, GFP_KERNEL);
-       if (!ib_sge) {
-               pr_warn("Unable to allocate dataout ib_sge\n");
-               ret = -ENOMEM;
-               goto unmap_sg;
+       memset(&wr->s_ib_sge, 0, sizeof(*ib_sge));
+       ib_sge = &wr->s_ib_sge;
+       wr->ib_sge = ib_sge;
+
+       wr->send_wr_num = 1;
+       memset(&wr->s_send_wr, 0, sizeof(*send_wr));
+       wr->send_wr = &wr->s_send_wr;
+
+       wr->isert_cmd = isert_cmd;
+       rdma_write_max = ISCSI_ISER_SG_TABLESIZE * PAGE_SIZE;
+
+       send_wr = &isert_cmd->rdma_wr.s_send_wr;
+       send_wr->sg_list = ib_sge;
+       send_wr->num_sge = 1;
+       send_wr->wr_id = (unsigned long)&isert_cmd->tx_desc;
+       if (wr->iser_ib_op == ISER_IB_RDMA_WRITE) {
+               send_wr->opcode = IB_WR_RDMA_WRITE;
+               send_wr->wr.rdma.remote_addr = isert_cmd->read_va;
+               send_wr->wr.rdma.rkey = isert_cmd->read_stag;
+               send_wr->send_flags = 0;
+               send_wr->next = &isert_cmd->tx_desc.send_wr;
+       } else {
+               send_wr->opcode = IB_WR_RDMA_READ;
+               send_wr->wr.rdma.remote_addr = isert_cmd->write_va;
+               send_wr->wr.rdma.rkey = isert_cmd->write_stag;
+               send_wr->send_flags = IB_SEND_SIGNALED;
        }
-       isert_cmd->ib_sge = ib_sge;
 
-       pr_debug("Using ib_sge: %p from sg_ents: %d for RDMA_READ\n",
-                ib_sge, sg_nents);
+       data_len = min(data_left, rdma_write_max);
+       wr->cur_rdma_length = data_len;
 
-       wr->send_wr_num = DIV_ROUND_UP(sg_nents, isert_conn->max_sge);
-       wr->send_wr = kzalloc(sizeof(struct ib_send_wr) * wr->send_wr_num,
-                               GFP_KERNEL);
-       if (!wr->send_wr) {
-               pr_debug("Unable to allocate wr->send_wr\n");
-               ret = -ENOMEM;
+       spin_lock_irqsave(&isert_conn->conn_lock, flags);
+       fr_desc = list_first_entry(&isert_conn->conn_frwr_pool,
+                                  struct fast_reg_descriptor, list);
+       list_del(&fr_desc->list);
+       spin_unlock_irqrestore(&isert_conn->conn_lock, flags);
+       wr->fr_desc = fr_desc;
+
+       ret = isert_fast_reg_mr(fr_desc, isert_cmd, isert_conn,
+                         ib_sge, offset, data_len);
+       if (ret) {
+               list_add_tail(&fr_desc->list, &isert_conn->conn_frwr_pool);
                goto unmap_sg;
        }
-       pr_debug("Allocated wr->send_wr: %p wr->send_wr_num: %u\n",
-                wr->send_wr, wr->send_wr_num);
 
-       isert_cmd->tx_desc.isert_cmd = isert_cmd;
+       return 0;
 
-       wr->iser_ib_op = ISER_IB_RDMA_READ;
-       wr->isert_cmd = isert_cmd;
-       rdma_write_max = isert_conn->max_sge * PAGE_SIZE;
-       offset = cmd->write_data_done;
+unmap_sg:
+       ib_dma_unmap_sg(ib_dev, sg_start, sg_nents,
+                       (wr->iser_ib_op == ISER_IB_RDMA_WRITE) ?
+                       DMA_TO_DEVICE : DMA_FROM_DEVICE);
+       return ret;
+}
 
-       for (i = 0; i < wr->send_wr_num; i++) {
-               send_wr = &isert_cmd->rdma_wr.send_wr[i];
-               data_len = min(data_left, rdma_write_max);
+static int
+isert_put_datain(struct iscsi_conn *conn, struct iscsi_cmd *cmd)
+{
+       struct se_cmd *se_cmd = &cmd->se_cmd;
+       struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
+       struct isert_rdma_wr *wr = &isert_cmd->rdma_wr;
+       struct isert_conn *isert_conn = (struct isert_conn *)conn->context;
+       struct isert_device *device = isert_conn->conn_device;
+       struct ib_send_wr *wr_failed;
+       int rc;
 
-               send_wr->opcode = IB_WR_RDMA_READ;
-               send_wr->wr.rdma.remote_addr = isert_cmd->write_va + va_offset;
-               send_wr->wr.rdma.rkey = isert_cmd->write_stag;
+       pr_debug("Cmd: %p RDMA_WRITE data_length: %u\n",
+                isert_cmd, se_cmd->data_length);
+       wr->iser_ib_op = ISER_IB_RDMA_WRITE;
+       rc = device->reg_rdma_mem(conn, cmd, wr);
+       if (rc) {
+               pr_err("Cmd: %p failed to prepare RDMA res\n", isert_cmd);
+               return rc;
+       }
 
-               ib_sge_cnt = isert_build_rdma_wr(isert_conn, isert_cmd, ib_sge,
-                                       send_wr, data_len, offset);
-               ib_sge += ib_sge_cnt;
+       /*
+        * Build isert_conn->tx_desc for iSCSI response PDU and attach
+        */
+       isert_create_send_desc(isert_conn, isert_cmd, &isert_cmd->tx_desc);
+       iscsit_build_rsp_pdu(cmd, conn, false, (struct iscsi_scsi_rsp *)
+                            &isert_cmd->tx_desc.iscsi_header);
+       isert_init_tx_hdrs(isert_conn, &isert_cmd->tx_desc);
+       isert_init_send_wr(isert_cmd, &isert_cmd->tx_desc.send_wr);
 
-               if (i + 1 == wr->send_wr_num)
-                       send_wr->send_flags = IB_SEND_SIGNALED;
-               else
-                       send_wr->next = &wr->send_wr[i + 1];
+       atomic_inc(&isert_conn->post_send_buf_count);
 
-               offset += data_len;
-               va_offset += data_len;
-               data_left -= data_len;
+       rc = ib_post_send(isert_conn->conn_qp, wr->send_wr, &wr_failed);
+       if (rc) {
+               pr_warn("ib_post_send() failed for IB_WR_RDMA_WRITE\n");
+               atomic_dec(&isert_conn->post_send_buf_count);
+       }
+       pr_debug("Cmd: %p posted RDMA_WRITE + Response for iSER Data READ\n",
+                isert_cmd);
+
+       return 1;
+}
+
+static int
+isert_get_dataout(struct iscsi_conn *conn, struct iscsi_cmd *cmd, bool recovery)
+{
+       struct se_cmd *se_cmd = &cmd->se_cmd;
+       struct isert_cmd *isert_cmd = iscsit_priv_cmd(cmd);
+       struct isert_rdma_wr *wr = &isert_cmd->rdma_wr;
+       struct isert_conn *isert_conn = (struct isert_conn *)conn->context;
+       struct isert_device *device = isert_conn->conn_device;
+       struct ib_send_wr *wr_failed;
+       int rc;
+
+       pr_debug("Cmd: %p RDMA_READ data_length: %u write_data_done: %u\n",
+                isert_cmd, se_cmd->data_length, cmd->write_data_done);
+       wr->iser_ib_op = ISER_IB_RDMA_READ;
+       rc = device->reg_rdma_mem(conn, cmd, wr);
+       if (rc) {
+               pr_err("Cmd: %p failed to prepare RDMA res\n", isert_cmd);
+               return rc;
        }
 
        atomic_inc(&isert_conn->post_send_buf_count);
@@ -2050,12 +2351,10 @@ isert_get_dataout(struct iscsi_conn *conn, struct iscsi_cmd *cmd, bool recovery)
                pr_warn("ib_post_send() failed for IB_WR_RDMA_READ\n");
                atomic_dec(&isert_conn->post_send_buf_count);
        }
-       pr_debug("Posted RDMA_READ memory for ISER Data WRITE\n");
-       return 0;
+       pr_debug("Cmd: %p posted RDMA_READ memory for ISER Data WRITE\n",
+                isert_cmd);
 
-unmap_sg:
-       ib_dma_unmap_sg(ib_dev, sg_start, sg_nents, DMA_FROM_DEVICE);
-       return ret;
+       return 0;
 }
 
 static int
@@ -2224,6 +2523,14 @@ isert_get_login_rx(struct iscsi_conn *conn, struct iscsi_login *login)
        int ret;
 
        pr_debug("isert_get_login_rx before conn_login_comp conn: %p\n", conn);
+       /*
+        * For login requests after the first PDU, isert_rx_login_req() will
+        * kick schedule_delayed_work(&conn->login_work) as the packet is
+        * received, which turns this callback from iscsi_target_do_login_rx()
+        * into a NOP.
+        */
+       if (!login->first_request)
+               return 0;
 
        ret = wait_for_completion_interruptible(&isert_conn->conn_login_comp);
        if (ret)
@@ -2393,12 +2700,12 @@ static void isert_free_conn(struct iscsi_conn *conn)
 static struct iscsit_transport iser_target_transport = {
        .name                   = "IB/iSER",
        .transport_type         = ISCSI_INFINIBAND,
+       .priv_size              = sizeof(struct isert_cmd),
        .owner                  = THIS_MODULE,
        .iscsit_setup_np        = isert_setup_np,
        .iscsit_accept_np       = isert_accept_np,
        .iscsit_free_np         = isert_free_np,
        .iscsit_free_conn       = isert_free_conn,
-       .iscsit_alloc_cmd       = isert_alloc_cmd,
        .iscsit_get_login_rx    = isert_get_login_rx,
        .iscsit_put_login_tx    = isert_put_login_tx,
        .iscsit_immediate_queue = isert_immediate_queue,
@@ -2425,21 +2732,10 @@ static int __init isert_init(void)
                goto destroy_rx_wq;
        }
 
-       isert_cmd_cache = kmem_cache_create("isert_cmd_cache",
-                       sizeof(struct isert_cmd), __alignof__(struct isert_cmd),
-                       0, NULL);
-       if (!isert_cmd_cache) {
-               pr_err("Unable to create isert_cmd_cache\n");
-               ret = -ENOMEM;
-               goto destroy_tx_cq;
-       }
-
        iscsit_register_transport(&iser_target_transport);
        pr_debug("iSER_TARGET[0] - Loaded iser_target_transport\n");
        return 0;
 
-destroy_tx_cq:
-       destroy_workqueue(isert_comp_wq);
 destroy_rx_wq:
        destroy_workqueue(isert_rx_wq);
        return ret;
@@ -2447,7 +2743,6 @@ destroy_rx_wq:
 
 static void __exit isert_exit(void)
 {
-       kmem_cache_destroy(isert_cmd_cache);
        destroy_workqueue(isert_comp_wq);
        destroy_workqueue(isert_rx_wq);
        iscsit_unregister_transport(&iser_target_transport);
index 191117b..631f209 100644 (file)
@@ -5,6 +5,7 @@
 #include <rdma/rdma_cm.h>
 
 #define ISERT_RDMA_LISTEN_BACKLOG      10
+#define ISCSI_ISER_SG_TABLESIZE                256
 
 enum isert_desc_type {
        ISCSI_TX_CONTROL,
@@ -45,15 +46,26 @@ struct iser_tx_desc {
        struct ib_send_wr send_wr;
 } __packed;
 
+struct fast_reg_descriptor {
+       struct list_head        list;
+       struct ib_mr            *data_mr;
+       struct ib_fast_reg_page_list    *data_frpl;
+       bool                    valid;
+};
+
 struct isert_rdma_wr {
        struct list_head        wr_list;
        struct isert_cmd        *isert_cmd;
        enum iser_ib_op_code    iser_ib_op;
        struct ib_sge           *ib_sge;
+       struct ib_sge           s_ib_sge;
        int                     num_sge;
        struct scatterlist      *sge;
        int                     send_wr_num;
        struct ib_send_wr       *send_wr;
+       struct ib_send_wr       s_send_wr;
+       u32                     cur_rdma_length;
+       struct fast_reg_descriptor *fr_desc;
 };
 
 struct isert_cmd {
@@ -67,8 +79,7 @@ struct isert_cmd {
        u32                     write_va_off;
        u32                     rdma_wr_num;
        struct isert_conn       *conn;
-       struct iscsi_cmd        iscsi_cmd;
-       struct ib_sge           *ib_sge;
+       struct iscsi_cmd        *iscsi_cmd;
        struct iser_tx_desc     tx_desc;
        struct isert_rdma_wr    rdma_wr;
        struct work_struct      comp_work;
@@ -106,6 +117,10 @@ struct isert_conn {
        wait_queue_head_t       conn_wait;
        wait_queue_head_t       conn_wait_comp_err;
        struct kref             conn_kref;
+       struct list_head        conn_frwr_pool;
+       int                     conn_frwr_pool_size;
+       /* lock to protect frwr_pool */
+       spinlock_t              conn_lock;
 };
 
 #define ISERT_MAX_CQ 64
@@ -118,6 +133,7 @@ struct isert_cq_desc {
 };
 
 struct isert_device {
+       int                     use_frwr;
        int                     cqs_used;
        int                     refcount;
        int                     cq_active_qps[ISERT_MAX_CQ];
@@ -128,6 +144,12 @@ struct isert_device {
        struct ib_cq            *dev_tx_cq[ISERT_MAX_CQ];
        struct isert_cq_desc    *cq_desc;
        struct list_head        dev_node;
+       struct ib_device_attr   dev_attr;
+       int                     (*reg_rdma_mem)(struct iscsi_conn *conn,
+                                                   struct iscsi_cmd *cmd,
+                                                   struct isert_rdma_wr *wr);
+       void                    (*unreg_rdma_mem)(struct isert_cmd *isert_cmd,
+                                                 struct isert_conn *isert_conn);
 };
 
 struct isert_np {
index d2b34fb..b6ded17 100644 (file)
@@ -48,6 +48,7 @@ struct evdev_client {
        struct evdev *evdev;
        struct list_head node;
        int clkid;
+       bool revoked;
        unsigned int bufsize;
        struct input_event buffer[];
 };
@@ -164,6 +165,9 @@ static void evdev_pass_values(struct evdev_client *client,
        struct input_event event;
        bool wakeup = false;
 
+       if (client->revoked)
+               return;
+
        event.time = ktime_to_timeval(client->clkid == CLOCK_MONOTONIC ?
                                      mono : real);
 
@@ -240,7 +244,7 @@ static int evdev_flush(struct file *file, fl_owner_t id)
        if (retval)
                return retval;
 
-       if (!evdev->exist)
+       if (!evdev->exist || client->revoked)
                retval = -ENODEV;
        else
                retval = input_flush_device(&evdev->handle, file);
@@ -429,7 +433,7 @@ static ssize_t evdev_write(struct file *file, const char __user *buffer,
        if (retval)
                return retval;
 
-       if (!evdev->exist) {
+       if (!evdev->exist || client->revoked) {
                retval = -ENODEV;
                goto out;
        }
@@ -482,7 +486,7 @@ static ssize_t evdev_read(struct file *file, char __user *buffer,
                return -EINVAL;
 
        for (;;) {
-               if (!evdev->exist)
+               if (!evdev->exist || client->revoked)
                        return -ENODEV;
 
                if (client->packet_head == client->tail &&
@@ -511,7 +515,7 @@ static ssize_t evdev_read(struct file *file, char __user *buffer,
                if (!(file->f_flags & O_NONBLOCK)) {
                        error = wait_event_interruptible(evdev->wait,
                                        client->packet_head != client->tail ||
-                                       !evdev->exist);
+                                       !evdev->exist || client->revoked);
                        if (error)
                                return error;
                }
@@ -529,7 +533,11 @@ static unsigned int evdev_poll(struct file *file, poll_table *wait)
 
        poll_wait(file, &evdev->wait, wait);
 
-       mask = evdev->exist ? POLLOUT | POLLWRNORM : POLLHUP | POLLERR;
+       if (evdev->exist && !client->revoked)
+               mask = POLLOUT | POLLWRNORM;
+       else
+               mask = POLLHUP | POLLERR;
+
        if (client->packet_head != client->tail)
                mask |= POLLIN | POLLRDNORM;
 
@@ -795,6 +803,17 @@ static int evdev_handle_mt_request(struct input_dev *dev,
        return 0;
 }
 
+static int evdev_revoke(struct evdev *evdev, struct evdev_client *client,
+                       struct file *file)
+{
+       client->revoked = true;
+       evdev_ungrab(evdev, client);
+       input_flush_device(&evdev->handle, file);
+       wake_up_interruptible(&evdev->wait);
+
+       return 0;
+}
+
 static long evdev_do_ioctl(struct file *file, unsigned int cmd,
                           void __user *p, int compat_mode)
 {
@@ -857,6 +876,12 @@ static long evdev_do_ioctl(struct file *file, unsigned int cmd,
                else
                        return evdev_ungrab(evdev, client);
 
+       case EVIOCREVOKE:
+               if (p)
+                       return -EINVAL;
+               else
+                       return evdev_revoke(evdev, client, file);
+
        case EVIOCSCLOCKID:
                if (copy_from_user(&i, p, sizeof(unsigned int)))
                        return -EFAULT;
@@ -1002,7 +1027,7 @@ static long evdev_ioctl_handler(struct file *file, unsigned int cmd,
        if (retval)
                return retval;
 
-       if (!evdev->exist) {
+       if (!evdev->exist || client->revoked) {
                retval = -ENODEV;
                goto out;
        }
index 269d4c3..c1edd39 100644 (file)
@@ -224,7 +224,7 @@ config KEYBOARD_TCA6416
 
 config KEYBOARD_TCA8418
        tristate "TCA8418 Keypad Support"
-       depends on I2C && GENERIC_HARDIRQS
+       depends on I2C
        select INPUT_MATRIXKMAP
        help
          This driver implements basic keypad functionality
@@ -303,7 +303,7 @@ config KEYBOARD_HP7XX
 
 config KEYBOARD_LM8323
        tristate "LM8323 keypad chip"
-       depends on I2C && GENERIC_HARDIRQS
+       depends on I2C
        depends on LEDS_CLASS
        help
          If you say yes here you get support for the National Semiconductor
index 1e691a3..33b3e88 100644 (file)
@@ -239,7 +239,6 @@ config SERIO_PS2MULT
 
 config SERIO_ARC_PS2
        tristate "ARC PS/2 support"
-       depends on GENERIC_HARDIRQS
        help
          Say Y here if you have an ARC FPGA platform with a PS/2
          controller in it.
index 3b9758b..e09ec67 100644 (file)
@@ -389,7 +389,7 @@ config TOUCHSCREEN_MCS5000
 
 config TOUCHSCREEN_MMS114
        tristate "MELFAS MMS114 touchscreen"
-       depends on I2C && GENERIC_HARDIRQS
+       depends on I2C
        help
          Say Y here if you have the MELFAS MMS114 touchscreen controller
          chip in your system.
@@ -845,7 +845,7 @@ config TOUCHSCREEN_TSC_SERIO
 
 config TOUCHSCREEN_TSC2005
         tristate "TSC2005 based touchscreens"
-        depends on SPI_MASTER && GENERIC_HARDIRQS
+        depends on SPI_MASTER
         help
           Say Y here if you have a TSC2005 based touchscreen.
 
index 820d85c..fe302e3 100644 (file)
@@ -17,6 +17,16 @@ config OF_IOMMU
        def_bool y
        depends on OF
 
+config FSL_PAMU
+       bool "Freescale IOMMU support"
+       depends on PPC_E500MC
+       select IOMMU_API
+       select GENERIC_ALLOCATOR
+       help
+         Freescale PAMU support. PAMU is the IOMMU present on Freescale QorIQ platforms.
+         PAMU can authorize memory access, remap the memory address, and remap I/O
+         transaction types.
+
 # MSM IOMMU support
 config MSM_IOMMU
        bool "MSM IOMMU Support"
index bbe7041..14c1f47 100644 (file)
@@ -16,3 +16,4 @@ obj-$(CONFIG_TEGRA_IOMMU_SMMU) += tegra-smmu.o
 obj-$(CONFIG_EXYNOS_IOMMU) += exynos-iommu.o
 obj-$(CONFIG_SHMOBILE_IOMMU) += shmobile-iommu.o
 obj-$(CONFIG_SHMOBILE_IPMMU) += shmobile-ipmmu.o
+obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o
index 6dc6594..72531f0 100644 (file)
@@ -456,8 +456,10 @@ static int iommu_init_device(struct device *dev)
        }
 
        ret = init_iommu_group(dev);
-       if (ret)
+       if (ret) {
+               free_dev_data(dev_data);
                return ret;
+       }
 
        if (pci_iommuv2_capable(pdev)) {
                struct amd_iommu *iommu;
index 7acbf35..8f798be 100644 (file)
@@ -1384,7 +1384,7 @@ static int iommu_init_msi(struct amd_iommu *iommu)
        if (iommu->int_enabled)
                goto enable_faults;
 
-       if (pci_find_capability(iommu->dev, PCI_CAP_ID_MSI))
+       if (iommu->dev->msi_cap)
                ret = iommu_setup_msi(iommu);
        else
                ret = -ENODEV;
index ebd0a4c..f417e89 100644 (file)
@@ -56,9 +56,6 @@
 /* Maximum number of mapping groups per SMMU */
 #define ARM_SMMU_MAX_SMRS              128
 
-/* Number of VMIDs per SMMU */
-#define ARM_SMMU_NUM_VMIDS             256
-
 /* SMMU global address space */
 #define ARM_SMMU_GR0(smmu)             ((smmu)->base)
 #define ARM_SMMU_GR1(smmu)             ((smmu)->base + (smmu)->pagesize)
@@ -87,6 +84,7 @@
 #define ARM_SMMU_PTE_AP_UNPRIV         (((pteval_t)1) << 6)
 #define ARM_SMMU_PTE_AP_RDONLY         (((pteval_t)2) << 6)
 #define ARM_SMMU_PTE_ATTRINDX_SHIFT    2
+#define ARM_SMMU_PTE_nG                        (((pteval_t)1) << 11)
 
 /* Stage-2 PTE */
 #define ARM_SMMU_PTE_HAP_FAULT         (((pteval_t)0) << 6)
 #define ARM_SMMU_CB_FAR_LO             0x60
 #define ARM_SMMU_CB_FAR_HI             0x64
 #define ARM_SMMU_CB_FSYNR0             0x68
+#define ARM_SMMU_CB_S1_TLBIASID                0x610
 
 #define SCTLR_S1_ASIDPNE               (1 << 12)
 #define SCTLR_CFCFG                    (1 << 7)
 #define TTBCR2_ADDR_44                 4
 #define TTBCR2_ADDR_48                 5
 
+#define TTBRn_HI_ASID_SHIFT            16
+
 #define MAIR_ATTR_SHIFT(n)             ((n) << 3)
 #define MAIR_ATTR_MASK                 0xff
 #define MAIR_ATTR_DEVICE               0x04
 #define FSR_IGN                                (FSR_AFF | FSR_ASF | FSR_TLBMCF |       \
                                         FSR_TLBLKF)
 #define FSR_FAULT                      (FSR_MULTI | FSR_SS | FSR_UUT |         \
-                                        FSR_EF | FSR_PF | FSR_TF)
+                                        FSR_EF | FSR_PF | FSR_TF | FSR_IGN)
 
 #define FSYNR0_WNR                     (1 << 4)
 
@@ -365,21 +366,21 @@ struct arm_smmu_device {
        u32                             num_context_irqs;
        unsigned int                    *irqs;
 
-       DECLARE_BITMAP(vmid_map, ARM_SMMU_NUM_VMIDS);
-
        struct list_head                list;
        struct rb_root                  masters;
 };
 
 struct arm_smmu_cfg {
        struct arm_smmu_device          *smmu;
-       u8                              vmid;
        u8                              cbndx;
        u8                              irptndx;
        u32                             cbar;
        pgd_t                           *pgd;
 };
 
+#define ARM_SMMU_CB_ASID(cfg)          ((cfg)->cbndx)
+#define ARM_SMMU_CB_VMID(cfg)          ((cfg)->cbndx + 1)
+
 struct arm_smmu_domain {
        /*
         * A domain can span across multiple, chained SMMUs and requires
@@ -533,6 +534,25 @@ static void arm_smmu_tlb_sync(struct arm_smmu_device *smmu)
        }
 }
 
+static void arm_smmu_tlb_inv_context(struct arm_smmu_cfg *cfg)
+{
+       struct arm_smmu_device *smmu = cfg->smmu;
+       void __iomem *base = ARM_SMMU_GR0(smmu);
+       bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
+
+       if (stage1) {
+               base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx);
+               writel_relaxed(ARM_SMMU_CB_ASID(cfg),
+                              base + ARM_SMMU_CB_S1_TLBIASID);
+       } else {
+               base = ARM_SMMU_GR0(smmu);
+               writel_relaxed(ARM_SMMU_CB_VMID(cfg),
+                              base + ARM_SMMU_GR0_TLBIVMID);
+       }
+
+       arm_smmu_tlb_sync(smmu);
+}
+
 static irqreturn_t arm_smmu_context_fault(int irq, void *dev)
 {
        int flags, ret;
@@ -590,6 +610,9 @@ static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
        void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
 
        gfsr = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSR);
+       if (!gfsr)
+               return IRQ_NONE;
+
        gfsynr0 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR0);
        gfsynr1 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR1);
        gfsynr2 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR2);
@@ -601,7 +624,7 @@ static irqreturn_t arm_smmu_global_fault(int irq, void *dev)
                gfsr, gfsynr0, gfsynr1, gfsynr2);
 
        writel(gfsr, gr0_base + ARM_SMMU_GR0_sGFSR);
-       return IRQ_NONE;
+       return IRQ_HANDLED;
 }
 
 static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain)
@@ -618,14 +641,15 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain)
        cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, root_cfg->cbndx);
 
        /* CBAR */
-       reg = root_cfg->cbar |
-             (root_cfg->vmid << CBAR_VMID_SHIFT);
+       reg = root_cfg->cbar;
        if (smmu->version == 1)
              reg |= root_cfg->irptndx << CBAR_IRPTNDX_SHIFT;
 
        /* Use the weakest memory type, so it is overridden by the pte */
        if (stage1)
                reg |= (CBAR_S1_MEMATTR_WB << CBAR_S1_MEMATTR_SHIFT);
+       else
+               reg |= ARM_SMMU_CB_VMID(root_cfg) << CBAR_VMID_SHIFT;
        writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBAR(root_cfg->cbndx));
 
        if (smmu->version > 1) {
@@ -687,15 +711,11 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain)
 
        /* TTBR0 */
        reg = __pa(root_cfg->pgd);
-#ifndef __BIG_ENDIAN
        writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_LO);
        reg = (phys_addr_t)__pa(root_cfg->pgd) >> 32;
+       if (stage1)
+               reg |= ARM_SMMU_CB_ASID(root_cfg) << TTBRn_HI_ASID_SHIFT;
        writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_HI);
-#else
-       writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_HI);
-       reg = (phys_addr_t)__pa(root_cfg->pgd) >> 32;
-       writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0_LO);
-#endif
 
        /*
         * TTBCR
@@ -750,10 +770,6 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain)
                writel_relaxed(reg, cb_base + ARM_SMMU_CB_S1_MAIR0);
        }
 
-       /* Nuke the TLB */
-       writel_relaxed(root_cfg->vmid, gr0_base + ARM_SMMU_GR0_TLBIVMID);
-       arm_smmu_tlb_sync(smmu);
-
        /* SCTLR */
        reg = SCTLR_CFCFG | SCTLR_CFIE | SCTLR_CFRE | SCTLR_M | SCTLR_EAE_SBOP;
        if (stage1)
@@ -790,11 +806,6 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
                return -ENODEV;
        }
 
-       ret = __arm_smmu_alloc_bitmap(smmu->vmid_map, 0, ARM_SMMU_NUM_VMIDS);
-       if (IS_ERR_VALUE(ret))
-               return ret;
-
-       root_cfg->vmid = ret;
        if (smmu->features & ARM_SMMU_FEAT_TRANS_NESTED) {
                /*
                 * We will likely want to change this if/when KVM gets
@@ -813,10 +824,9 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
        ret = __arm_smmu_alloc_bitmap(smmu->context_map, start,
                                      smmu->num_context_banks);
        if (IS_ERR_VALUE(ret))
-               goto out_free_vmid;
+               return ret;
 
        root_cfg->cbndx = ret;
-
        if (smmu->version == 1) {
                root_cfg->irptndx = atomic_inc_return(&smmu->irptndx);
                root_cfg->irptndx %= smmu->num_context_irqs;
@@ -840,8 +850,6 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain,
 
 out_free_context:
        __arm_smmu_free_bitmap(smmu->context_map, root_cfg->cbndx);
-out_free_vmid:
-       __arm_smmu_free_bitmap(smmu->vmid_map, root_cfg->vmid);
        return ret;
 }
 
@@ -850,17 +858,22 @@ static void arm_smmu_destroy_domain_context(struct iommu_domain *domain)
        struct arm_smmu_domain *smmu_domain = domain->priv;
        struct arm_smmu_cfg *root_cfg = &smmu_domain->root_cfg;
        struct arm_smmu_device *smmu = root_cfg->smmu;
+       void __iomem *cb_base;
        int irq;
 
        if (!smmu)
                return;
 
+       /* Disable the context bank and nuke the TLB before freeing it. */
+       cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, root_cfg->cbndx);
+       writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR);
+       arm_smmu_tlb_inv_context(root_cfg);
+
        if (root_cfg->irptndx != -1) {
                irq = smmu->irqs[smmu->num_global_irqs + root_cfg->irptndx];
                free_irq(irq, domain);
        }
 
-       __arm_smmu_free_bitmap(smmu->vmid_map, root_cfg->vmid);
        __arm_smmu_free_bitmap(smmu->context_map, root_cfg->cbndx);
 }
 
@@ -959,6 +972,11 @@ static void arm_smmu_free_pgtables(struct arm_smmu_domain *smmu_domain)
 static void arm_smmu_domain_destroy(struct iommu_domain *domain)
 {
        struct arm_smmu_domain *smmu_domain = domain->priv;
+
+       /*
+        * Free the domain resources. We assume that all devices have
+        * already been detached.
+        */
        arm_smmu_destroy_domain_context(domain);
        arm_smmu_free_pgtables(smmu_domain);
        kfree(smmu_domain);
@@ -1199,7 +1217,7 @@ static int arm_smmu_alloc_init_pte(struct arm_smmu_device *smmu, pmd_t *pmd,
        }
 
        if (stage == 1) {
-               pteval |= ARM_SMMU_PTE_AP_UNPRIV;
+               pteval |= ARM_SMMU_PTE_AP_UNPRIV | ARM_SMMU_PTE_nG;
                if (!(flags & IOMMU_WRITE) && (flags & IOMMU_READ))
                        pteval |= ARM_SMMU_PTE_AP_RDONLY;
 
@@ -1415,13 +1433,9 @@ static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova,
 {
        int ret;
        struct arm_smmu_domain *smmu_domain = domain->priv;
-       struct arm_smmu_cfg *root_cfg = &smmu_domain->root_cfg;
-       struct arm_smmu_device *smmu = root_cfg->smmu;
-       void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
 
        ret = arm_smmu_handle_mapping(smmu_domain, iova, 0, size, 0);
-       writel_relaxed(root_cfg->vmid, gr0_base + ARM_SMMU_GR0_TLBIVMID);
-       arm_smmu_tlb_sync(smmu);
+       arm_smmu_tlb_inv_context(&smmu_domain->root_cfg);
        return ret ? ret : size;
 }
 
@@ -1544,6 +1558,7 @@ static struct iommu_ops arm_smmu_ops = {
 static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
 {
        void __iomem *gr0_base = ARM_SMMU_GR0(smmu);
+       void __iomem *sctlr_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB_SCTLR;
        int i = 0;
        u32 scr0 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sCR0);
 
@@ -1553,6 +1568,10 @@ static void arm_smmu_device_reset(struct arm_smmu_device *smmu)
                writel_relaxed(S2CR_TYPE_BYPASS, gr0_base + ARM_SMMU_GR0_S2CR(i));
        }
 
+       /* Make sure all context banks are disabled */
+       for (i = 0; i < smmu->num_context_banks; ++i)
+               writel_relaxed(0, sctlr_base + ARM_SMMU_CB(smmu, i));
+
        /* Invalidate the TLB, just in case */
        writel_relaxed(0, gr0_base + ARM_SMMU_GR0_STLBIALL);
        writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLH);
@@ -1906,7 +1925,7 @@ static int arm_smmu_device_remove(struct platform_device *pdev)
                of_node_put(master->of_node);
        }
 
-       if (!bitmap_empty(smmu->vmid_map, ARM_SMMU_NUM_VMIDS))
+       if (!bitmap_empty(smmu->context_map, ARM_SMMU_MAX_CBS))
                dev_err(dev, "removing device with active domains!\n");
 
        for (i = 0; i < smmu->num_global_irqs; ++i)
index 3f32d64..0740189 100644 (file)
@@ -247,50 +247,6 @@ static void __sysmmu_set_prefbuf(void __iomem *sfrbase, unsigned long base,
        __raw_writel(size - 1 + base,  sfrbase + REG_PB0_EADDR + idx * 8);
 }
 
-void exynos_sysmmu_set_prefbuf(struct device *dev,
-                               unsigned long base0, unsigned long size0,
-                               unsigned long base1, unsigned long size1)
-{
-       struct sysmmu_drvdata *data = dev_get_drvdata(dev->archdata.iommu);
-       unsigned long flags;
-       int i;
-
-       BUG_ON((base0 + size0) <= base0);
-       BUG_ON((size1 > 0) && ((base1 + size1) <= base1));
-
-       read_lock_irqsave(&data->lock, flags);
-       if (!is_sysmmu_active(data))
-               goto finish;
-
-       for (i = 0; i < data->nsfrs; i++) {
-               if ((readl(data->sfrbases[i] + REG_MMU_VERSION) >> 28) == 3) {
-                       if (!sysmmu_block(data->sfrbases[i]))
-                               continue;
-
-                       if (size1 == 0) {
-                               if (size0 <= SZ_128K) {
-                                       base1 = base0;
-                                       size1 = size0;
-                               } else {
-                                       size1 = size0 -
-                                               ALIGN(size0 / 2, SZ_64K);
-                                       size0 = size0 - size1;
-                                       base1 = base0 + size0;
-                               }
-                       }
-
-                       __sysmmu_set_prefbuf(
-                                       data->sfrbases[i], base0, size0, 0);
-                       __sysmmu_set_prefbuf(
-                                       data->sfrbases[i], base1, size1, 1);
-
-                       sysmmu_unblock(data->sfrbases[i]);
-               }
-       }
-finish:
-       read_unlock_irqrestore(&data->lock, flags);
-}
-
 static void __set_fault_handler(struct sysmmu_drvdata *data,
                                        sysmmu_fault_handler_t handler)
 {
diff --git a/drivers/iommu/fsl_pamu.c b/drivers/iommu/fsl_pamu.c
new file mode 100644 (file)
index 0000000..cba0498
--- /dev/null
@@ -0,0 +1,1309 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright (C) 2013 Freescale Semiconductor, Inc.
+ *
+ */
+
+#define pr_fmt(fmt)    "fsl-pamu: %s: " fmt, __func__
+
+#include <linux/init.h>
+#include <linux/iommu.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/device.h>
+#include <linux/of_platform.h>
+#include <linux/bootmem.h>
+#include <linux/genalloc.h>
+#include <asm/io.h>
+#include <asm/bitops.h>
+#include <asm/fsl_guts.h>
+
+#include "fsl_pamu.h"
+
+/* define indexes for each operation mapping scenario */
+#define OMI_QMAN        0x00
+#define OMI_FMAN        0x01
+#define OMI_QMAN_PRIV   0x02
+#define OMI_CAAM        0x03
+
+#define make64(high, low) (((u64)(high) << 32) | (low))
+
+struct pamu_isr_data {
+       void __iomem *pamu_reg_base;    /* Base address of PAMU regs*/
+       unsigned int count;             /* The number of PAMUs */
+};
+
+static struct paace *ppaact;
+static struct paace *spaact;
+static struct ome *omt;
+
+/*
+ * Table for matching compatible strings, for device tree
+ * guts node, for QorIQ SOCs.
+ * "fsl,qoriq-device-config-2.0" corresponds to T4 & B4
+ * SOCs. For the older SOCs "fsl,qoriq-device-config-1.0"
+ * string would be used.
+*/
+static const struct of_device_id guts_device_ids[] = {
+       { .compatible = "fsl,qoriq-device-config-1.0", },
+       { .compatible = "fsl,qoriq-device-config-2.0", },
+       {}
+};
+
+
+/*
+ * Table for matching compatible strings, for device tree
+ * L3 cache controller node.
+ * "fsl,t4240-l3-cache-controller" corresponds to T4,
+ * "fsl,b4860-l3-cache-controller" corresponds to B4 &
+ * "fsl,p4080-l3-cache-controller" corresponds to other,
+ * SOCs.
+*/
+static const struct of_device_id l3_device_ids[] = {
+       { .compatible = "fsl,t4240-l3-cache-controller", },
+       { .compatible = "fsl,b4860-l3-cache-controller", },
+       { .compatible = "fsl,p4080-l3-cache-controller", },
+       {}
+};
+
+/* maximum subwindows permitted per liodn */
+static u32 max_subwindow_count;
+
+/* Pool for fspi allocation */
+struct gen_pool *spaace_pool;
+
+/**
+ * pamu_get_max_subwin_cnt() - Return the maximum supported
+ * subwindow count per liodn.
+ *
+ */
+u32 pamu_get_max_subwin_cnt()
+{
+       return max_subwindow_count;
+}
+
+/**
+ * pamu_get_ppaace() - Return the primary PACCE
+ * @liodn: liodn PAACT index for desired PAACE
+ *
+ * Returns the ppace pointer upon success else return
+ * null.
+ */
+static struct paace *pamu_get_ppaace(int liodn)
+{
+       if (!ppaact || liodn >= PAACE_NUMBER_ENTRIES) {
+               pr_debug("PPAACT doesn't exist\n");
+               return NULL;
+       }
+
+       return &ppaact[liodn];
+}
+
+/**
+ * pamu_enable_liodn() - Set valid bit of PACCE
+ * @liodn: liodn PAACT index for desired PAACE
+ *
+ * Returns 0 upon success else error code < 0 returned
+ */
+int pamu_enable_liodn(int liodn)
+{
+       struct paace *ppaace;
+
+       ppaace = pamu_get_ppaace(liodn);
+       if (!ppaace) {
+               pr_debug("Invalid primary paace entry\n");
+               return -ENOENT;
+       }
+
+       if (!get_bf(ppaace->addr_bitfields, PPAACE_AF_WSE)) {
+               pr_debug("liodn %d not configured\n", liodn);
+               return -EINVAL;
+       }
+
+       /* Ensure that all other stores to the ppaace complete first */
+       mb();
+
+       set_bf(ppaace->addr_bitfields, PAACE_AF_V, PAACE_V_VALID);
+       mb();
+
+       return 0;
+}
+
+/**
+ * pamu_disable_liodn() - Clears valid bit of PACCE
+ * @liodn: liodn PAACT index for desired PAACE
+ *
+ * Returns 0 upon success else error code < 0 returned
+ */
+int pamu_disable_liodn(int liodn)
+{
+       struct paace *ppaace;
+
+       ppaace = pamu_get_ppaace(liodn);
+       if (!ppaace) {
+               pr_debug("Invalid primary paace entry\n");
+               return -ENOENT;
+       }
+
+       set_bf(ppaace->addr_bitfields, PAACE_AF_V, PAACE_V_INVALID);
+       mb();
+
+       return 0;
+}
+
+/* Derive the window size encoding for a particular PAACE entry */
+static unsigned int map_addrspace_size_to_wse(phys_addr_t addrspace_size)
+{
+       /* Bug if not a power of 2 */
+       BUG_ON(!is_power_of_2(addrspace_size));
+
+       /* window size is 2^(WSE+1) bytes */
+       return __ffs(addrspace_size) - 1;
+}
+
+/* Derive the PAACE window count encoding for the subwindow count */
+static unsigned int map_subwindow_cnt_to_wce(u32 subwindow_cnt)
+{
+       /* window count is 2^(WCE+1) bytes */
+       return __ffs(subwindow_cnt) - 1;
+}
+
+/*
+ * Set the PAACE type as primary and set the coherency required domain
+ * attribute
+ */
+static void pamu_init_ppaace(struct paace *ppaace)
+{
+       set_bf(ppaace->addr_bitfields, PAACE_AF_PT, PAACE_PT_PRIMARY);
+
+       set_bf(ppaace->domain_attr.to_host.coherency_required, PAACE_DA_HOST_CR,
+              PAACE_M_COHERENCE_REQ);
+}
+
+/*
+ * Set the PAACE type as secondary and set the coherency required domain
+ * attribute.
+ */
+static void pamu_init_spaace(struct paace *spaace)
+{
+       set_bf(spaace->addr_bitfields, PAACE_AF_PT, PAACE_PT_SECONDARY);
+       set_bf(spaace->domain_attr.to_host.coherency_required, PAACE_DA_HOST_CR,
+              PAACE_M_COHERENCE_REQ);
+}
+
+/*
+ * Look up the secondary PAACE (SPAACE) for subwindow @wnum of the given
+ * primary PAACE. Returns NULL when @wnum is outside the window count
+ * encoded in the primary entry.
+ */
+static struct paace *pamu_get_spaace(struct paace *paace, u32 wnum)
+{
+       u32 subwin_cnt = 1UL << (get_bf(paace->impl_attr, PAACE_IA_WCE) + 1);
+
+       if (wnum >= subwin_cnt) {
+               pr_debug("secondary paace out of bounds\n");
+               return NULL;
+       }
+
+       return &spaact[paace->fspi + wnum];
+}
+
+/**
+ * pamu_get_fspi_and_allocate() - Allocates fspi index and reserves subwindows
+ *                                required for primary PAACE in the secondary
+ *                                PAACE table.
+ * @subwin_cnt: Number of subwindows to be reserved.
+ *
+ * A PPAACE entry may have a number of associated subwindows. A subwindow
+ * corresponds to a SPAACE entry in the SPAACT table. Each PAACE entry stores
+ * the index (fspi) of the first SPAACE entry in the SPAACT table. This
+ * function returns the index of the first SPAACE entry. The remaining
+ * SPAACE entries are reserved contiguously from that index.
+ *
+ * Returns a valid fspi index in the range of 0 - SPAACE_NUMBER_ENTRIES on success.
+ * If no SPAACE entry is available or the allocator can not reserve the required
+ * number of contiguous entries function returns ULONG_MAX indicating a failure.
+ *
+*/
+static unsigned long pamu_get_fspi_and_allocate(u32 subwin_cnt)
+{
+       unsigned long spaace_addr;
+
+       /* the pool hands out addresses that lie inside the spaact[] table */
+       spaace_addr = gen_pool_alloc(spaace_pool, subwin_cnt * sizeof(struct paace));
+       if (!spaace_addr)
+               return ULONG_MAX;
+
+       /* convert the allocated address into an index into spaact[] */
+       return (spaace_addr - (unsigned long)spaact) / (sizeof(struct paace));
+}
+
+/* Release the subwindows reserved for a particular LIODN */
+void pamu_free_subwins(int liodn)
+{
+       struct paace *ppaace;
+       u32 subwin_cnt, size;
+
+       ppaace = pamu_get_ppaace(liodn);
+       if (!ppaace) {
+               pr_debug("Invalid liodn entry\n");
+               return;
+       }
+
+       /* PPAACE_AF_MW set means this LIODN was configured with subwindows */
+       if (get_bf(ppaace->addr_bitfields, PPAACE_AF_MW)) {
+               subwin_cnt = 1UL << (get_bf(ppaace->impl_attr, PAACE_IA_WCE) + 1);
+               /*
+                * Only subwin_cnt - 1 SPAACEs were reserved: the first
+                * subwindow lives in the primary PAACE itself (see
+                * pamu_config_ppaace()).
+                */
+               size = (subwin_cnt - 1) * sizeof(struct paace);
+               gen_pool_free(spaace_pool, (unsigned long)&spaact[ppaace->fspi], size);
+               set_bf(ppaace->addr_bitfields, PPAACE_AF_MW, 0);
+       }
+}
+
+/*
+ * Update the stash destination (CPU cache target) for the given LIODN.
+ * A non-zero @subwin selects secondary window @subwin - 1; zero selects
+ * the primary PAACE.
+ */
+int pamu_update_paace_stash(int liodn, u32 subwin, u32 value)
+{
+       struct paace *paace = pamu_get_ppaace(liodn);
+
+       if (!paace) {
+               pr_debug("Invalid liodn entry\n");
+               return -ENOENT;
+       }
+
+       if (subwin) {
+               paace = pamu_get_spaace(paace, subwin - 1);
+               if (!paace)
+                       return -ENOENT;
+       }
+
+       set_bf(paace->impl_attr, PAACE_IA_CID, value);
+       mb();
+
+       return 0;
+}
+
+/* Disable a subwindow corresponding to the LIODN */
+int pamu_disable_spaace(int liodn, u32 subwin)
+{
+       struct paace *paace;
+
+       paace = pamu_get_ppaace(liodn);
+       if (!paace) {
+               pr_debug("Invalid liodn entry\n");
+               return -ENOENT;
+       }
+       if (subwin) {
+               /* subwindows 1..N-1 live in the SPAACT table */
+               paace = pamu_get_spaace(paace, subwin - 1);
+               if (!paace) {
+                       return -ENOENT;
+               }
+               set_bf(paace->addr_bitfields, PAACE_AF_V,
+                        PAACE_V_INVALID);
+       } else {
+               /*
+                * Subwindow 0 is the primary PAACE; it is left valid but
+                * all access permissions are denied — presumably because
+                * invalidating it would disable the entire LIODN (TODO
+                * confirm against the PAMU reference manual).
+                */
+               set_bf(paace->addr_bitfields, PAACE_AF_AP,
+                        PAACE_AP_PERMS_DENIED);
+       }
+
+       mb();
+
+       return 0;
+}
+
+
+/**
+ * pamu_config_ppaace() - Sets up PPAACE entry for specified liodn
+ *
+ * @liodn: Logical IO device number
+ * @win_addr: starting address of DSA window
+ * @win_size: size of DSA window
+ * @omi: Operation mapping index -- if ~omi == 0 then omi not defined
+ * @rpn: real (true physical) page number
+ * @stashid: cache stash id for associated cpu -- if ~stashid == 0 then
+ *          stashid not defined
+ * @snoopid: snoop id for hardware coherency -- if ~snoopid == 0 then
+ *          snoopid not defined
+ * @subwin_cnt: number of sub-windows
+ * @prot: window permissions
+ *
+ * Returns 0 upon success else error code < 0 returned
+ */
+int pamu_config_ppaace(int liodn, phys_addr_t win_addr, phys_addr_t win_size,
+                      u32 omi, unsigned long rpn, u32 snoopid, u32 stashid,
+                      u32 subwin_cnt, int prot)
+{
+       struct paace *ppaace;
+       unsigned long fspi;
+
+       if (!is_power_of_2(win_size) || win_size < PAMU_PAGE_SIZE) {
+               pr_debug("window size too small or not a power of two %llx\n", win_size);
+               return -EINVAL;
+       }
+
+       if (win_addr & (win_size - 1)) {
+               pr_debug("window address is not aligned with window size\n");
+               return -EINVAL;
+       }
+
+       ppaace = pamu_get_ppaace(liodn);
+       if (!ppaace) {
+               return -ENOENT;
+       }
+
+       /* window size is 2^(WSE+1) bytes */
+       set_bf(ppaace->addr_bitfields, PPAACE_AF_WSE,
+               map_addrspace_size_to_wse(win_size));
+
+       pamu_init_ppaace(ppaace);
+
+       /* window base: upper bits in wbah, remainder in the WBAL bitfield */
+       ppaace->wbah = win_addr >> (PAMU_PAGE_SHIFT + 20);
+       set_bf(ppaace->addr_bitfields, PPAACE_AF_WBAL,
+              (win_addr >> PAMU_PAGE_SHIFT));
+
+       /* set up operation mapping if it's configured */
+       if (omi < OME_NUMBER_ENTRIES) {
+               set_bf(ppaace->impl_attr, PAACE_IA_OTM, PAACE_OTM_INDEXED);
+               ppaace->op_encode.index_ot.omi = omi;
+       } else if (~omi != 0) {
+               /* omi != ~0 means the caller passed an index, but a bad one */
+               pr_debug("bad operation mapping index: %d\n", omi);
+               return -EINVAL;
+       }
+
+       /* configure stash id */
+       if (~stashid != 0)
+               set_bf(ppaace->impl_attr, PAACE_IA_CID, stashid);
+
+       /* configure snoop id */
+       if (~snoopid != 0)
+               ppaace->domain_attr.to_host.snpid = snoopid;
+
+       if (subwin_cnt) {
+               /* The first entry is in the primary PAACE instead */
+               fspi = pamu_get_fspi_and_allocate(subwin_cnt - 1);
+               if (fspi == ULONG_MAX) {
+                       pr_debug("spaace indexes exhausted\n");
+                       return -EINVAL;
+               }
+
+               /* window count is 2^(WCE+1) bytes */
+               set_bf(ppaace->impl_attr, PAACE_IA_WCE,
+                      map_subwindow_cnt_to_wce(subwin_cnt));
+               set_bf(ppaace->addr_bitfields, PPAACE_AF_MW, 0x1);
+               ppaace->fspi = fspi;
+       } else {
+               /* single-window mode: translate directly via this entry */
+               set_bf(ppaace->impl_attr, PAACE_IA_ATM, PAACE_ATM_WINDOW_XLATE);
+               ppaace->twbah = rpn >> 20;
+               set_bf(ppaace->win_bitfields, PAACE_WIN_TWBAL, rpn);
+               set_bf(ppaace->addr_bitfields, PAACE_AF_AP, prot);
+               set_bf(ppaace->impl_attr, PAACE_IA_WCE, 0);
+               set_bf(ppaace->addr_bitfields, PPAACE_AF_MW, 0);
+       }
+       mb();
+
+       return 0;
+}
+
+/**
+ * pamu_config_spaace() - Sets up SPAACE entry for specified subwindow
+ *
+ * @liodn:  Logical IO device number
+ * @subwin_cnt:  number of sub-windows associated with dma-window
+ * @subwin: subwindow index
+ * @subwin_size: size of subwindow
+ * @omi: Operation mapping index
+ * @rpn: real (true physical) page number
+ * @snoopid: snoop id for hardware coherency -- if ~snoopid == 0 then
+ *                       snoopid not defined
+ * @stashid: cache stash id for associated cpu
+ * @enable: enable/disable subwindow after reconfiguration
+ * @prot: sub window permissions
+ *
+ * Returns 0 upon success else error code < 0 returned
+ */
+int pamu_config_spaace(int liodn, u32 subwin_cnt, u32 subwin,
+                      phys_addr_t subwin_size, u32 omi, unsigned long rpn,
+                      u32 snoopid, u32 stashid, int enable, int prot)
+{
+       struct paace *paace;
+
+
+       /* setup sub-windows */
+       if (!subwin_cnt) {
+               pr_debug("Invalid subwindow count\n");
+               return -EINVAL;
+       }
+
+       paace = pamu_get_ppaace(liodn);
+       if (subwin > 0 && subwin < subwin_cnt && paace) {
+               paace = pamu_get_spaace(paace, subwin - 1);
+
+               /* first-time setup: mark as secondary and record owning liodn */
+               if (paace && !(paace->addr_bitfields & PAACE_V_VALID)) {
+                       pamu_init_spaace(paace);
+                       set_bf(paace->addr_bitfields, SPAACE_AF_LIODN, liodn);
+               }
+       }
+
+       if (!paace) {
+               pr_debug("Invalid liodn entry\n");
+               return -ENOENT;
+       }
+
+       if (!is_power_of_2(subwin_size) || subwin_size < PAMU_PAGE_SIZE) {
+               pr_debug("subwindow size out of range, or not a power of 2\n");
+               return -EINVAL;
+       }
+
+       if (rpn == ULONG_MAX) {
+               pr_debug("real page number out of range\n");
+               return -EINVAL;
+       }
+
+       /* window size is 2^(WSE+1) bytes */
+       set_bf(paace->win_bitfields, PAACE_WIN_SWSE,
+              map_addrspace_size_to_wse(subwin_size));
+
+       set_bf(paace->impl_attr, PAACE_IA_ATM, PAACE_ATM_WINDOW_XLATE);
+       paace->twbah = rpn >> 20;
+       set_bf(paace->win_bitfields, PAACE_WIN_TWBAL, rpn);
+       set_bf(paace->addr_bitfields, PAACE_AF_AP, prot);
+
+       /* configure snoop id */
+       if (~snoopid != 0)
+               paace->domain_attr.to_host.snpid = snoopid;
+
+       /* set up operation mapping if it's configured */
+       if (omi < OME_NUMBER_ENTRIES) {
+               set_bf(paace->impl_attr, PAACE_IA_OTM, PAACE_OTM_INDEXED);
+               paace->op_encode.index_ot.omi = omi;
+       } else if (~omi != 0) {
+               pr_debug("bad operation mapping index: %d\n", omi);
+               return -EINVAL;
+       }
+
+       if (~stashid != 0)
+               set_bf(paace->impl_attr, PAACE_IA_CID, stashid);
+
+       /* ensure all fields are written before the entry is made valid */
+       smp_wmb();
+
+       if (enable)
+               set_bf(paace->addr_bitfields, PAACE_AF_V, PAACE_V_VALID);
+
+       mb();
+
+       return 0;
+}
+
+/**
+* get_ome_index() - Returns the index in the operation mapping table
+*                   for device.
+* @omi_index: pointer for storing the index value
+* @dev: device whose OF node's compatible string selects the index
+*
+* Leaves *omi_index untouched if the device is neither a QMAN portal
+* nor QMAN. NOTE(review): assumes @dev and @dev->of_node are non-NULL
+* — confirm against all callers.
+*/
+void get_ome_index(u32 *omi_index, struct device *dev)
+{
+       if (of_device_is_compatible(dev->of_node, "fsl,qman-portal"))
+               *omi_index = OMI_QMAN;
+       if (of_device_is_compatible(dev->of_node, "fsl,qman"))
+               *omi_index = OMI_QMAN_PRIV;
+}
+
+/**
+ * get_stash_id - Returns stash destination id corresponding to a
+ *                cache type and vcpu.
+ * @stash_dest_hint: L1, L2 or L3
+ * @vcpu: vcpu target for a particular cache type.
+ *
+ * Walks the device tree: finds the cpu node whose "reg" contains @vcpu,
+ * then follows "next-level-cache" phandles until the requested cache
+ * level is reached and returns its "cache-stash-id".
+ *
+ * Returns stash id on success or ~(u32)0 on failure.
+ */
+u32 get_stash_id(u32 stash_dest_hint, u32 vcpu)
+{
+       const u32 *prop;
+       struct device_node *node;
+       u32 cache_level;
+       u32 phandle;
+       int len, found = 0;
+       int i;
+
+       /* Fastpath, exit early if L3/CPC cache is target for stashing */
+       if (stash_dest_hint == PAMU_ATTR_CACHE_L3) {
+               node = of_find_matching_node(NULL, l3_device_ids);
+               if (node) {
+                       prop = of_get_property(node, "cache-stash-id", NULL);
+                       if (!prop) {
+                               pr_debug("missing cache-stash-id at %s\n", node->full_name);
+                               of_node_put(node);
+                               return ~(u32)0;
+                       }
+                       of_node_put(node);
+                       return be32_to_cpup(prop);
+               }
+               return ~(u32)0;
+       }
+
+       /* find the cpu node whose "reg" property contains @vcpu */
+       for_each_node_by_type(node, "cpu") {
+               prop = of_get_property(node, "reg", &len);
+               if (!prop)
+                       continue;       /* don't read uninitialized len */
+               for (i = 0; i < len / sizeof(u32); i++) {
+                       if (be32_to_cpup(&prop[i]) == vcpu) {
+                               found = 1;
+                               goto found_cpu_node;
+                       }
+               }
+       }
+found_cpu_node:
+
+       /* find the hwnode that represents the cache */
+       for (cache_level = PAMU_ATTR_CACHE_L1; (cache_level < PAMU_ATTR_CACHE_L3) && found; cache_level++) {
+               if (stash_dest_hint == cache_level) {
+                       prop = of_get_property(node, "cache-stash-id", NULL);
+                       if (!prop) {
+                               pr_debug("missing cache-stash-id at %s\n", node->full_name);
+                               of_node_put(node);
+                               return ~(u32)0;
+                       }
+                       of_node_put(node);
+                       return be32_to_cpup(prop);
+               }
+
+               prop = of_get_property(node, "next-level-cache", NULL);
+               if (!prop) {
+                       pr_debug("can't find next-level-cache at %s\n",
+                               node->full_name);
+                       of_node_put(node);
+                       return ~(u32)0;  /* can't traverse any further */
+               }
+
+               /*
+                * Copy the phandle before dropping the node reference:
+                * @prop points into the node's property data and must not
+                * be used after of_node_put().
+                */
+               phandle = *prop;
+               of_node_put(node);
+
+               /* advance to next node in cache hierarchy */
+               node = of_find_node_by_phandle(phandle);
+               if (!node) {
+                       /* node is NULL here — don't dereference it */
+                       pr_debug("Invalid node for cache hierarchy\n");
+                       return ~(u32)0;
+               }
+       }
+
+       /* drop the reference still held when the walk exhausts the levels */
+       of_node_put(node);
+
+       pr_debug("stash dest not found for %d on vcpu %d\n",
+                 stash_dest_hint, vcpu);
+       return ~(u32)0;
+}
+
+/* Identify if the PAACT table entry belongs to QMAN, BMAN or QMAN Portal */
+#define QMAN_PAACE 1
+#define QMAN_PORTAL_PAACE 2
+#define BMAN_PAACE 3
+
+/**
+ * Setup operation mapping and stash destinations for QMAN and QMAN portal.
+ * Memory accesses to QMAN and BMAN private memory need not be coherent, so
+ * clear the PAACE entry coherency attribute for them.
+ */
+static void setup_qbman_paace(struct paace *ppaace, int  paace_type)
+{
+       switch (paace_type) {
+       case QMAN_PAACE:
+               set_bf(ppaace->impl_attr, PAACE_IA_OTM, PAACE_OTM_INDEXED);
+               ppaace->op_encode.index_ot.omi = OMI_QMAN_PRIV;
+               /* setup QMAN Private data stashing for the L3 cache */
+               set_bf(ppaace->impl_attr, PAACE_IA_CID, get_stash_id(PAMU_ATTR_CACHE_L3, 0));
+               /* private memory accesses need not be coherent */
+               set_bf(ppaace->domain_attr.to_host.coherency_required, PAACE_DA_HOST_CR,
+                      0);
+               break;
+       case QMAN_PORTAL_PAACE:
+               set_bf(ppaace->impl_attr, PAACE_IA_OTM, PAACE_OTM_INDEXED);
+               ppaace->op_encode.index_ot.omi = OMI_QMAN;
+               /* Set DQRR and Frame stashing for the L3 cache */
+               set_bf(ppaace->impl_attr, PAACE_IA_CID, get_stash_id(PAMU_ATTR_CACHE_L3, 0));
+               break;
+       case BMAN_PAACE:
+               /* private memory accesses need not be coherent */
+               set_bf(ppaace->domain_attr.to_host.coherency_required, PAACE_DA_HOST_CR,
+                      0);
+               break;
+       }
+}
+
+/**
+ * Setup the operation mapping table for various devices. This is a static
+ * table where each table index corresponds to a particular device. PAMU uses
+ * this table to translate device transaction to appropriate corenet
+ * transaction.
+ *
+ * @omt: virtual base address of the operation mapping table to populate
+ */
+static void __init setup_omt(struct ome *omt)
+{
+       struct ome *ome;
+
+       /* Configure OMI_QMAN */
+       ome = &omt[OMI_QMAN];
+
+       ome->moe[IOE_READ_IDX] = EOE_VALID | EOE_READ;
+       ome->moe[IOE_EREAD0_IDX] = EOE_VALID | EOE_RSA;
+       ome->moe[IOE_WRITE_IDX] = EOE_VALID | EOE_WRITE;
+       ome->moe[IOE_EWRITE0_IDX] = EOE_VALID | EOE_WWSAO;
+
+       ome->moe[IOE_DIRECT0_IDX] = EOE_VALID | EOE_LDEC;
+       ome->moe[IOE_DIRECT1_IDX] = EOE_VALID | EOE_LDECPE;
+
+       /* Configure OMI_FMAN */
+       ome = &omt[OMI_FMAN];
+       ome->moe[IOE_READ_IDX]  = EOE_VALID | EOE_READI;
+       ome->moe[IOE_WRITE_IDX] = EOE_VALID | EOE_WRITE;
+
+       /* Configure OMI_QMAN private */
+       ome = &omt[OMI_QMAN_PRIV];
+       ome->moe[IOE_READ_IDX]  = EOE_VALID | EOE_READ;
+       ome->moe[IOE_WRITE_IDX] = EOE_VALID | EOE_WRITE;
+       ome->moe[IOE_EREAD0_IDX] = EOE_VALID | EOE_RSA;
+       ome->moe[IOE_EWRITE0_IDX] = EOE_VALID | EOE_WWSA;
+
+       /* Configure OMI_CAAM */
+       ome = &omt[OMI_CAAM];
+       ome->moe[IOE_READ_IDX]  = EOE_VALID | EOE_READI;
+       ome->moe[IOE_WRITE_IDX] = EOE_VALID | EOE_WRITE;
+}
+
+/*
+ * Get the maximum number of subwindows supported by PAMU.
+ * (Only the MWCE field of the PAMU_PC3 capability register is read;
+ * the PAACT entry count is not queried here.)
+ */
+static void get_pamu_cap_values(unsigned long pamu_reg_base)
+{
+       u32 pc_val;
+
+       pc_val = in_be32((u32 *)(pamu_reg_base + PAMU_PC3));
+       /* Maximum number of subwindows per liodn */
+       max_subwindow_count = 1 << (1 + PAMU_PC3_MWCE(pc_val));
+}
+
+/* Setup PAMU registers pointing to PAACT, SPAACT and OMT */
+int setup_one_pamu(unsigned long pamu_reg_base, unsigned long pamu_reg_size,
+                  phys_addr_t ppaact_phys, phys_addr_t spaact_phys,
+                  phys_addr_t omt_phys)
+{
+       u32 *pc;
+       struct pamu_mmap_regs *pamu_regs;
+
+       pc = (u32 *) (pamu_reg_base + PAMU_PC);
+       pamu_regs = (struct pamu_mmap_regs *)
+               (pamu_reg_base + PAMU_MMAP_REGS_BASE);
+
+       /* set up pointers to corenet control blocks */
+
+       /* each table is described by a base pair (*bah/*bal) and a
+        * limit pair (*lah/*lal) set to base + table size */
+       out_be32(&pamu_regs->ppbah, upper_32_bits(ppaact_phys));
+       out_be32(&pamu_regs->ppbal, lower_32_bits(ppaact_phys));
+       ppaact_phys = ppaact_phys + PAACT_SIZE;
+       out_be32(&pamu_regs->pplah, upper_32_bits(ppaact_phys));
+       out_be32(&pamu_regs->pplal, lower_32_bits(ppaact_phys));
+
+       out_be32(&pamu_regs->spbah, upper_32_bits(spaact_phys));
+       out_be32(&pamu_regs->spbal, lower_32_bits(spaact_phys));
+       spaact_phys = spaact_phys + SPAACT_SIZE;
+       out_be32(&pamu_regs->splah, upper_32_bits(spaact_phys));
+       out_be32(&pamu_regs->splal, lower_32_bits(spaact_phys));
+
+       out_be32(&pamu_regs->obah, upper_32_bits(omt_phys));
+       out_be32(&pamu_regs->obal, lower_32_bits(omt_phys));
+       omt_phys = omt_phys + OMT_SIZE;
+       out_be32(&pamu_regs->olah, upper_32_bits(omt_phys));
+       out_be32(&pamu_regs->olal, lower_32_bits(omt_phys));
+
+       /*
+        * set PAMU enable bit,
+        * allow ppaact & omt to be cached
+        * & enable PAMU access violation interrupts.
+        */
+
+       out_be32((u32 *)(pamu_reg_base + PAMU_PICS),
+                       PAMU_ACCESS_VIOLATION_ENABLE);
+       out_be32(pc, PAMU_PC_PE | PAMU_PC_OCE | PAMU_PC_SPCC | PAMU_PC_PPCC);
+       return 0;
+}
+
+/* Enable all device LIODNS */
+static void __init setup_liodns(void)
+{
+       int i, len;
+       struct paace *ppaace;
+       struct device_node *node = NULL;
+       const u32 *prop;
+
+       for_each_node_with_property(node, "fsl,liodn") {
+               prop = of_get_property(node, "fsl,liodn", &len);
+               for (i = 0; i < len / sizeof(u32); i++) {
+                       int liodn;
+
+                       liodn = be32_to_cpup(&prop[i]);
+                       if (liodn >= PAACE_NUMBER_ENTRIES) {
+                               pr_debug("Invalid LIODN value %d\n", liodn);
+                               continue;
+                       }
+                       /* NOTE(review): ppaace assumed non-NULL since liodn
+                        * was range-checked above — confirm pamu_get_ppaace() */
+                       ppaace = pamu_get_ppaace(liodn);
+                       pamu_init_ppaace(ppaace);
+                       /* window size is 2^(WSE+1) bytes */
+                       /* WSE=35 => a 2^36-byte (64 GiB) window at address 0 */
+                       set_bf(ppaace->addr_bitfields, PPAACE_AF_WSE, 35);
+                       ppaace->wbah = 0;
+                       set_bf(ppaace->addr_bitfields, PPAACE_AF_WBAL, 0);
+                       /* identity mapping: no address translation, all perms */
+                       set_bf(ppaace->impl_attr, PAACE_IA_ATM,
+                               PAACE_ATM_NO_XLATE);
+                       set_bf(ppaace->addr_bitfields, PAACE_AF_AP,
+                               PAACE_AP_PERMS_ALL);
+                       if (of_device_is_compatible(node, "fsl,qman-portal"))
+                               setup_qbman_paace(ppaace, QMAN_PORTAL_PAACE);
+                       if (of_device_is_compatible(node, "fsl,qman"))
+                               setup_qbman_paace(ppaace, QMAN_PAACE);
+                       if (of_device_is_compatible(node, "fsl,bman"))
+                               setup_qbman_paace(ppaace, BMAN_PAACE);
+                       mb();
+                       pamu_enable_liodn(liodn);
+               }
+       }
+}
+
+/**
+ * pamu_av_isr() - PAMU access violation interrupt handler
+ * @irq: interrupt number
+ * @arg: the struct pamu_isr_data registered with request_irq()
+ *
+ * Dumps the violation registers of every PAMU, clears the violation
+ * condition and disables the offending LIODN — or, per hardware erratum
+ * A-003638, turns off violation reporting when the LIODN was already
+ * disabled.
+ */
+irqreturn_t pamu_av_isr(int irq, void *arg)
+{
+       struct pamu_isr_data *data = arg;
+       phys_addr_t phys;
+       unsigned int i, j, ret;
+
+       pr_emerg("access violation interrupt\n");
+
+       for (i = 0; i < data->count; i++) {
+               void __iomem *p = data->pamu_reg_base + i * PAMU_OFFSET;
+               u32 pics = in_be32(p + PAMU_PICS);
+
+               if (pics & PAMU_ACCESS_VIOLATION_STAT) {
+                       u32 avs1 = in_be32(p + PAMU_AVS1);
+                       struct paace *paace;
+
+                       pr_emerg("POES1=%08x\n", in_be32(p + PAMU_POES1));
+                       pr_emerg("POES2=%08x\n", in_be32(p + PAMU_POES2));
+                       pr_emerg("AVS1=%08x\n", avs1);
+                       pr_emerg("AVS2=%08x\n", in_be32(p + PAMU_AVS2));
+                       pr_emerg("AVA=%016llx\n", make64(in_be32(p + PAMU_AVAH),
+                               in_be32(p + PAMU_AVAL)));
+                       pr_emerg("UDAD=%08x\n", in_be32(p + PAMU_UDAD));
+                       pr_emerg("POEA=%016llx\n", make64(in_be32(p + PAMU_POEAH),
+                               in_be32(p + PAMU_POEAL)));
+
+                       phys = make64(in_be32(p + PAMU_POEAH),
+                               in_be32(p + PAMU_POEAL));
+
+                       /* Assume that POEA points to a PAACE */
+                       if (phys) {
+                               u32 *paace = phys_to_virt(phys);
+
+                               /* Only the first four words are relevant */
+                               for (j = 0; j < 4; j++)
+                                       pr_emerg("PAACE[%u]=%08x\n", j, in_be32(paace + j));
+                       }
+
+                       /* clear access violation condition */
+                       out_be32((p + PAMU_AVS1), avs1 & PAMU_AV_MASK);
+                       paace = pamu_get_ppaace(avs1 >> PAMU_AVS1_LIODN_SHIFT);
+                       BUG_ON(!paace);
+                       /* check if we got a violation for a disabled LIODN */
+                       if (!get_bf(paace->addr_bitfields, PAACE_AF_V)) {
+                               /*
+                                * As per hardware erratum A-003638, access
+                                * violation can be reported for a disabled
+                                * LIODN. If we hit that condition, disable
+                                * access violation reporting.
+                                */
+                               pics &= ~PAMU_ACCESS_VIOLATION_ENABLE;
+                       } else {
+                               /* Disable the LIODN */
+                               ret = pamu_disable_liodn(avs1 >> PAMU_AVS1_LIODN_SHIFT);
+                               BUG_ON(ret);
+                               pr_emerg("Disabling liodn %x\n", avs1 >> PAMU_AVS1_LIODN_SHIFT);
+                       }
+                       out_be32((p + PAMU_PICS), pics);
+               }
+       }
+
+
+       return IRQ_HANDLED;
+}
+
+/* Local Access Window (LAW) attribute-register fields */
+#define LAWAR_EN               0x80000000
+#define LAWAR_TARGET_MASK      0x0FF00000
+#define LAWAR_TARGET_SHIFT     20
+#define LAWAR_SIZE_MASK                0x0000003F
+#define LAWAR_CSDID_MASK       0x000FF000
+#define LAWAR_CSDID_SHIFT      12
+
+#define LAW_SIZE_4K            0xb
+
+/* One Local Access Window register set */
+struct ccsr_law {
+       u32     lawbarh;        /* LAWn base address high */
+       u32     lawbarl;        /* LAWn base address low */
+       u32     lawar;          /* LAWn attributes */
+       u32     reserved;
+};
+
+/*
+ * Create a coherence subdomain for a given memory block.
+ *
+ * Looks up the CoreNet LAW and coherency-fabric nodes, claims an unused
+ * coherence subdomain ID (CSDID) for @csd_port_id, and programs a free
+ * higher-priority LAW entry covering @phys/@size tagged with that CSDID.
+ *
+ * Returns 0 on success or a negative errno on failure (missing device
+ * tree nodes/properties, mapping failure, or no free CSD/LAW slot).
+ */
+static int __init create_csd(phys_addr_t phys, size_t size, u32 csd_port_id)
+{
+       struct device_node *np;
+       const __be32 *iprop;
+       void __iomem *lac = NULL;       /* Local Access Control registers */
+       struct ccsr_law __iomem *law;
+       void __iomem *ccm = NULL;
+       u32 __iomem *csdids;
+       unsigned int i, num_laws, num_csds;
+       u32 law_target = 0;
+       u32 csd_id = 0;
+       int ret = 0;
+
+       np = of_find_compatible_node(NULL, NULL, "fsl,corenet-law");
+       if (!np)
+               return -ENODEV;
+
+       iprop = of_get_property(np, "fsl,num-laws", NULL);
+       if (!iprop) {
+               ret = -ENODEV;
+               goto error;
+       }
+
+       num_laws = be32_to_cpup(iprop);
+       if (!num_laws) {
+               ret = -ENODEV;
+               goto error;
+       }
+
+       lac = of_iomap(np, 0);
+       if (!lac) {
+               ret = -ENODEV;
+               goto error;
+       }
+
+       /* LAW registers are at offset 0xC00 */
+       law = lac + 0xC00;
+
+       of_node_put(np);
+
+       np = of_find_compatible_node(NULL, NULL, "fsl,corenet-cf");
+       if (!np) {
+               ret = -ENODEV;
+               goto error;
+       }
+
+       iprop = of_get_property(np, "fsl,ccf-num-csdids", NULL);
+       if (!iprop) {
+               ret = -ENODEV;
+               goto error;
+       }
+
+       num_csds = be32_to_cpup(iprop);
+       if (!num_csds) {
+               ret = -ENODEV;
+               goto error;
+       }
+
+       ccm = of_iomap(np, 0);
+       if (!ccm) {
+               ret = -ENOMEM;
+               goto error;
+       }
+
+       /* The undocumented CSDID registers are at offset 0x600 */
+       csdids = ccm + 0x600;
+
+       of_node_put(np);
+       np = NULL;
+
+       /* Find an unused coherence subdomain ID */
+       for (csd_id = 0; csd_id < num_csds; csd_id++) {
+               if (!csdids[csd_id])
+                       break;
+       }
+
+       /*
+        * All CSDIDs are already in use: bail out rather than writing
+        * past the end of the CSDID register block below.
+        */
+       if (csd_id == num_csds) {
+               ret = -ENOENT;
+               goto error;
+       }
+
+       /* Store the Port ID in the (undocumented) proper CIDMRxx register */
+       csdids[csd_id] = csd_port_id;
+
+       /* Find the DDR LAW that maps to our buffer. */
+       for (i = 0; i < num_laws; i++) {
+               if (law[i].lawar & LAWAR_EN) {
+                       phys_addr_t law_start, law_end;
+
+                       law_start = make64(law[i].lawbarh, law[i].lawbarl);
+                       law_end = law_start +
+                               (2ULL << (law[i].lawar & LAWAR_SIZE_MASK));
+
+                       if (law_start <= phys && phys < law_end) {
+                               law_target = law[i].lawar & LAWAR_TARGET_MASK;
+                               break;
+                       }
+               }
+       }
+
+       if (i == 0 || i == num_laws) {
+               /* This should never happen */
+               ret = -ENOENT;
+               goto error;
+       }
+
+       /* Find a free LAW entry */
+       while (law[--i].lawar & LAWAR_EN) {
+               if (i == 0) {
+                       /* No higher priority LAW slots available */
+                       ret = -ENOENT;
+                       goto error;
+               }
+       }
+
+       /* Program the new LAW: base first, then attributes with EN set */
+       law[i].lawbarh = upper_32_bits(phys);
+       law[i].lawbarl = lower_32_bits(phys);
+       wmb();
+       law[i].lawar = LAWAR_EN | law_target | (csd_id << LAWAR_CSDID_SHIFT) |
+               (LAW_SIZE_4K + get_order(size));
+       wmb();
+
+error:
+       if (ccm)
+               iounmap(ccm);
+
+       if (lac)
+               iounmap(lac);
+
+       if (np)
+               of_node_put(np);
+
+       return ret;
+}
+
+/*
+ * Table of SVRs and the corresponding PORT_ID values. Port ID corresponds to a
+ * bit map of snoopers for a given range of memory mapped by a LAW.
+ *
+ * All future CoreNet-enabled SOCs will have this erratum (A-004510) fixed, so
+ * this table should never need to be updated.  SVRs are guaranteed to be
+ * unique, so there is no worry that a future SOC will inadvertently have one
+ * of these values.
+ */
+static const struct {
+       u32 svr;
+       u32 port_id;
+} port_id_map[] = {
+       {0x82100010, 0xFF000000},       /* P2040 1.0 */
+       {0x82100011, 0xFF000000},       /* P2040 1.1 */
+       {0x82100110, 0xFF000000},       /* P2041 1.0 */
+       {0x82100111, 0xFF000000},       /* P2041 1.1 */
+       {0x82110310, 0xFF000000},       /* P3041 1.0 */
+       {0x82110311, 0xFF000000},       /* P3041 1.1 */
+       {0x82010020, 0xFFF80000},       /* P4040 2.0 */
+       {0x82000020, 0xFFF80000},       /* P4080 2.0 */
+       {0x82210010, 0xFC000000},       /* P5010 1.0 */
+       {0x82210020, 0xFC000000},       /* P5010 2.0 */
+       {0x82200010, 0xFC000000},       /* P5020 1.0 */
+       {0x82050010, 0xFF800000},       /* P5021 1.0 */
+       {0x82040010, 0xFF800000},       /* P5040 1.0 */
+};
+
+#define SVR_SECURITY   0x80000 /* The Security (E) bit */
+
+static int __init fsl_pamu_probe(struct platform_device *pdev)
+{
+       void __iomem *pamu_regs = NULL;
+       struct ccsr_guts __iomem *guts_regs = NULL;
+       u32 pamubypenr, pamu_counter;
+       unsigned long pamu_reg_off;
+       unsigned long pamu_reg_base;
+       struct pamu_isr_data *data = NULL;
+       struct device_node *guts_node;
+       u64 size;
+       struct page *p;
+       int ret = 0;
+       int irq;
+       phys_addr_t ppaact_phys;
+       phys_addr_t spaact_phys;
+       phys_addr_t omt_phys;
+       size_t mem_size = 0;
+       unsigned int order = 0;
+       u32 csd_port_id = 0;
+       unsigned i;
+       /*
+        * enumerate all PAMUs and allocate and setup PAMU tables
+        * for each of them,
+        * NOTE : All PAMUs share the same LIODN tables.
+        */
+
+       pamu_regs = of_iomap(pdev->dev.of_node, 0);
+       if (!pamu_regs) {
+               dev_err(&pdev->dev, "ioremap of PAMU node failed\n");
+               return -ENOMEM;
+       }
+       of_get_address(pdev->dev.of_node, 0, &size, NULL);
+
+       irq = irq_of_parse_and_map(pdev->dev.of_node, 0);
+       if (irq == NO_IRQ) {
+               dev_warn(&pdev->dev, "no interrupts listed in PAMU node\n");
+               goto error;
+       }
+
+       data = kzalloc(sizeof(struct pamu_isr_data), GFP_KERNEL);
+       if (!data) {
+               dev_err(&pdev->dev, "PAMU isr data memory allocation failed\n");
+               ret = -ENOMEM;
+               goto error;
+       }
+       data->pamu_reg_base = pamu_regs;
+       data->count = size / PAMU_OFFSET;
+
+       /* The ISR needs access to the regs, so we won't iounmap them */
+       ret = request_irq(irq, pamu_av_isr, 0, "pamu", data);
+       if (ret < 0) {
+               dev_err(&pdev->dev, "error %i installing ISR for irq %i\n",
+                       ret, irq);
+               goto error;
+       }
+
+       guts_node = of_find_matching_node(NULL, guts_device_ids);
+       if (!guts_node) {
+               dev_err(&pdev->dev, "could not find GUTS node %s\n",
+                       pdev->dev.of_node->full_name);
+               ret = -ENODEV;
+               goto error;
+       }
+
+       guts_regs = of_iomap(guts_node, 0);
+       of_node_put(guts_node);
+       if (!guts_regs) {
+               dev_err(&pdev->dev, "ioremap of GUTS node failed\n");
+               ret = -ENODEV;
+               goto error;
+       }
+
+       /* read in the PAMU capability registers */
+       get_pamu_cap_values((unsigned long)pamu_regs);
+       /*
+        * To simplify the allocation of a coherency domain, we allocate the
+        * PAACT and the OMT in the same memory buffer.  Unfortunately, this
+        * wastes more memory compared to allocating the buffers separately.
+        */
+       /* Determine how much memory we need */
+       mem_size = (PAGE_SIZE << get_order(PAACT_SIZE)) +
+               (PAGE_SIZE << get_order(SPAACT_SIZE)) +
+               (PAGE_SIZE << get_order(OMT_SIZE));
+       order = get_order(mem_size);
+
+       p = alloc_pages(GFP_KERNEL | __GFP_ZERO, order);
+       if (!p) {
+               dev_err(&pdev->dev, "unable to allocate PAACT/SPAACT/OMT block\n");
+               ret = -ENOMEM;
+               goto error;
+       }
+
+       ppaact = page_address(p);
+       ppaact_phys = page_to_phys(p);
+
+       /* Make sure the memory is naturally aligned */
+       if (ppaact_phys & ((PAGE_SIZE << order) - 1)) {
+               dev_err(&pdev->dev, "PAACT/OMT block is unaligned\n");
+               ret = -ENOMEM;
+               goto error;
+       }
+
+       spaact = (void *)ppaact + (PAGE_SIZE << get_order(PAACT_SIZE));
+       omt = (void *)spaact + (PAGE_SIZE << get_order(SPAACT_SIZE));
+
+       dev_dbg(&pdev->dev, "ppaact virt=%p phys=0x%llx\n", ppaact,
+               (unsigned long long) ppaact_phys);
+
+       /* Check to see if we need to implement the work-around on this SOC */
+
+       /* Determine the Port ID for our coherence subdomain */
+       for (i = 0; i < ARRAY_SIZE(port_id_map); i++) {
+               if (port_id_map[i].svr == (mfspr(SPRN_SVR) & ~SVR_SECURITY)) {
+                       csd_port_id = port_id_map[i].port_id;
+                       dev_dbg(&pdev->dev, "found matching SVR %08x\n",
+                               port_id_map[i].svr);
+                       break;
+               }
+       }
+
+       if (csd_port_id) {
+               dev_dbg(&pdev->dev, "creating coherency subdomain at address "
+                       "0x%llx, size %zu, port id 0x%08x", ppaact_phys,
+                       mem_size, csd_port_id);
+
+               ret = create_csd(ppaact_phys, mem_size, csd_port_id);
+               if (ret) {
+                       dev_err(&pdev->dev, "could not create coherence "
+                               "subdomain\n");
+                       return ret;
+               }
+       }
+
+       spaact_phys = virt_to_phys(spaact);
+       omt_phys = virt_to_phys(omt);
+
+       spaace_pool = gen_pool_create(ilog2(sizeof(struct paace)), -1);
+       if (!spaace_pool) {
+               ret = -ENOMEM;
+               dev_err(&pdev->dev, "PAMU : failed to allocate spaace gen pool\n");
+               goto error;
+       }
+
+       ret = gen_pool_add(spaace_pool, (unsigned long)spaact, SPAACT_SIZE, -1);
+       if (ret)
+               goto error_genpool;
+
+       pamubypenr = in_be32(&guts_regs->pamubypenr);
+
+       for (pamu_reg_off = 0, pamu_counter = 0x80000000; pamu_reg_off < size;
+            pamu_reg_off += PAMU_OFFSET, pamu_counter >>= 1) {
+
+               pamu_reg_base = (unsigned long) pamu_regs + pamu_reg_off;
+               setup_one_pamu(pamu_reg_base, pamu_reg_off, ppaact_phys,
+                                spaact_phys, omt_phys);
+               /* Disable PAMU bypass for this PAMU */
+               pamubypenr &= ~pamu_counter;
+       }
+
+       setup_omt(omt);
+
+       /* Enable all relevant PAMU(s) */
+       out_be32(&guts_regs->pamubypenr, pamubypenr);
+
+       iounmap(guts_regs);
+
+       /* Enable DMA for the LIODNs in the device tree*/
+
+       setup_liodns();
+
+       return 0;
+
+error_genpool:
+       gen_pool_destroy(spaace_pool);
+
+error:
+       if (irq != NO_IRQ)
+               free_irq(irq, data);
+
+       if (data) {
+               memset(data, 0, sizeof(struct pamu_isr_data));
+               kfree(data);
+       }
+
+       if (pamu_regs)
+               iounmap(pamu_regs);
+
+       if (guts_regs)
+               iounmap(guts_regs);
+
+       if (ppaact)
+               free_pages((unsigned long)ppaact, order);
+
+       ppaact = NULL;
+
+       return ret;
+}
+
+/* Device-tree match table: the P4080-specific and the generic PAMU node. */
+static const struct of_device_id fsl_of_pamu_ids[] = {
+       { .compatible = "fsl,p4080-pamu", },
+       { .compatible = "fsl,pamu", },
+       {},
+};
+
+/*
+ * Platform driver for the PAMU.  Note that no .of_match_table is set:
+ * the matching platform device is created manually in fsl_pamu_init()
+ * with the name "fsl-of-pamu", so driver/device matching is by name.
+ */
+static struct platform_driver fsl_of_pamu_driver = {
+       .driver = {
+               .name = "fsl-of-pamu",
+               .owner = THIS_MODULE,
+       },
+       .probe = fsl_pamu_probe,
+};
+
+/*
+ * fsl_pamu_init - early registration of the PAMU driver and device
+ *
+ * The normal OF process calls the probe function at some
+ * indeterminate later time, after most drivers have loaded.  This is
+ * too late for us, because PAMU clients (like the Qman driver)
+ * depend on PAMU being initialized early.
+ *
+ * So instead, we "manually" call our probe function by creating the
+ * platform devices ourselves.
+ *
+ * We assume that there is only one PAMU node in the device tree.  A
+ * single PAMU node represents all of the PAMU devices in the SOC
+ * already.   Everything else already makes that assumption, and the
+ * binding for the PAMU nodes doesn't allow for any parent-child
+ * relationships anyway.  In other words, support for more than one
+ * PAMU node would require significant changes to a lot of code.
+ */
+static __init int fsl_pamu_init(void)
+{
+       struct platform_device *pdev = NULL;
+       struct device_node *np;
+       int ret;
+
+       np = of_find_compatible_node(NULL, NULL, "fsl,pamu");
+       if (!np) {
+               pr_err("could not find a PAMU node\n");
+               return -ENODEV;
+       }
+
+       ret = platform_driver_register(&fsl_of_pamu_driver);
+       if (ret) {
+               pr_err("could not register driver (err=%i)\n", ret);
+               goto error_driver_register;
+       }
+
+       pdev = platform_device_alloc("fsl-of-pamu", 0);
+       if (!pdev) {
+               pr_err("could not allocate device %s\n",
+                      np->full_name);
+               ret = -ENOMEM;
+               goto error_device_alloc;
+       }
+       /* The device takes (and owns) its own reference to the node. */
+       pdev->dev.of_node = of_node_get(np);
+
+       ret = pamu_domain_init();
+       if (ret)
+               goto error_device_add;
+
+       ret = platform_device_add(pdev);
+       if (ret) {
+               pr_err("could not add device %s (err=%i)\n",
+                      np->full_name, ret);
+               goto error_device_add;
+       }
+
+       /*
+        * Drop the reference returned by of_find_compatible_node(); the
+        * device holds its own reference (taken above).  Previously this
+        * reference was leaked on the success path.
+        */
+       of_node_put(np);
+
+       return 0;
+
+error_device_add:
+       of_node_put(pdev->dev.of_node);
+       pdev->dev.of_node = NULL;
+
+       platform_device_put(pdev);
+
+error_device_alloc:
+       platform_driver_unregister(&fsl_of_pamu_driver);
+
+error_driver_register:
+       of_node_put(np);
+
+       return ret;
+}
+arch_initcall(fsl_pamu_init);
diff --git a/drivers/iommu/fsl_pamu.h b/drivers/iommu/fsl_pamu.h
new file mode 100644 (file)
index 0000000..8fc1a12
--- /dev/null
@@ -0,0 +1,410 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright (C) 2013 Freescale Semiconductor, Inc.
+ *
+ */
+
+#ifndef __FSL_PAMU_H
+#define __FSL_PAMU_H
+
+#include <asm/fsl_pamu_stash.h>
+
+/* Bit Field macros
+ *     v = bit field variable; m = mask, m##_SHIFT = shift, x = value to load
+ */
+#define set_bf(v, m, x)                (v = ((v) & ~(m)) | (((x) << (m##_SHIFT)) & (m)))
+#define get_bf(v, m)           (((v) & (m)) >> (m##_SHIFT))
+
+/* PAMU CCSR space */
+#define PAMU_PGC 0x00000000     /* Allows all peripheral accesses */
+#define PAMU_PE 0x40000000      /* enable PAMU                    */
+
+/* PAMU_OFFSET to the next pamu space in ccsr */
+#define PAMU_OFFSET 0x1000
+
+#define PAMU_MMAP_REGS_BASE 0
+
+/*
+ * PAMU memory-mapped window registers, located at PAMU_MMAP_REGS_BASE.
+ * Each 64-bit address is split into a high ("..h") and low ("..l") u32.
+ * NOTE(review): the names suggest ppba*/ppla* = primary PAACT base/last,
+ * spba*/spla* = secondary PAACT base/last, oba*/ola* = OMT base/last
+ * addresses -- confirm against the PAMU reference manual.
+ */
+struct pamu_mmap_regs {
+       u32 ppbah;
+       u32 ppbal;
+       u32 pplah;
+       u32 pplal;
+       u32 spbah;
+       u32 spbal;
+       u32 splah;
+       u32 splal;
+       u32 obah;
+       u32 obal;
+       u32 olah;
+       u32 olal;
+};
+
+/* PAMU Error Registers */
+#define PAMU_POES1 0x0040
+#define PAMU_POES2 0x0044
+#define PAMU_POEAH 0x0048
+#define PAMU_POEAL 0x004C
+#define PAMU_AVS1  0x0050
+#define PAMU_AVS1_AV    0x1
+#define PAMU_AVS1_OTV   0x6
+#define PAMU_AVS1_APV   0x78
+#define PAMU_AVS1_WAV   0x380
+#define PAMU_AVS1_LAV   0x1c00
+#define PAMU_AVS1_GCV   0x2000
+#define PAMU_AVS1_PDV   0x4000
+#define PAMU_AV_MASK    (PAMU_AVS1_AV | PAMU_AVS1_OTV | PAMU_AVS1_APV | PAMU_AVS1_WAV \
+                       | PAMU_AVS1_LAV | PAMU_AVS1_GCV | PAMU_AVS1_PDV)
+#define PAMU_AVS1_LIODN_SHIFT 16
+#define PAMU_LAV_LIODN_NOT_IN_PPAACT 0x400
+
+#define PAMU_AVS2  0x0054
+#define PAMU_AVAH  0x0058
+#define PAMU_AVAL  0x005C
+#define PAMU_EECTL 0x0060
+#define PAMU_EEDIS 0x0064
+#define PAMU_EEINTEN 0x0068
+#define PAMU_EEDET 0x006C
+#define PAMU_EEATTR 0x0070
+#define PAMU_EEAHI 0x0074
+#define PAMU_EEALO 0x0078
+#define PAMU_EEDHI 0X007C
+#define PAMU_EEDLO 0x0080
+#define PAMU_EECC  0x0084
+#define PAMU_UDAD  0x0090
+
+/* PAMU Revision Registers */
+#define PAMU_PR1 0x0BF8
+#define PAMU_PR2 0x0BFC
+
+/* PAMU version mask */
+#define PAMU_PR1_MASK 0xffff
+
+/* PAMU Capabilities Registers */
+#define PAMU_PC1 0x0C00
+#define PAMU_PC2 0x0C04
+#define PAMU_PC3 0x0C08
+#define PAMU_PC4 0x0C0C
+
+/* PAMU Control Register */
+#define PAMU_PC 0x0C10
+
+/* PAMU control defs */
+#define PAMU_CONTROL 0x0C10
+#define PAMU_PC_PGC 0x80000000  /* PAMU gate closed bit */
+#define PAMU_PC_PE   0x40000000 /* PAMU enable bit */
+#define PAMU_PC_SPCC 0x00000010 /* sPAACE cache enable */
+#define PAMU_PC_PPCC 0x00000001 /* pPAACE cache enable */
+#define PAMU_PC_OCE  0x00001000 /* OMT cache enable */
+
+#define PAMU_PFA1 0x0C14
+#define PAMU_PFA2 0x0C18
+
+#define PAMU_PC2_MLIODN(X) ((X) >> 16)
+#define PAMU_PC3_MWCE(X) (((X) >> 21) & 0xf)
+
+/* PAMU Interrupt control and Status Register */
+#define PAMU_PICS 0x0C1C
+#define PAMU_ACCESS_VIOLATION_STAT   0x8
+#define PAMU_ACCESS_VIOLATION_ENABLE 0x4
+
+/* PAMU Debug Registers */
+#define PAMU_PD1 0x0F00
+#define PAMU_PD2 0x0F04
+#define PAMU_PD3 0x0F08
+#define PAMU_PD4 0x0F0C
+
+#define PAACE_AP_PERMS_DENIED  0x0
+#define PAACE_AP_PERMS_QUERY   0x1
+#define PAACE_AP_PERMS_UPDATE  0x2
+#define PAACE_AP_PERMS_ALL     0x3
+
+#define PAACE_DD_TO_HOST       0x0
+#define PAACE_DD_TO_IO         0x1
+#define PAACE_PT_PRIMARY       0x0
+#define PAACE_PT_SECONDARY     0x1
+#define PAACE_V_INVALID        0x0
+#define PAACE_V_VALID          0x1
+#define PAACE_MW_SUBWINDOWS    0x1
+
+#define PAACE_WSE_4K           0xB
+#define PAACE_WSE_8K           0xC
+#define PAACE_WSE_16K          0xD
+#define PAACE_WSE_32K          0xE
+#define PAACE_WSE_64K          0xF
+#define PAACE_WSE_128K         0x10
+#define PAACE_WSE_256K         0x11
+#define PAACE_WSE_512K         0x12
+#define PAACE_WSE_1M           0x13
+#define PAACE_WSE_2M           0x14
+#define PAACE_WSE_4M           0x15
+#define PAACE_WSE_8M           0x16
+#define PAACE_WSE_16M          0x17
+#define PAACE_WSE_32M          0x18
+#define PAACE_WSE_64M          0x19
+#define PAACE_WSE_128M         0x1A
+#define PAACE_WSE_256M         0x1B
+#define PAACE_WSE_512M         0x1C
+#define PAACE_WSE_1G           0x1D
+#define PAACE_WSE_2G           0x1E
+#define PAACE_WSE_4G           0x1F
+
+#define PAACE_DID_PCI_EXPRESS_1 0x00
+#define PAACE_DID_PCI_EXPRESS_2 0x01
+#define PAACE_DID_PCI_EXPRESS_3 0x02
+#define PAACE_DID_PCI_EXPRESS_4 0x03
+#define PAACE_DID_LOCAL_BUS     0x04
+#define PAACE_DID_SRIO          0x0C
+#define PAACE_DID_MEM_1         0x10
+#define PAACE_DID_MEM_2         0x11
+#define PAACE_DID_MEM_3         0x12
+#define PAACE_DID_MEM_4         0x13
+#define PAACE_DID_MEM_1_2       0x14
+#define PAACE_DID_MEM_3_4       0x15
+#define PAACE_DID_MEM_1_4       0x16
+#define PAACE_DID_BM_SW_PORTAL  0x18
+#define PAACE_DID_PAMU          0x1C
+#define PAACE_DID_CAAM          0x21
+#define PAACE_DID_QM_SW_PORTAL  0x3C
+#define PAACE_DID_CORE0_INST    0x80
+#define PAACE_DID_CORE0_DATA    0x81
+#define PAACE_DID_CORE1_INST    0x82
+#define PAACE_DID_CORE1_DATA    0x83
+#define PAACE_DID_CORE2_INST    0x84
+#define PAACE_DID_CORE2_DATA    0x85
+#define PAACE_DID_CORE3_INST    0x86
+#define PAACE_DID_CORE3_DATA    0x87
+#define PAACE_DID_CORE4_INST    0x88
+#define PAACE_DID_CORE4_DATA    0x89
+#define PAACE_DID_CORE5_INST    0x8A
+#define PAACE_DID_CORE5_DATA    0x8B
+#define PAACE_DID_CORE6_INST    0x8C
+#define PAACE_DID_CORE6_DATA    0x8D
+#define PAACE_DID_CORE7_INST    0x8E
+#define PAACE_DID_CORE7_DATA    0x8F
+#define PAACE_DID_BROADCAST     0xFF
+
+#define PAACE_ATM_NO_XLATE      0x00
+#define PAACE_ATM_WINDOW_XLATE  0x01
+#define PAACE_ATM_PAGE_XLATE    0x02
+#define PAACE_ATM_WIN_PG_XLATE  \
+                (PAACE_ATM_WINDOW_XLATE | PAACE_ATM_PAGE_XLATE)
+#define PAACE_OTM_NO_XLATE      0x00
+#define PAACE_OTM_IMMEDIATE     0x01
+#define PAACE_OTM_INDEXED       0x02
+#define PAACE_OTM_RESERVED      0x03
+
+#define PAACE_M_COHERENCE_REQ   0x01
+
+#define PAACE_PID_0             0x0
+#define PAACE_PID_1             0x1
+#define PAACE_PID_2             0x2
+#define PAACE_PID_3             0x3
+#define PAACE_PID_4             0x4
+#define PAACE_PID_5             0x5
+#define PAACE_PID_6             0x6
+#define PAACE_PID_7             0x7
+
+#define PAACE_TCEF_FORMAT0_8B   0x00
+#define PAACE_TCEF_FORMAT1_RSVD 0x01
+/*
+ * Hard coded value for the PAACT size to accomodate
+ * maximum LIODN value generated by u-boot.
+ */
+#define PAACE_NUMBER_ENTRIES    0x500
+/* Hard coded value for the SPAACT size */
+#define SPAACE_NUMBER_ENTRIES  0x800
+
+#define        OME_NUMBER_ENTRIES      16
+
+/* PAACE Bit Field Defines */
+#define PPAACE_AF_WBAL                 0xfffff000
+#define PPAACE_AF_WBAL_SHIFT           12
+#define PPAACE_AF_WSE                  0x00000fc0
+#define PPAACE_AF_WSE_SHIFT            6
+#define PPAACE_AF_MW                   0x00000020
+#define PPAACE_AF_MW_SHIFT             5
+
+#define SPAACE_AF_LIODN                        0xffff0000
+#define SPAACE_AF_LIODN_SHIFT          16
+
+#define PAACE_AF_AP                    0x00000018
+#define PAACE_AF_AP_SHIFT              3
+#define PAACE_AF_DD                    0x00000004
+#define PAACE_AF_DD_SHIFT              2
+#define PAACE_AF_PT                    0x00000002
+#define PAACE_AF_PT_SHIFT              1
+#define PAACE_AF_V                     0x00000001
+#define PAACE_AF_V_SHIFT               0
+
+#define PAACE_DA_HOST_CR               0x80
+#define PAACE_DA_HOST_CR_SHIFT         7
+
+#define PAACE_IA_CID                   0x00FF0000
+#define PAACE_IA_CID_SHIFT             16
+#define PAACE_IA_WCE                   0x000000F0
+#define PAACE_IA_WCE_SHIFT             4
+#define PAACE_IA_ATM                   0x0000000C
+#define PAACE_IA_ATM_SHIFT             2
+#define PAACE_IA_OTM                   0x00000003
+#define PAACE_IA_OTM_SHIFT             0
+
+#define PAACE_WIN_TWBAL                        0xfffff000
+#define PAACE_WIN_TWBAL_SHIFT          12
+#define PAACE_WIN_SWSE                 0x00000fc0
+#define PAACE_WIN_SWSE_SHIFT           6
+
+/* PAMU Data Structures */
+/*
+ * primary / secondary paact structure
+ *
+ * NOTE(review): the layout appears to mirror the hardware-defined PAACE
+ * entry format (the byte offsets are given in the comments below), so
+ * field order and sizes must not be changed -- confirm against the PAMU
+ * block guide for the target SoC.
+ */
+struct paace {
+       /* PAACE Offset 0x00 */
+       u32 wbah;                               /* only valid for Primary PAACE */
+       u32 addr_bitfields;             /* See P/S PAACE_AF_* */
+
+       /* PAACE Offset 0x08 */
+       /* Interpretation of first 32 bits dependent on DD above */
+       union {
+               struct {
+                       /* Destination ID, see PAACE_DID_* defines */
+                       u8 did;
+                       /* Partition ID */
+                       u8 pid;
+                       /* Snoop ID */
+                       u8 snpid;
+                       /* coherency_required : 1 reserved : 7 */
+                       u8 coherency_required; /* See PAACE_DA_* */
+               } to_host;
+               struct {
+                       /* Destination ID, see PAACE_DID_* defines */
+                       u8  did;
+                       u8  reserved1;
+                       u16 reserved2;
+               } to_io;
+       } domain_attr;
+
+       /* Implementation attributes + window count + address & operation translation modes */
+       u32 impl_attr;                  /* See PAACE_IA_* */
+
+       /* PAACE Offset 0x10 */
+       /* Translated window base address */
+       u32 twbah;
+       u32 win_bitfields;                      /* See PAACE_WIN_* */
+
+       /* PAACE Offset 0x18 */
+       /* first secondary paace entry */
+       u32 fspi;                               /* only valid for Primary PAACE */
+       union {
+               struct {
+                       u8 ioea;
+                       u8 moea;
+                       u8 ioeb;
+                       u8 moeb;
+               } immed_ot;
+               struct {
+                       u16 reserved;
+                       u16 omi;
+               } index_ot;
+       } op_encode;
+
+       /* PAACE Offsets 0x20-0x38 */
+       u32 reserved[8];                        /* not currently implemented */
+};
+
+/* OME : Operation mapping entry
+ * MOE : Mapped Operation Encodings
+ * The operation mapping table is table containing operation mapping entries (OME).
+ * The index of a particular OME is programmed in the PAACE entry for translation
+ * in bound I/O operations corresponding to an LIODN. The OMT is used for translation
+ * specifically in case of the indexed translation mode. Each OME contains a 128
+ * byte mapped operation encoding (MOE), where each byte represents an MOE.
+ */
+#define NUM_MOE 128
+struct ome {
+       u8 moe[NUM_MOE];
+} __attribute__((packed));
+
+#define PAACT_SIZE              (sizeof(struct paace) * PAACE_NUMBER_ENTRIES)
+#define SPAACT_SIZE              (sizeof(struct paace) * SPAACE_NUMBER_ENTRIES)
+#define OMT_SIZE                (sizeof(struct ome) * OME_NUMBER_ENTRIES)
+
+#define PAMU_PAGE_SHIFT 12
+#define PAMU_PAGE_SIZE  4096ULL
+
+#define IOE_READ        0x00
+#define IOE_READ_IDX    0x00
+#define IOE_WRITE       0x81
+#define IOE_WRITE_IDX   0x01
+#define IOE_EREAD0      0x82    /* Enhanced read type 0 */
+#define IOE_EREAD0_IDX  0x02    /* Enhanced read type 0 */
+#define IOE_EWRITE0     0x83    /* Enhanced write type 0 */
+#define IOE_EWRITE0_IDX 0x03    /* Enhanced write type 0 */
+#define IOE_DIRECT0     0x84    /* Directive type 0 */
+#define IOE_DIRECT0_IDX 0x04    /* Directive type 0 */
+#define IOE_EREAD1      0x85    /* Enhanced read type 1 */
+#define IOE_EREAD1_IDX  0x05    /* Enhanced read type 1 */
+#define IOE_EWRITE1     0x86    /* Enhanced write type 1 */
+#define IOE_EWRITE1_IDX 0x06    /* Enhanced write type 1 */
+#define IOE_DIRECT1     0x87    /* Directive type 1 */
+#define IOE_DIRECT1_IDX 0x07    /* Directive type 1 */
+#define IOE_RAC         0x8c    /* Read with Atomic clear */
+#define IOE_RAC_IDX     0x0c    /* Read with Atomic clear */
+#define IOE_RAS         0x8d    /* Read with Atomic set */
+#define IOE_RAS_IDX     0x0d    /* Read with Atomic set */
+#define IOE_RAD         0x8e    /* Read with Atomic decrement */
+#define IOE_RAD_IDX     0x0e    /* Read with Atomic decrement */
+#define IOE_RAI         0x8f    /* Read with Atomic increment */
+#define IOE_RAI_IDX     0x0f    /* Read with Atomic increment */
+
+#define EOE_READ        0x00
+#define EOE_WRITE       0x01
+#define EOE_RAC         0x0c    /* Read with Atomic clear */
+#define EOE_RAS         0x0d    /* Read with Atomic set */
+#define EOE_RAD         0x0e    /* Read with Atomic decrement */
+#define EOE_RAI         0x0f    /* Read with Atomic increment */
+#define EOE_LDEC        0x10    /* Load external cache */
+#define EOE_LDECL       0x11    /* Load external cache with stash lock */
+#define EOE_LDECPE      0x12    /* Load external cache with preferred exclusive */
+#define EOE_LDECPEL     0x13    /* Load external cache with preferred exclusive and lock */
+#define EOE_LDECFE      0x14    /* Load external cache with forced exclusive */
+#define EOE_LDECFEL     0x15    /* Load external cache with forced exclusive and lock */
+#define EOE_RSA         0x16    /* Read with stash allocate */
+#define EOE_RSAU        0x17    /* Read with stash allocate and unlock */
+#define EOE_READI       0x18    /* Read with invalidate */
+#define EOE_RWNITC      0x19    /* Read with no intention to cache */
+#define EOE_WCI         0x1a    /* Write cache inhibited */
+#define EOE_WWSA        0x1b    /* Write with stash allocate */
+#define EOE_WWSAL       0x1c    /* Write with stash allocate and lock */
+#define EOE_WWSAO       0x1d    /* Write with stash allocate only */
+#define EOE_WWSAOL      0x1e    /* Write with stash allocate only and lock */
+#define EOE_VALID       0x80
+
+/* Function prototypes */
+int pamu_domain_init(void);
+int pamu_enable_liodn(int liodn);
+int pamu_disable_liodn(int liodn);
+void pamu_free_subwins(int liodn);
+int pamu_config_ppaace(int liodn, phys_addr_t win_addr, phys_addr_t win_size,
+                      u32 omi, unsigned long rpn, u32 snoopid, uint32_t stashid,
+                      u32 subwin_cnt, int prot);
+int pamu_config_spaace(int liodn, u32 subwin_cnt, u32 subwin_addr,
+                      phys_addr_t subwin_size, u32 omi, unsigned long rpn,
+                      uint32_t snoopid, u32 stashid, int enable, int prot);
+
+u32 get_stash_id(u32 stash_dest_hint, u32 vcpu);
+void get_ome_index(u32 *omi_index, struct device *dev);
+int  pamu_update_paace_stash(int liodn, u32 subwin, u32 value);
+int pamu_disable_spaace(int liodn, u32 subwin);
+u32 pamu_get_max_subwin_cnt(void);
+
+#endif  /* __FSL_PAMU_H */
diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c
new file mode 100644 (file)
index 0000000..c857c30
--- /dev/null
@@ -0,0 +1,1172 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright (C) 2013 Freescale Semiconductor, Inc.
+ * Author: Varun Sethi <varun.sethi@freescale.com>
+ *
+ */
+
+#define pr_fmt(fmt)    "fsl-pamu-domain: %s: " fmt, __func__
+
+#include <linux/init.h>
+#include <linux/iommu.h>
+#include <linux/notifier.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/interrupt.h>
+#include <linux/device.h>
+#include <linux/of_platform.h>
+#include <linux/bootmem.h>
+#include <linux/err.h>
+#include <asm/io.h>
+#include <asm/bitops.h>
+
+#include <asm/pci-bridge.h>
+#include <sysdev/fsl_pci.h>
+
+#include "fsl_pamu_domain.h"
+#include "pci.h"
+
+/*
+ * Global spinlock that needs to be held while
+ * configuring PAMU.
+ */
+static DEFINE_SPINLOCK(iommu_lock);
+
+static struct kmem_cache *fsl_pamu_domain_cache;
+static struct kmem_cache *iommu_devinfo_cache;
+static DEFINE_SPINLOCK(device_domain_lock);
+
+/* Create the slab caches used for domain and per-device bookkeeping. */
+static int __init iommu_init_mempool(void)
+{
+       fsl_pamu_domain_cache = kmem_cache_create("fsl_pamu_domain",
+                                                 sizeof(struct fsl_dma_domain),
+                                                 0, SLAB_HWCACHE_ALIGN, NULL);
+       if (!fsl_pamu_domain_cache) {
+               pr_debug("Couldn't create fsl iommu_domain cache\n");
+               return -ENOMEM;
+       }
+
+       iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
+                                               sizeof(struct device_domain_info),
+                                               0, SLAB_HWCACHE_ALIGN, NULL);
+       if (!iommu_devinfo_cache) {
+               pr_debug("Couldn't create devinfo cache\n");
+               /* Undo the first allocation before bailing out. */
+               kmem_cache_destroy(fsl_pamu_domain_cache);
+               return -ENOMEM;
+       }
+
+       return 0;
+}
+
+/*
+ * Translate an I/O virtual address to the physical address it maps to
+ * within the domain's DMA window(s).  Returns 0 when the geometry is
+ * not configured or the covering window is not valid.
+ */
+static phys_addr_t get_phys_addr(struct fsl_dma_domain *dma_domain, dma_addr_t iova)
+{
+       u32 win_cnt = dma_domain->win_cnt;
+       struct dma_window *win_ptr =
+                               &dma_domain->win_arr[0];
+       struct iommu_domain_geometry *geom;
+
+       geom = &dma_domain->iommu_domain->geometry;
+
+       if (!win_cnt || !dma_domain->geom_size) {
+               pr_debug("Number of windows/geometry not configured for the domain\n");
+               return 0;
+       }
+
+       if (win_cnt > 1) {
+               u64 subwin_size;
+               dma_addr_t subwin_iova;
+               u32 wnd;
+
+               /*
+                * The geometry is split into win_cnt equal subwindows;
+                * locate the one containing iova.  NOTE(review): the mask
+                * arithmetic assumes geom_size and win_cnt are powers of
+                * two -- confirm this is enforced by the callers.
+                */
+               subwin_size = dma_domain->geom_size >> ilog2(win_cnt);
+               subwin_iova = iova & ~(subwin_size - 1);
+               wnd = (subwin_iova - geom->aperture_start) >> ilog2(subwin_size);
+               win_ptr = &dma_domain->win_arr[wnd];
+       }
+
+       if (win_ptr->valid)
+               /* Offset of iova inside the window, added to its physical base. */
+               return (win_ptr->paddr + (iova & (win_ptr->size - 1)));
+
+       return 0;
+}
+
+/*
+ * map_subwins - program the LIODN's valid subwindows into the PAMU
+ *
+ * Walks the domain's subwindow array and rewrites the sPAACE of every
+ * window currently marked valid.  Returns 0 on success (including the
+ * case where no subwindow is valid) or the pamu_config_spaace() error.
+ */
+static int map_subwins(int liodn, struct fsl_dma_domain *dma_domain)
+{
+       struct dma_window *sub_win_ptr =
+                               &dma_domain->win_arr[0];
+       int i;
+       /*
+        * Initialize to 0: previously 'ret' was returned uninitialized
+        * when no subwindow was valid (or win_cnt was 0).
+        */
+       int ret = 0;
+       unsigned long rpn, flags;
+
+       for (i = 0; i < dma_domain->win_cnt; i++) {
+               if (sub_win_ptr[i].valid) {
+                       rpn = sub_win_ptr[i].paddr >>
+                                PAMU_PAGE_SHIFT;
+                       spin_lock_irqsave(&iommu_lock, flags);
+                       ret = pamu_config_spaace(liodn, dma_domain->win_cnt, i,
+                                                sub_win_ptr[i].size,
+                                                ~(u32)0,
+                                                rpn,
+                                                dma_domain->snoop_id,
+                                                dma_domain->stash_id,
+                                                (i > 0) ? 1 : 0,
+                                                sub_win_ptr[i].prot);
+                       spin_unlock_irqrestore(&iommu_lock, flags);
+                       if (ret) {
+                               pr_debug("PAMU SPAACE configuration failed for liodn %d\n",
+                                        liodn);
+                               return ret;
+                       }
+               }
+       }
+
+       return ret;
+}
+
+/* Program the domain's single DMA window into the LIODN's primary PAACE. */
+static int map_win(int liodn, struct fsl_dma_domain *dma_domain)
+{
+       struct dma_window *win = &dma_domain->win_arr[0];
+       phys_addr_t start = dma_domain->iommu_domain->geometry.aperture_start;
+       unsigned long flags;
+       int rc;
+
+       spin_lock_irqsave(&iommu_lock, flags);
+       rc = pamu_config_ppaace(liodn, start, win->size, ~(u32)0,
+                               win->paddr >> PAMU_PAGE_SHIFT,
+                               dma_domain->snoop_id, dma_domain->stash_id,
+                               0, win->prot);
+       spin_unlock_irqrestore(&iommu_lock, flags);
+       if (rc)
+               pr_debug("PAMU PAACE configuration failed for liodn %d\n",
+                       liodn);
+
+       return rc;
+}
+
+/* Program the DMA window(s) of @dma_domain for the given LIODN. */
+static int map_liodn(int liodn, struct fsl_dma_domain *dma_domain)
+{
+       if (dma_domain->win_cnt <= 1)
+               return map_win(liodn, dma_domain);
+
+       return map_subwins(liodn, dma_domain);
+}
+
+/*
+ * update_liodn - re-program window/subwindow @wnd_nr of the LIODN
+ *
+ * Reads the current attributes (size/paddr/prot) from win_arr[wnd_nr].
+ * With multiple windows the subwindow's sPAACE is rewritten; with a
+ * single window the primary PAACE is rewritten using the domain's
+ * aperture start as the window address.  Returns 0 or a negative
+ * error code from the pamu_config_* call.
+ */
+static int update_liodn(int liodn, struct fsl_dma_domain *dma_domain, u32 wnd_nr)
+{
+       int ret;
+       struct dma_window *wnd = &dma_domain->win_arr[wnd_nr];
+       unsigned long flags;
+
+       spin_lock_irqsave(&iommu_lock, flags);
+       if (dma_domain->win_cnt > 1) {
+               /* enable=1 for every subwindow except subwindow 0 */
+               ret = pamu_config_spaace(liodn, dma_domain->win_cnt, wnd_nr,
+                                        wnd->size,
+                                        ~(u32)0,
+                                        wnd->paddr >> PAMU_PAGE_SHIFT,
+                                        dma_domain->snoop_id,
+                                        dma_domain->stash_id,
+                                        (wnd_nr > 0) ? 1 : 0,
+                                        wnd->prot);
+               if (ret)
+                       pr_debug("Subwindow reconfiguration failed for liodn %d\n", liodn);
+       } else {
+               phys_addr_t wnd_addr;
+
+               wnd_addr = dma_domain->iommu_domain->geometry.aperture_start;
+
+               ret = pamu_config_ppaace(liodn, wnd_addr,
+                                        wnd->size,
+                                        ~(u32)0,
+                                        wnd->paddr >> PAMU_PAGE_SHIFT,
+                                       dma_domain->snoop_id, dma_domain->stash_id,
+                                       0, wnd->prot);
+               if (ret)
+                       pr_debug("Window reconfiguration failed for liodn %d\n", liodn);
+       }
+
+       spin_unlock_irqrestore(&iommu_lock, flags);
+
+       return ret;
+}
+
+/* Re-program the stash id in every (s)PAACE belonging to the LIODN. */
+static int update_liodn_stash(int liodn, struct fsl_dma_domain *dma_domain,
+                                u32 val)
+{
+       unsigned long flags;
+       int ret = 0;
+       int i;
+
+       spin_lock_irqsave(&iommu_lock, flags);
+
+       if (!dma_domain->win_arr) {
+               pr_debug("Windows not configured, stash destination update failed for liodn %d\n", liodn);
+               ret = -EINVAL;
+               goto out;
+       }
+
+       for (i = 0; i < dma_domain->win_cnt; i++) {
+               ret = pamu_update_paace_stash(liodn, i, val);
+               if (ret) {
+                       pr_debug("Failed to update SPAACE %d field for liodn %d\n ", i, liodn);
+                       goto out;
+               }
+       }
+
+out:
+       /* Single unlock point for all paths. */
+       spin_unlock_irqrestore(&iommu_lock, flags);
+
+       return ret;
+}
+
+/*
+ * pamu_set_liodn - set the geometry parameters for a LIODN
+ * @liodn:      logical I/O device number to configure
+ * @dev:        device used to look up the operation-mapping index
+ * @dma_domain: domain supplying geom_size and snoop/stash ids
+ * @geom_attr:  iommu geometry; aperture_start is the window address
+ * @win_cnt:    number of DMA (sub)windows
+ *
+ * Disables the LIODN, programs the primary PAACE with the overall
+ * geometry (rpn 0, prot 0), and, when win_cnt > 1, programs each
+ * subwindow's sPAACE with enable=0 and prot=0.  Returns 0 or a
+ * negative error code.
+ */
+static int pamu_set_liodn(int liodn, struct device *dev,
+                          struct fsl_dma_domain *dma_domain,
+                          struct iommu_domain_geometry *geom_attr,
+                          u32 win_cnt)
+{
+       phys_addr_t window_addr, window_size;
+       phys_addr_t subwin_size;
+       int ret = 0, i;
+       u32 omi_index = ~(u32)0;
+       unsigned long flags;
+
+       /*
+        * Configure the omi_index at the geometry setup time.
+        * This is a static value which depends on the type of
+        * device and would not change thereafter.
+        */
+       get_ome_index(&omi_index, dev);
+
+       window_addr = geom_attr->aperture_start;
+       window_size = dma_domain->geom_size;
+
+       /* Disable first so the PAACE is not live while being rewritten. */
+       spin_lock_irqsave(&iommu_lock, flags);
+       ret = pamu_disable_liodn(liodn);
+       if (!ret)
+               ret = pamu_config_ppaace(liodn, window_addr, window_size, omi_index,
+                                        0, dma_domain->snoop_id,
+                                        dma_domain->stash_id, win_cnt, 0);
+       spin_unlock_irqrestore(&iommu_lock, flags);
+       if (ret) {
+               pr_debug("PAMU PAACE configuration failed for liodn %d, win_cnt =%d\n", liodn, win_cnt);
+               return ret;
+       }
+
+       if (win_cnt > 1) {
+               /* Geometry is divided into win_cnt equal subwindows. */
+               subwin_size = window_size >> ilog2(win_cnt);
+               for (i = 0; i < win_cnt; i++) {
+                       spin_lock_irqsave(&iommu_lock, flags);
+                       ret = pamu_disable_spaace(liodn, i);
+                       if (!ret)
+                               ret = pamu_config_spaace(liodn, win_cnt, i,
+                                                        subwin_size, omi_index,
+                                                        0, dma_domain->snoop_id,
+                                                        dma_domain->stash_id,
+                                                        0, 0);
+                       spin_unlock_irqrestore(&iommu_lock, flags);
+                       if (ret) {
+                               pr_debug("PAMU SPAACE configuration failed for liodn %d\n", liodn);
+                               return ret;
+                       }
+               }
+       }
+
+       return ret;
+}
+
+/*
+ * Validate a window size/address pair: @size must be a power of two no
+ * smaller than the PAMU page size, and @iova must be @size-aligned.
+ * Returns 0 if valid, -EINVAL otherwise.
+ */
+static int check_size(u64 size, dma_addr_t iova)
+{
+       /*
+        * Size must be a power of two and at least be equal
+        * to PAMU page size.
+        */
+       if (!is_power_of_2(size) || size < PAMU_PAGE_SIZE) {
+               pr_debug("%s: size too small or not a power of two\n", __func__);
+               return -EINVAL;
+       }
+
+       /* iova must be aligned to the window size (not just the page size) */
+       if (iova & (size - 1)) {
+               pr_debug("%s: address is not aligned with window size\n", __func__);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+/*
+ * Allocate and initialize a fsl_dma_domain from the domain cache.
+ * Stash and snoop ids start out as ~0 ("unset"); win_cnt defaults to
+ * the hardware maximum subwindow count.  Returns NULL on allocation
+ * failure.
+ */
+static struct fsl_dma_domain *iommu_alloc_dma_domain(void)
+{
+       struct fsl_dma_domain *domain;
+
+       domain = kmem_cache_zalloc(fsl_pamu_domain_cache, GFP_KERNEL);
+       if (!domain)
+               return NULL;
+
+       domain->stash_id = ~(u32)0;
+       domain->snoop_id = ~(u32)0;
+       domain->win_cnt = pamu_get_max_subwin_cnt();
+       domain->geom_size = 0;
+
+       INIT_LIST_HEAD(&domain->devices);
+
+       spin_lock_init(&domain->domain_lock);
+
+       return domain;
+}
+
+/* Return the domain info attached to @dev, or NULL if none is attached. */
+static inline struct device_domain_info *find_domain(struct device *dev)
+{
+       return dev->archdata.iommu_domain;
+}
+
+/*
+ * Tear down one device<->domain link: unlink @info from the domain's
+ * device list, release subwindows (if any), disable the LIODN and free
+ * the tracking structure.  Called with the owning domain's domain_lock
+ * held (see detach_device()).
+ */
+static void remove_device_ref(struct device_domain_info *info, u32 win_cnt)
+{
+       unsigned long flags;
+
+       list_del(&info->link);
+       spin_lock_irqsave(&iommu_lock, flags);
+       if (win_cnt > 1)
+               pamu_free_subwins(info->liodn);
+       pamu_disable_liodn(info->liodn);
+       spin_unlock_irqrestore(&iommu_lock, flags);
+       spin_lock_irqsave(&device_domain_lock, flags);
+       info->dev->archdata.iommu_domain = NULL;
+       kmem_cache_free(iommu_devinfo_cache, info);
+       spin_unlock_irqrestore(&device_domain_lock, flags);
+}
+
+/*
+ * Detach @dev from @dma_domain.  A NULL @dev detaches every device on
+ * the domain (used by fsl_pamu_domain_destroy()).  A device with
+ * multiple LIODNs has one info entry per LIODN, hence the _safe walk
+ * removing all matches.
+ */
+static void detach_device(struct device *dev, struct fsl_dma_domain *dma_domain)
+{
+       struct device_domain_info *info, *tmp;
+       unsigned long flags;
+
+       spin_lock_irqsave(&dma_domain->domain_lock, flags);
+       /* Remove the device from the domain device list */
+       list_for_each_entry_safe(info, tmp, &dma_domain->devices, link) {
+               if (!dev || (info->dev == dev))
+                       remove_device_ref(info, dma_domain->win_cnt);
+       }
+       spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
+}
+
+/*
+ * Bind @liodn of @dev to @dma_domain: allocate a device_domain_info
+ * entry, link it on the domain's device list and, for the device's
+ * first LIODN, publish it via dev->archdata.iommu_domain.  If the
+ * device is currently attached to a different domain it is detached
+ * first.
+ */
+static void attach_device(struct fsl_dma_domain *dma_domain, int liodn, struct device *dev)
+{
+       struct device_domain_info *info, *old_domain_info;
+       unsigned long flags;
+
+       spin_lock_irqsave(&device_domain_lock, flags);
+       /*
+        * Check here if the device is already attached to domain or not.
+        * If the device is already attached to a domain detach it.
+        */
+       old_domain_info = find_domain(dev);
+       if (old_domain_info && old_domain_info->domain != dma_domain) {
+               spin_unlock_irqrestore(&device_domain_lock, flags);
+               detach_device(dev, old_domain_info->domain);
+               spin_lock_irqsave(&device_domain_lock, flags);
+       }
+
+       info = kmem_cache_zalloc(iommu_devinfo_cache, GFP_ATOMIC);
+       if (!info) {
+               /* GFP_ATOMIC allocation may fail; bail out instead of oopsing */
+               spin_unlock_irqrestore(&device_domain_lock, flags);
+               return;
+       }
+
+       info->dev = dev;
+       info->liodn = liodn;
+       info->domain = dma_domain;
+
+       list_add(&info->link, &dma_domain->devices);
+       /*
+        * In case of devices with multiple LIODNs just store
+        * the info for the first LIODN as all
+        * LIODNs share the same domain
+        */
+       if (!old_domain_info)
+               dev->archdata.iommu_domain = info;
+       spin_unlock_irqrestore(&device_domain_lock, flags);
+
+}
+
+/*
+ * Translate @iova to a physical address within the domain geometry.
+ * Returns 0 for addresses outside the aperture.
+ */
+static phys_addr_t fsl_pamu_iova_to_phys(struct iommu_domain *domain,
+                                           dma_addr_t iova)
+{
+       struct fsl_dma_domain *dma_domain = domain->priv;
+
+       if ((iova < domain->geometry.aperture_start) ||
+               iova > (domain->geometry.aperture_end))
+               return 0;
+
+       return get_phys_addr(dma_domain, iova);
+}
+
+/* Cache-coherent DMA is the only IOMMU capability PAMU advertises. */
+static int fsl_pamu_domain_has_cap(struct iommu_domain *domain,
+                                     unsigned long cap)
+{
+       if (cap == IOMMU_CAP_CACHE_COHERENCY)
+               return 1;
+
+       return 0;
+}
+
+/*
+ * Destroy the PAMU private state of @domain: detach every device and
+ * return the fsl_dma_domain to its cache.
+ */
+static void fsl_pamu_domain_destroy(struct iommu_domain *domain)
+{
+       struct fsl_dma_domain *dma_domain = domain->priv;
+
+       domain->priv = NULL;
+
+       /* remove all the devices from the device list */
+       detach_device(NULL, dma_domain);
+
+       dma_domain->enabled = 0;
+       dma_domain->mapped = 0;
+
+       kmem_cache_free(fsl_pamu_domain_cache, dma_domain);
+}
+
+/*
+ * Allocate the PAMU private state for a new iommu_domain and set the
+ * default geometry.  Returns 0 on success, -ENOMEM on failure.
+ */
+static int fsl_pamu_domain_init(struct iommu_domain *domain)
+{
+       struct fsl_dma_domain *dma_domain;
+
+       dma_domain = iommu_alloc_dma_domain();
+       if (!dma_domain) {
+               pr_debug("dma_domain allocation failed\n");
+               return -ENOMEM;
+       }
+       domain->priv = dma_domain;
+       dma_domain->iommu_domain = domain;
+       /* default geometry 64 GB i.e. maximum system address */
+       domain->geometry.aperture_start = 0;
+       domain->geometry.aperture_end = (1ULL << 36) - 1;
+       domain->geometry.force_aperture = true;
+
+       return 0;
+}
+
+/*
+ * Configure geometry settings for all LIODNs associated with domain.
+ * Stops at the first failing LIODN and returns its error; earlier
+ * LIODNs keep the new geometry.  Caller holds domain_lock.
+ */
+static int pamu_set_domain_geometry(struct fsl_dma_domain *dma_domain,
+                                   struct iommu_domain_geometry *geom_attr,
+                                   u32 win_cnt)
+{
+       struct device_domain_info *info;
+       int ret = 0;
+
+       list_for_each_entry(info, &dma_domain->devices, link) {
+               ret = pamu_set_liodn(info->liodn, info->dev, dma_domain,
+                                     geom_attr, win_cnt);
+               if (ret)
+                       break;
+       }
+
+       return ret;
+}
+
+/*
+ * Update stash destination for all LIODNs associated with the domain.
+ * Stops at the first failure and returns its error code.
+ */
+static int update_domain_stash(struct fsl_dma_domain *dma_domain, u32 val)
+{
+       struct device_domain_info *info;
+       int ret = 0;
+
+       list_for_each_entry(info, &dma_domain->devices, link) {
+               ret = update_liodn_stash(info->liodn, dma_domain, val);
+               if (ret)
+                       break;
+       }
+
+       return ret;
+}
+
+/*
+ * Update domain mappings for all LIODNs associated with the domain.
+ * Propagates window @wnd_nr to every LIODN; stops on first failure.
+ */
+static int update_domain_mapping(struct fsl_dma_domain *dma_domain, u32 wnd_nr)
+{
+       struct device_domain_info *info;
+       int ret = 0;
+
+       list_for_each_entry(info, &dma_domain->devices, link) {
+               ret = update_liodn(info->liodn, dma_domain, wnd_nr);
+               if (ret)
+                       break;
+       }
+       return ret;
+}
+
+/*
+ * Disable window @wnd_nr on every LIODN of the domain.  For a
+ * single-window enabled domain the whole LIODN is disabled (and the
+ * domain marked disabled); otherwise just the subwindow's SPAACE.
+ * Note: the loop continues past failures, so the returned value is
+ * that of the last LIODN processed.
+ */
+static int disable_domain_win(struct fsl_dma_domain *dma_domain, u32 wnd_nr)
+{
+       struct device_domain_info *info;
+       int ret = 0;
+
+       list_for_each_entry(info, &dma_domain->devices, link) {
+               if (dma_domain->win_cnt == 1 && dma_domain->enabled) {
+                       ret = pamu_disable_liodn(info->liodn);
+                       if (!ret)
+                               dma_domain->enabled = 0;
+               } else {
+                       ret = pamu_disable_spaace(info->liodn, wnd_nr);
+               }
+       }
+
+       return ret;
+}
+
+/*
+ * iommu_ops->domain_window_disable: invalidate window @wnd_nr if it is
+ * currently mapped.  Silently returns if windows were never configured
+ * or the index is out of range.
+ */
+static void fsl_pamu_window_disable(struct iommu_domain *domain, u32 wnd_nr)
+{
+       struct fsl_dma_domain *dma_domain = domain->priv;
+       unsigned long flags;
+       int ret;
+
+       spin_lock_irqsave(&dma_domain->domain_lock, flags);
+       if (!dma_domain->win_arr) {
+               pr_debug("Number of windows not configured\n");
+               spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
+               return;
+       }
+
+       if (wnd_nr >= dma_domain->win_cnt) {
+               pr_debug("Invalid window index\n");
+               spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
+               return;
+       }
+
+       if (dma_domain->win_arr[wnd_nr].valid) {
+               ret = disable_domain_win(dma_domain, wnd_nr);
+               if (!ret) {
+                       dma_domain->win_arr[wnd_nr].valid = 0;
+                       dma_domain->mapped--;
+               }
+       }
+
+       spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
+
+}
+
+/*
+ * iommu_ops->domain_window_enable: map window @wnd_nr to @paddr with
+ * @size bytes and IOMMU_READ/IOMMU_WRITE permissions in @prot.
+ * Requires the window count to be configured first and the window to
+ * be currently invalid (disable before remapping).  Returns 0 on
+ * success or a negative errno.
+ */
+static int fsl_pamu_window_enable(struct iommu_domain *domain, u32 wnd_nr,
+                                 phys_addr_t paddr, u64 size, int prot)
+{
+       struct fsl_dma_domain *dma_domain = domain->priv;
+       struct dma_window *wnd;
+       int pamu_prot = 0;
+       int ret;
+       unsigned long flags;
+       u64 win_size;
+
+       /* Translate generic IOMMU prot bits to PAMU access permissions */
+       if (prot & IOMMU_READ)
+               pamu_prot |= PAACE_AP_PERMS_QUERY;
+       if (prot & IOMMU_WRITE)
+               pamu_prot |= PAACE_AP_PERMS_UPDATE;
+
+       spin_lock_irqsave(&dma_domain->domain_lock, flags);
+       if (!dma_domain->win_arr) {
+               pr_debug("Number of windows not configured\n");
+               spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
+               return -ENODEV;
+       }
+
+       if (wnd_nr >= dma_domain->win_cnt) {
+               pr_debug("Invalid window index\n");
+               spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
+               return -EINVAL;
+       }
+
+       /* Each (sub)window may cover at most geom_size / win_cnt bytes */
+       win_size = dma_domain->geom_size >> ilog2(dma_domain->win_cnt);
+       if (size > win_size) {
+               pr_debug("Invalid window size \n");
+               spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
+               return -EINVAL;
+       }
+
+       if (dma_domain->win_cnt == 1) {
+               if (dma_domain->enabled) {
+                       pr_debug("Disable the window before updating the mapping\n");
+                       spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
+                       return -EBUSY;
+               }
+
+               ret = check_size(size, domain->geometry.aperture_start);
+               if (ret) {
+                       pr_debug("Aperture start not aligned to the size\n");
+                       spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
+                       return -EINVAL;
+               }
+       }
+
+       wnd = &dma_domain->win_arr[wnd_nr];
+       if (!wnd->valid) {
+               wnd->paddr = paddr;
+               wnd->size = size;
+               wnd->prot = pamu_prot;
+
+               ret = update_domain_mapping(dma_domain, wnd_nr);
+               if (!ret) {
+                       wnd->valid = 1;
+                       dma_domain->mapped++;
+               }
+       } else {
+               pr_debug("Disable the window before updating the mapping\n");
+               ret = -EBUSY;
+       }
+
+       spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
+
+       return ret;
+}
+
+/*
+ * Attach the LIODN to the DMA domain and configure the geometry
+ * and window mappings.
+ *
+ * @liodn is an array of @num LIODN values read from the device tree.
+ * On the first invalid LIODN or configuration failure the loop stops
+ * and the error is returned; already-attached LIODNs stay attached.
+ */
+static int handle_attach_device(struct fsl_dma_domain *dma_domain,
+                                struct device *dev, const u32 *liodn,
+                                int num)
+{
+       unsigned long flags;
+       struct iommu_domain *domain = dma_domain->iommu_domain;
+       int ret = 0;
+       int i;
+
+       spin_lock_irqsave(&dma_domain->domain_lock, flags);
+       for (i = 0; i < num; i++) {
+
+               /* Ensure that LIODN value is valid */
+               if (liodn[i] >= PAACE_NUMBER_ENTRIES) {
+                       pr_debug("Invalid liodn %d, attach device failed for %s\n",
+                               liodn[i], dev->of_node->full_name);
+                       ret = -EINVAL;
+                       break;
+               }
+
+               attach_device(dma_domain, liodn[i], dev);
+               /*
+                * Check if geometry has already been configured
+                * for the domain. If yes, set the geometry for
+                * the LIODN.
+                */
+               if (dma_domain->win_arr) {
+                       /* win_cnt of 0 tells pamu_set_liodn "no subwindows" */
+                       u32 win_cnt = dma_domain->win_cnt > 1 ? dma_domain->win_cnt : 0;
+                       ret = pamu_set_liodn(liodn[i], dev, dma_domain,
+                                             &domain->geometry,
+                                             win_cnt);
+                       if (ret)
+                               break;
+                       if (dma_domain->mapped) {
+                               /*
+                                * Create window/subwindow mapping for
+                                * the LIODN.
+                                */
+                               ret = map_liodn(liodn[i], dma_domain);
+                               if (ret)
+                                       break;
+                       }
+               }
+       }
+       spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
+
+       return ret;
+}
+
+/*
+ * iommu_ops->attach_dev: attach @dev (or, for PCI devices, its host
+ * controller) to the domain using the "fsl,liodn" device-tree
+ * property.  Returns -EINVAL if the property is missing.
+ */
+static int fsl_pamu_attach_device(struct iommu_domain *domain,
+                                 struct device *dev)
+{
+       struct fsl_dma_domain *dma_domain = domain->priv;
+       const u32 *liodn;
+       u32 liodn_cnt;
+       int len, ret = 0;
+       struct pci_dev *pdev = NULL;
+       struct pci_controller *pci_ctl;
+
+       /*
+        * Use LIODN of the PCI controller while attaching a
+        * PCI device.
+        */
+       if (dev->bus == &pci_bus_type) {
+               pdev = to_pci_dev(dev);
+               pci_ctl = pci_bus_to_host(pdev->bus);
+               /*
+                * make dev point to pci controller device
+                * so we can get the LIODN programmed by
+                * u-boot.
+                */
+               dev = pci_ctl->parent;
+       }
+
+       liodn = of_get_property(dev->of_node, "fsl,liodn", &len);
+       if (liodn) {
+               /* The property is an array of u32 LIODNs */
+               liodn_cnt = len / sizeof(u32);
+               ret = handle_attach_device(dma_domain, dev,
+                                        liodn, liodn_cnt);
+       } else {
+               pr_debug("missing fsl,liodn property at %s\n",
+                         dev->of_node->full_name);
+                       ret = -EINVAL;
+       }
+
+       return ret;
+}
+
+/*
+ * iommu_ops->detach_dev: detach @dev (or its PCI host controller)
+ * from the domain.  A missing "fsl,liodn" property is only logged,
+ * since there is nothing to undo.
+ */
+static void fsl_pamu_detach_device(struct iommu_domain *domain,
+                                     struct device *dev)
+{
+       struct fsl_dma_domain *dma_domain = domain->priv;
+       const u32 *prop;
+       int len;
+       struct pci_dev *pdev = NULL;
+       struct pci_controller *pci_ctl;
+
+       /*
+        * Use LIODN of the PCI controller while detaching a
+        * PCI device.
+        */
+       if (dev->bus == &pci_bus_type) {
+               pdev = to_pci_dev(dev);
+               pci_ctl = pci_bus_to_host(pdev->bus);
+               /*
+                * make dev point to pci controller device
+                * so we can get the LIODN programmed by
+                * u-boot.
+                */
+               dev = pci_ctl->parent;
+       }
+
+       prop = of_get_property(dev->of_node, "fsl,liodn", &len);
+       if (prop)
+               detach_device(dev, dma_domain);
+       else
+               pr_debug("missing fsl,liodn property at %s\n",
+                         dev->of_node->full_name);
+}
+
+/*
+ * Handle DOMAIN_ATTR_GEOMETRY: validate and record the requested
+ * aperture.  Rejected with -EBUSY while DMA is enabled for the
+ * domain, and with -EINVAL for unaligned/non-power-of-two sizes or
+ * when force_aperture is not set (DMA outside the geometry is not
+ * supported).
+ */
+static  int configure_domain_geometry(struct iommu_domain *domain, void *data)
+{
+       struct iommu_domain_geometry *geom_attr = data;
+       struct fsl_dma_domain *dma_domain = domain->priv;
+       dma_addr_t geom_size;
+       unsigned long flags;
+
+       geom_size = geom_attr->aperture_end - geom_attr->aperture_start + 1;
+       /*
+        * Sanity check the geometry size. Also, we do not support
+        * DMA outside of the geometry.
+        */
+       if (check_size(geom_size, geom_attr->aperture_start) ||
+               !geom_attr->force_aperture) {
+                       pr_debug("Invalid PAMU geometry attributes\n");
+                       return -EINVAL;
+               }
+
+       spin_lock_irqsave(&dma_domain->domain_lock, flags);
+       if (dma_domain->enabled) {
+               pr_debug("Can't set geometry attributes as domain is active\n");
+               spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
+               return  -EBUSY;
+       }
+
+       /* Copy the domain geometry information */
+       memcpy(&domain->geometry, geom_attr,
+              sizeof(struct iommu_domain_geometry));
+       dma_domain->geom_size = geom_size;
+
+       spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
+
+       return 0;
+}
+
+/*
+ * Set the domain stash attribute.
+ *
+ * Resolves the (cache, cpu) pair in @data to a stash destination id
+ * and pushes it to every LIODN on the domain.  Returns -EINVAL when
+ * get_stash_id() cannot resolve the attributes (~0 sentinel).
+ */
+static int configure_domain_stash(struct fsl_dma_domain *dma_domain, void *data)
+{
+       struct pamu_stash_attribute *stash_attr = data;
+       unsigned long flags;
+       int ret;
+
+       spin_lock_irqsave(&dma_domain->domain_lock, flags);
+
+       memcpy(&dma_domain->dma_stash, stash_attr,
+                sizeof(struct pamu_stash_attribute));
+
+       dma_domain->stash_id = get_stash_id(stash_attr->cache,
+                                           stash_attr->cpu);
+       if (dma_domain->stash_id == ~(u32)0) {
+               pr_debug("Invalid stash attributes\n");
+               spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
+               return -EINVAL;
+       }
+
+       ret = update_domain_stash(dma_domain, dma_domain->stash_id);
+
+       spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
+
+       return ret;
+}
+
+/*
+ * Configure domain dma state i.e. enable/disable DMA.
+ *
+ * Enabling requires at least one valid window mapping (-ENODEV
+ * otherwise).  NOTE(review): per-LIODN enable/disable failures are
+ * only logged and the function still returns 0 — confirm this
+ * best-effort behaviour is intended.
+ */
+static int configure_domain_dma_state(struct fsl_dma_domain *dma_domain, bool enable)
+{
+       struct device_domain_info *info;
+       unsigned long flags;
+       int ret;
+
+       spin_lock_irqsave(&dma_domain->domain_lock, flags);
+
+       if (enable && !dma_domain->mapped) {
+               pr_debug("Can't enable DMA domain without valid mapping\n");
+               spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
+               return -ENODEV;
+       }
+
+       dma_domain->enabled = enable;
+       list_for_each_entry(info, &dma_domain->devices,
+                                link) {
+               ret = (enable) ? pamu_enable_liodn(info->liodn) :
+                       pamu_disable_liodn(info->liodn);
+               if (ret)
+                       pr_debug("Unable to set dma state for liodn %d",
+                                info->liodn);
+       }
+       spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
+
+       return 0;
+}
+
+/*
+ * iommu_ops->domain_set_attr: dispatch geometry, stash and DMA-enable
+ * attribute updates to their handlers.  Unknown attributes yield
+ * -EINVAL.
+ */
+static int fsl_pamu_set_domain_attr(struct iommu_domain *domain,
+                                enum iommu_attr attr_type, void *data)
+{
+       struct fsl_dma_domain *dma_domain = domain->priv;
+       int ret = 0;
+
+
+       switch (attr_type) {
+       case DOMAIN_ATTR_GEOMETRY:
+               ret = configure_domain_geometry(domain, data);
+               break;
+       case DOMAIN_ATTR_FSL_PAMU_STASH:
+               ret = configure_domain_stash(dma_domain, data);
+               break;
+       case DOMAIN_ATTR_FSL_PAMU_ENABLE:
+               ret = configure_domain_dma_state(dma_domain, *(int *)data);
+               break;
+       default:
+               pr_debug("Unsupported attribute type\n");
+               ret = -EINVAL;
+               break;
+       }
+
+       return ret;
+}
+
+/*
+ * iommu_ops->domain_get_attr: report stash attributes, DMA-enable
+ * state, or the PAMUv1 marker.  Unknown attributes yield -EINVAL.
+ */
+static int fsl_pamu_get_domain_attr(struct iommu_domain *domain,
+                                enum iommu_attr attr_type, void *data)
+{
+       struct fsl_dma_domain *dma_domain = domain->priv;
+       int ret = 0;
+
+
+       switch (attr_type) {
+       case DOMAIN_ATTR_FSL_PAMU_STASH:
+               memcpy((struct pamu_stash_attribute *) data, &dma_domain->dma_stash,
+                                sizeof(struct pamu_stash_attribute));
+               break;
+       case DOMAIN_ATTR_FSL_PAMU_ENABLE:
+               *(int *)data = dma_domain->enabled;
+               break;
+       case DOMAIN_ATTR_FSL_PAMUV1:
+               *(int *)data = DOMAIN_ATTR_FSL_PAMUV1;
+               break;
+       default:
+               pr_debug("Unsupported attribute type\n");
+               ret = -EINVAL;
+               break;
+       }
+
+       return ret;
+}
+
+#define REQ_ACS_FLAGS  (PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF)
+
+/* Return @dev's existing iommu_group, or allocate a fresh one. */
+static struct iommu_group *get_device_iommu_group(struct device *dev)
+{
+       struct iommu_group *grp;
+
+       grp = iommu_group_get(dev);
+       if (grp)
+               return grp;
+
+       return iommu_group_alloc();
+}
+
+/*
+ * Return true when the PCI controller supports per-endpoint
+ * partitioning (controller version >= 0x204).
+ */
+static  bool check_pci_ctl_endpt_part(struct pci_controller *pci_ctl)
+{
+       u32 version;
+
+       /* Check the PCI controller version number by reading BRR1 register */
+       version = in_be32(pci_ctl->cfg_addr + (PCI_FSL_BRR1 >> 2));
+       version &= PCI_FSL_BRR1_VER;
+       /* If PCI controller version is >= 0x204 we can partition endpoints*/
+       if (version >= 0x204)
+               return 1;
+
+       return 0;
+}
+
+/*
+ * Get iommu group information from peer devices or devices on the
+ * parent bus.  Walks up the bus hierarchy and returns the first
+ * sibling's group (with a reference taken), or NULL if none exists.
+ */
+static struct iommu_group *get_shared_pci_device_group(struct pci_dev *pdev)
+{
+       struct pci_dev *tmp;
+       struct iommu_group *group;
+       struct pci_bus *bus = pdev->bus;
+
+       /*
+        * Traverse the pci bus device list to get
+        * the shared iommu group.
+        */
+       while (bus) {
+               list_for_each_entry(tmp, &bus->devices, bus_list) {
+                       if (tmp == pdev)
+                               continue;
+                       group = iommu_group_get(&tmp->dev);
+                       if (group)
+                               return group;
+               }
+
+               bus = bus->parent;
+       }
+
+       return NULL;
+}
+
+/*
+ * Find or create the iommu_group for a PCI device.
+ *
+ * When the controller supports endpoint partitioning, walk up from
+ * @pdev past non-ACS-isolated functions and bridges to find the device
+ * that actually provides DMA isolation, and use (or allocate) its
+ * group.  Otherwise all devices behind the controller share the
+ * controller's group.  Returns the group with a reference held, NULL,
+ * or an ERR_PTR from iommu_group_alloc().
+ */
+static struct iommu_group *get_pci_device_group(struct pci_dev *pdev)
+{
+       struct pci_controller *pci_ctl;
+       bool pci_endpt_partioning;
+       struct iommu_group *group = NULL;
+       struct pci_dev *bridge, *dma_pdev = NULL;
+
+       pci_ctl = pci_bus_to_host(pdev->bus);
+       pci_endpt_partioning = check_pci_ctl_endpt_part(pci_ctl);
+       /* We can partition PCIe devices so assign device group to the device */
+       if (pci_endpt_partioning) {
+               bridge = pci_find_upstream_pcie_bridge(pdev);
+               if (bridge) {
+                       /* Legacy PCI behind a PCIe-to-PCI bridge is isolated
+                        * only at the bridge; use the bridge's slot 0 device. */
+                       if (pci_is_pcie(bridge))
+                               dma_pdev = pci_get_domain_bus_and_slot(
+                                               pci_domain_nr(pdev->bus),
+                                               bridge->subordinate->number, 0);
+                       if (!dma_pdev)
+                               dma_pdev = pci_dev_get(bridge);
+               } else
+                       dma_pdev = pci_dev_get(pdev);
+
+               /* Account for quirked devices */
+               swap_pci_ref(&dma_pdev, pci_get_dma_source(dma_pdev));
+
+               /*
+                * If it's a multifunction device that does not support our
+                * required ACS flags, add to the same group as lowest numbered
+                * function that also does not support the required ACS flags.
+                */
+               if (dma_pdev->multifunction &&
+                   !pci_acs_enabled(dma_pdev, REQ_ACS_FLAGS)) {
+                       u8 i, slot = PCI_SLOT(dma_pdev->devfn);
+
+                       for (i = 0; i < 8; i++) {
+                               struct pci_dev *tmp;
+
+                               tmp = pci_get_slot(dma_pdev->bus, PCI_DEVFN(slot, i));
+                               if (!tmp)
+                                       continue;
+
+                               if (!pci_acs_enabled(tmp, REQ_ACS_FLAGS)) {
+                                       swap_pci_ref(&dma_pdev, tmp);
+                                       break;
+                               }
+                               pci_dev_put(tmp);
+                       }
+               }
+
+               /*
+                * Devices on the root bus go through the iommu.  If that's not us,
+                * find the next upstream device and test ACS up to the root bus.
+                * Finding the next device may require skipping virtual buses.
+                */
+               while (!pci_is_root_bus(dma_pdev->bus)) {
+                       struct pci_bus *bus = dma_pdev->bus;
+
+                       while (!bus->self) {
+                               if (!pci_is_root_bus(bus))
+                                       bus = bus->parent;
+                               else
+                                       goto root_bus;
+                       }
+
+                       if (pci_acs_path_enabled(bus->self, NULL, REQ_ACS_FLAGS))
+                               break;
+
+                       swap_pci_ref(&dma_pdev, pci_dev_get(bus->self));
+               }
+
+root_bus:
+               group = get_device_iommu_group(&dma_pdev->dev);
+               pci_dev_put(dma_pdev);
+               /*
+                * PCIe controller is not a partitionable entity
+                * free the controller device iommu_group.
+                */
+               if (pci_ctl->parent->iommu_group)
+                       iommu_group_remove_device(pci_ctl->parent);
+       } else {
+               /*
+                * All devices connected to the controller will share the
+                * PCI controllers device group. If this is the first
+                * device to be probed for the pci controller, copy the
+                * device group information from the PCI controller device
+                * node and remove the PCI controller iommu group.
+                * For subsequent devices, the iommu group information can
+                * be obtained from sibling devices (i.e. from the bus_devices
+                * link list).
+                */
+               if (pci_ctl->parent->iommu_group) {
+                       group = get_device_iommu_group(pci_ctl->parent);
+                       iommu_group_remove_device(pci_ctl->parent);
+               } else
+                       group = get_shared_pci_device_group(pdev);
+       }
+
+       return group;
+}
+
+/*
+ * iommu_ops->add_device: place @dev in an iommu_group.  PCI devices
+ * get (possibly shared) groups via get_pci_device_group(); platform
+ * devices with an "fsl,liodn" property get a group of their own.
+ */
+static int fsl_pamu_add_device(struct device *dev)
+{
+       struct iommu_group *group = NULL;
+       struct pci_dev *pdev;
+       const u32 *prop;
+       int ret, len;
+
+       /*
+        * For platform devices we allocate a separate group for
+        * each of the devices.
+        */
+       if (dev->bus == &pci_bus_type) {
+               pdev = to_pci_dev(dev);
+               /* Don't create device groups for virtual PCI bridges */
+               if (pdev->subordinate)
+                       return 0;
+
+               group = get_pci_device_group(pdev);
+
+       } else {
+               prop = of_get_property(dev->of_node, "fsl,liodn", &len);
+               if (prop)
+                       group = get_device_iommu_group(dev);
+       }
+
+       /*
+        * PTR_ERR(NULL) evaluates to 0 (success), so a missing group
+        * must be reported explicitly rather than folded into PTR_ERR.
+        */
+       if (!group)
+               return -ENODEV;
+       if (IS_ERR(group))
+               return PTR_ERR(group);
+
+       ret = iommu_group_add_device(group, dev);
+
+       iommu_group_put(group);
+       return ret;
+}
+
+/* iommu_ops->remove_device: drop @dev from its iommu_group. */
+static void fsl_pamu_remove_device(struct device *dev)
+{
+       iommu_group_remove_device(dev);
+}
+
+/*
+ * iommu_ops->domain_set_windows: set the number of DMA windows for
+ * the domain.  Requires DMA disabled and the geometry configured;
+ * @w_count must be a power of two within the hardware limit.
+ * Reprograms the geometry on every attached LIODN and (re)allocates
+ * the window bookkeeping array.
+ */
+static int fsl_pamu_set_windows(struct iommu_domain *domain, u32 w_count)
+{
+       struct fsl_dma_domain *dma_domain = domain->priv;
+       unsigned long flags;
+       int ret;
+
+       spin_lock_irqsave(&dma_domain->domain_lock, flags);
+       /* Ensure domain is inactive i.e. DMA should be disabled for the domain */
+       if (dma_domain->enabled) {
+               pr_debug("Can't set geometry attributes as domain is active\n");
+               spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
+               return  -EBUSY;
+       }
+
+       /* Ensure that the geometry has been set for the domain */
+       if (!dma_domain->geom_size) {
+               pr_debug("Please configure geometry before setting the number of windows\n");
+               spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
+               return -EINVAL;
+       }
+
+       /*
+        * Ensure we have valid window count i.e. it should be less than
+        * maximum permissible limit and should be a power of two.
+        */
+       if (w_count > pamu_get_max_subwin_cnt() || !is_power_of_2(w_count)) {
+               pr_debug("Invalid window count\n");
+               spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
+               return -EINVAL;
+       }
+
+       ret = pamu_set_domain_geometry(dma_domain, &domain->geometry,
+                               ((w_count > 1) ? w_count : 0));
+       if (!ret) {
+               /* kfree(NULL) is a no-op; no need for a NULL guard */
+               kfree(dma_domain->win_arr);
+               /* kcalloc zeroes the array and checks the multiplication for overflow */
+               dma_domain->win_arr = kcalloc(w_count,
+                                             sizeof(struct dma_window),
+                                             GFP_ATOMIC);
+               if (!dma_domain->win_arr) {
+                       spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
+                       return -ENOMEM;
+               }
+               dma_domain->win_cnt = w_count;
+       }
+       spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
+
+       return ret;
+}
+
+/* iommu_ops->domain_get_windows: report the configured window count. */
+static u32 fsl_pamu_get_windows(struct iommu_domain *domain)
+{
+       struct fsl_dma_domain *dma_domain = domain->priv;
+
+       return dma_domain->win_cnt;
+}
+
+/* IOMMU operations registered for both the platform and PCI buses. */
+static struct iommu_ops fsl_pamu_ops = {
+       .domain_init    = fsl_pamu_domain_init,
+       .domain_destroy = fsl_pamu_domain_destroy,
+       .attach_dev     = fsl_pamu_attach_device,
+       .detach_dev     = fsl_pamu_detach_device,
+       .domain_window_enable = fsl_pamu_window_enable,
+       .domain_window_disable = fsl_pamu_window_disable,
+       .domain_get_windows = fsl_pamu_get_windows,
+       .domain_set_windows = fsl_pamu_set_windows,
+       .iova_to_phys   = fsl_pamu_iova_to_phys,
+       .domain_has_cap = fsl_pamu_domain_has_cap,
+       .domain_set_attr = fsl_pamu_set_domain_attr,
+       .domain_get_attr = fsl_pamu_get_domain_attr,
+       .add_device     = fsl_pamu_add_device,
+       .remove_device  = fsl_pamu_remove_device,
+};
+
+/*
+ * Driver entry point: initialize the iommu mempools and register the
+ * PAMU iommu_ops on the platform and PCI buses.  Returns 0 on success
+ * or the error from iommu_init_mempool().
+ */
+int pamu_domain_init(void)
+{
+       int ret = 0;
+
+       ret = iommu_init_mempool();
+       if (ret)
+               return ret;
+
+       bus_set_iommu(&platform_bus_type, &fsl_pamu_ops);
+       bus_set_iommu(&pci_bus_type, &fsl_pamu_ops);
+
+       return ret;
+}
diff --git a/drivers/iommu/fsl_pamu_domain.h b/drivers/iommu/fsl_pamu_domain.h
new file mode 100644 (file)
index 0000000..c90293f
--- /dev/null
@@ -0,0 +1,85 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright (C) 2013 Freescale Semiconductor, Inc.
+ *
+ */
+
+#ifndef __FSL_PAMU_DOMAIN_H
+#define __FSL_PAMU_DOMAIN_H
+
+#include "fsl_pamu.h"
+
+struct dma_window {
+       phys_addr_t paddr;
+       u64 size;
+       int valid;
+       int prot;
+};
+
+struct fsl_dma_domain {
+       /*
+        * Indicates the geometry size for the domain.
+        * This would be set when the geometry is
+        * configured for the domain.
+        */
+       dma_addr_t                      geom_size;
+       /*
+        * Number of windows associated with this domain.
+        * During domain initialization, it is set to the
+        * maximum number of subwindows allowed for a LIODN.
+        * Minimum value for this is 1 indicating a single PAMU
+        * window, without any sub windows. Value can be set/
+        * queried by set_attr/get_attr API for DOMAIN_ATTR_WINDOWS.
+        * Value can only be set once the geometry has been configured.
+        */
+       u32                             win_cnt;
+       /*
+        * win_arr contains information of the configured
+        * windows for a domain. This is allocated only
+        * when the number of windows for the domain are
+        * set.
+        */
+       struct dma_window               *win_arr;
+       /* list of devices associated with the domain */
+       struct list_head                devices;
+       /* dma_domain states:
+        * mapped - A particular mapping has been created
+        * within the configured geometry.
+        * enabled - DMA has been enabled for the given
+        * domain. This translates to setting of the
+        * valid bit for the primary PAACE in the PAMU
+        * PAACT table. Domain geometry should be set and
+        * it must have a valid mapping before DMA can be
+        * enabled for it.
+        *
+        */
+       int                             mapped;
+       int                             enabled;
+       /* stash_id obtained from the stash attribute details */
+       u32                             stash_id;
+       struct pamu_stash_attribute     dma_stash;
+       u32                             snoop_id;
+       struct iommu_domain             *iommu_domain;
+       spinlock_t                      domain_lock;
+};
+
+/* domain-device relationship */
+struct device_domain_info {
+       struct list_head link;  /* link to domain siblings */
+       struct device *dev;
+       u32 liodn;
+       struct fsl_dma_domain *domain; /* pointer to domain */
+};
+#endif  /* __FSL_PAMU_DOMAIN_H */
index eec0d3e..15e9b57 100644 (file)
@@ -890,56 +890,54 @@ static int dma_pte_clear_range(struct dmar_domain *domain,
        return order;
 }
 
+static void dma_pte_free_level(struct dmar_domain *domain, int level,
+                              struct dma_pte *pte, unsigned long pfn,
+                              unsigned long start_pfn, unsigned long last_pfn)
+{
+       pfn = max(start_pfn, pfn);
+       pte = &pte[pfn_level_offset(pfn, level)];
+
+       do {
+               unsigned long level_pfn;
+               struct dma_pte *level_pte;
+
+               if (!dma_pte_present(pte) || dma_pte_superpage(pte))
+                       goto next;
+
+               level_pfn = pfn & level_mask(level - 1);
+               level_pte = phys_to_virt(dma_pte_addr(pte));
+
+               if (level > 2)
+                       dma_pte_free_level(domain, level - 1, level_pte,
+                                          level_pfn, start_pfn, last_pfn);
+
+               /* If range covers entire pagetable, free it */
+               if (!(start_pfn > level_pfn ||
+                     last_pfn < level_pfn + level_size(level))) {
+                       dma_clear_pte(pte);
+                       domain_flush_cache(domain, pte, sizeof(*pte));
+                       free_pgtable_page(level_pte);
+               }
+next:
+               pfn += level_size(level);
+       } while (!first_pte_in_page(++pte) && pfn <= last_pfn);
+}
+
 /* free page table pages. last level pte should already be cleared */
 static void dma_pte_free_pagetable(struct dmar_domain *domain,
                                   unsigned long start_pfn,
                                   unsigned long last_pfn)
 {
        int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
-       struct dma_pte *first_pte, *pte;
-       int total = agaw_to_level(domain->agaw);
-       int level;
-       unsigned long tmp;
-       int large_page = 2;
 
        BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
        BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
        BUG_ON(start_pfn > last_pfn);
 
        /* We don't need lock here; nobody else touches the iova range */
-       level = 2;
-       while (level <= total) {
-               tmp = align_to_level(start_pfn, level);
-
-               /* If we can't even clear one PTE at this level, we're done */
-               if (tmp + level_size(level) - 1 > last_pfn)
-                       return;
-
-               do {
-                       large_page = level;
-                       first_pte = pte = dma_pfn_level_pte(domain, tmp, level, &large_page);
-                       if (large_page > level)
-                               level = large_page + 1;
-                       if (!pte) {
-                               tmp = align_to_level(tmp + 1, level + 1);
-                               continue;
-                       }
-                       do {
-                               if (dma_pte_present(pte)) {
-                                       free_pgtable_page(phys_to_virt(dma_pte_addr(pte)));
-                                       dma_clear_pte(pte);
-                               }
-                               pte++;
-                               tmp += level_size(level);
-                       } while (!first_pte_in_page(pte) &&
-                                tmp + level_size(level) - 1 <= last_pfn);
+       dma_pte_free_level(domain, agaw_to_level(domain->agaw),
+                          domain->pgd, 0, start_pfn, last_pfn);
 
-                       domain_flush_cache(domain, first_pte,
-                                          (void *)pte - (void *)first_pte);
-                       
-               } while (tmp && tmp + level_size(level) - 1 <= last_pfn);
-               level++;
-       }
        /* free pgd */
        if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
                free_pgtable_page(domain->pgd);
index 0a1c962..08ba497 100644 (file)
@@ -282,7 +282,6 @@ static int msm_iommu_remove(struct platform_device *pdev)
                clk_put(drv->pclk);
                memset(drv, 0, sizeof(*drv));
                kfree(drv);
-               platform_set_drvdata(pdev, NULL);
        }
        return 0;
 }
@@ -366,7 +365,6 @@ static int msm_iommu_ctx_remove(struct platform_device *pdev)
        if (drv) {
                memset(drv, 0, sizeof(struct msm_iommu_ctx_drvdata));
                kfree(drv);
-               platform_set_drvdata(pdev, NULL);
        }
        return 0;
 }
index 0ba3766..bcd78a7 100644 (file)
@@ -1008,8 +1008,6 @@ static int omap_iommu_remove(struct platform_device *pdev)
        struct resource *res;
        struct omap_iommu *obj = platform_get_drvdata(pdev);
 
-       platform_set_drvdata(pdev, NULL);
-
        iopgtable_clear_entry_all(obj);
 
        irq = platform_get_irq(pdev, 0);
index 074bcb3..875bbe4 100644 (file)
@@ -194,11 +194,11 @@ config LEDS_LP3944
          module will be called leds-lp3944.
 
 config LEDS_LP55XX_COMMON
-       tristate "Common Driver for TI/National LP5521, LP5523/55231 and LP5562"
-       depends on LEDS_LP5521 || LEDS_LP5523 || LEDS_LP5562
+       tristate "Common Driver for TI/National LP5521/5523/55231/5562/8501"
+       depends on LEDS_LP5521 || LEDS_LP5523 || LEDS_LP5562 || LEDS_LP8501
        select FW_LOADER
        help
-         This option supports common operations for LP5521 and LP5523/55231
+         This option supports common operations for LP5521/5523/55231/5562/8501
          devices.
 
 config LEDS_LP5521
@@ -232,6 +232,18 @@ config LEDS_LP5562
          Driver provides direct control via LED class and interface for
          programming the engines.
 
+config LEDS_LP8501
+       tristate "LED Support for TI LP8501 LED driver chip"
+       depends on LEDS_CLASS && I2C
+       select LEDS_LP55XX_COMMON
+       help
+         If you say yes here you get support for TI LP8501 LED driver.
+         It is 9 channel chip with programmable engines.
+         Driver provides direct control via LED class and interface for
+         programming the engines.
+         It is similar to LP5523, but output power selection is available.
+         And register layout and engine program schemes are different.
+
 config LEDS_LP8788
        tristate "LED support for the TI LP8788 PMIC"
        depends on LEDS_CLASS
@@ -279,13 +291,14 @@ config LEDS_PCA955X
          LED driver chips accessed via the I2C bus.  Supported
          devices include PCA9550, PCA9551, PCA9552, and PCA9553.
 
-config LEDS_PCA9633
-       tristate "LED support for PCA9633 I2C chip"
+config LEDS_PCA963X
+       tristate "LED support for PCA963x I2C chip"
        depends on LEDS_CLASS
        depends on I2C
        help
-         This option enables support for LEDs connected to the PCA9633
-         LED driver chip accessed via the I2C bus.
+         This option enables support for LEDs connected to the PCA963x
+         LED driver chip accessed via the I2C bus. Supported
+         devices include PCA9633 and PCA9634.
 
 config LEDS_WM831X_STATUS
        tristate "LED support for status LEDs on WM831x PMICs"
@@ -398,10 +411,7 @@ config LEDS_MC13783
 config LEDS_NS2
        tristate "LED support for Network Space v2 GPIO LEDs"
        depends on LEDS_CLASS
-       depends on MACH_NETSPACE_V2 || MACH_INETSPACE_V2 || \
-                  MACH_NETSPACE_MAX_V2 || MACH_D2NET_V2 || \
-                  MACH_NETSPACE_V2_DT || MACH_INETSPACE_V2_DT || \
-                  MACH_NETSPACE_MAX_V2_DT || MACH_NETSPACE_MINI_V2_DT
+       depends on ARCH_KIRKWOOD
        default y
        help
          This option enable support for the dual-GPIO LED found on the
@@ -410,8 +420,8 @@ config LEDS_NS2
 
 config LEDS_NETXBIG
        tristate "LED support for Big Network series LEDs"
-       depends on MACH_NET2BIG_V2 || MACH_NET5BIG_V2
        depends on LEDS_CLASS
+       depends on ARCH_KIRKWOOD
        default y
        help
          This option enable support for LEDs found on the LaCie 2Big
index ae4b613..8979b0b 100644 (file)
@@ -27,6 +27,7 @@ obj-$(CONFIG_LEDS_LP55XX_COMMON)      += leds-lp55xx-common.o
 obj-$(CONFIG_LEDS_LP5521)              += leds-lp5521.o
 obj-$(CONFIG_LEDS_LP5523)              += leds-lp5523.o
 obj-$(CONFIG_LEDS_LP5562)              += leds-lp5562.o
+obj-$(CONFIG_LEDS_LP8501)              += leds-lp8501.o
 obj-$(CONFIG_LEDS_LP8788)              += leds-lp8788.o
 obj-$(CONFIG_LEDS_TCA6507)             += leds-tca6507.o
 obj-$(CONFIG_LEDS_CLEVO_MAIL)          += leds-clevo-mail.o
@@ -34,7 +35,7 @@ obj-$(CONFIG_LEDS_HP6XX)              += leds-hp6xx.o
 obj-$(CONFIG_LEDS_OT200)               += leds-ot200.o
 obj-$(CONFIG_LEDS_FSG)                 += leds-fsg.o
 obj-$(CONFIG_LEDS_PCA955X)             += leds-pca955x.o
-obj-$(CONFIG_LEDS_PCA9633)             += leds-pca9633.o
+obj-$(CONFIG_LEDS_PCA963X)             += leds-pca963x.o
 obj-$(CONFIG_LEDS_DA903X)              += leds-da903x.o
 obj-$(CONFIG_LEDS_DA9052)              += leds-da9052.o
 obj-$(CONFIG_LEDS_WM831X_STATUS)       += leds-wm831x-status.o
index 232b3ce..5f588c0 100644 (file)
@@ -157,7 +157,7 @@ static int pm860x_led_dt_init(struct platform_device *pdev,
 static int pm860x_led_probe(struct platform_device *pdev)
 {
        struct pm860x_chip *chip = dev_get_drvdata(pdev->dev.parent);
-       struct pm860x_led_pdata *pdata = pdev->dev.platform_data;
+       struct pm860x_led_pdata *pdata = dev_get_platdata(&pdev->dev);
        struct pm860x_led *data;
        struct resource *res;
        int ret = 0;
index e8072ab..7e311a1 100644 (file)
@@ -87,7 +87,7 @@ static int adp5520_led_setup(struct adp5520_led *led)
 
 static int adp5520_led_prepare(struct platform_device *pdev)
 {
-       struct adp5520_leds_platform_data *pdata = pdev->dev.platform_data;
+       struct adp5520_leds_platform_data *pdata = dev_get_platdata(&pdev->dev);
        struct device *dev = pdev->dev.parent;
        int ret = 0;
 
@@ -103,7 +103,7 @@ static int adp5520_led_prepare(struct platform_device *pdev)
 
 static int adp5520_led_probe(struct platform_device *pdev)
 {
-       struct adp5520_leds_platform_data *pdata = pdev->dev.platform_data;
+       struct adp5520_leds_platform_data *pdata = dev_get_platdata(&pdev->dev);
        struct adp5520_led *led, *led_dat;
        struct led_info *cur_led;
        int ret, i;
@@ -185,7 +185,7 @@ err:
 
 static int adp5520_led_remove(struct platform_device *pdev)
 {
-       struct adp5520_leds_platform_data *pdata = pdev->dev.platform_data;
+       struct adp5520_leds_platform_data *pdata = dev_get_platdata(&pdev->dev);
        struct adp5520_led *led;
        int i;
 
index cf9efe4..6de216a 100644 (file)
@@ -94,7 +94,7 @@ static int blink_set(struct led_classdev *cdev,
 
 static int asic3_led_probe(struct platform_device *pdev)
 {
-       struct asic3_led *led = pdev->dev.platform_data;
+       struct asic3_led *led = dev_get_platdata(&pdev->dev);
        int ret;
 
        ret = mfd_cell_enable(pdev);
@@ -127,7 +127,7 @@ out:
 
 static int asic3_led_remove(struct platform_device *pdev)
 {
-       struct asic3_led *led = pdev->dev.platform_data;
+       struct asic3_led *led = dev_get_platdata(&pdev->dev);
 
        led_classdev_unregister(led->cdev);
 
index 90518f8..56cec8d 100644 (file)
@@ -42,7 +42,7 @@ static int pwmled_probe(struct platform_device *pdev)
        int                                     i;
        int                                     status;
 
-       pdata = pdev->dev.platform_data;
+       pdata = dev_get_platdata(&pdev->dev);
        if (!pdata || pdata->num_leds < 1)
                return -ENODEV;
 
@@ -119,7 +119,7 @@ static int pwmled_remove(struct platform_device *pdev)
        struct pwmled                           *leds;
        unsigned                                i;
 
-       pdata = pdev->dev.platform_data;
+       pdata = dev_get_platdata(&pdev->dev);
        leds = platform_get_drvdata(pdev);
 
        for (i = 0; i < pdata->num_leds; i++) {
index 2db0423..fb5a347 100644 (file)
@@ -684,7 +684,7 @@ static int bd2802_probe(struct i2c_client *client,
        }
 
        led->client = client;
-       pdata = led->pdata = client->dev.platform_data;
+       pdata = led->pdata = dev_get_platdata(&client->dev);
        i2c_set_clientdata(client, led);
 
        /* Configure RESET GPIO (L: RESET, H: RESET cancel) */
index 6a8405d..d93e245 100644 (file)
@@ -40,7 +40,7 @@ static int __init clevo_mail_led_dmi_callback(const struct dmi_system_id *id)
  * detected as working, but in reality it is not) as low as
  * possible.
  */
-static struct dmi_system_id __initdata clevo_mail_led_dmi_table[] = {
+static struct dmi_system_id clevo_mail_led_dmi_table[] __initdata = {
        {
                .callback = clevo_mail_led_dmi_callback,
                .ident = "Clevo D410J",
index c263a21..2a4b87f 100644 (file)
@@ -93,7 +93,7 @@ static void da903x_led_set(struct led_classdev *led_cdev,
 
 static int da903x_led_probe(struct platform_device *pdev)
 {
-       struct led_info *pdata = pdev->dev.platform_data;
+       struct led_info *pdata = dev_get_platdata(&pdev->dev);
        struct da903x_led *led;
        int id, ret;
 
index efec433..865d4fa 100644 (file)
@@ -112,7 +112,7 @@ static int da9052_led_probe(struct platform_device *pdev)
        int i;
 
        da9052 = dev_get_drvdata(pdev->dev.parent);
-       pdata = da9052->dev->platform_data;
+       pdata = dev_get_platdata(da9052->dev);
        if (pdata == NULL) {
                dev_err(&pdev->dev, "No platform data\n");
                goto err;
@@ -185,7 +185,7 @@ static int da9052_led_remove(struct platform_device *pdev)
        int i;
 
        da9052 = dev_get_drvdata(pdev->dev.parent);
-       pdata = da9052->dev->platform_data;
+       pdata = dev_get_platdata(da9052->dev);
        pled = pdata->pled;
 
        for (i = 0; i < pled->num_leds; i++) {
index 84d74c3..e8b01e5 100644 (file)
@@ -233,7 +233,7 @@ static struct gpio_leds_priv *gpio_leds_create_of(struct platform_device *pdev)
 
 static int gpio_led_probe(struct platform_device *pdev)
 {
-       struct gpio_led_platform_data *pdata = pdev->dev.platform_data;
+       struct gpio_led_platform_data *pdata = dev_get_platdata(&pdev->dev);
        struct gpio_leds_priv *priv;
        int i, ret = 0;
 
index a036a19..652368c 100644 (file)
@@ -403,7 +403,7 @@ static DEVICE_ATTR(mode, 0644, lm3530_mode_get, lm3530_mode_set);
 static int lm3530_probe(struct i2c_client *client,
                           const struct i2c_device_id *id)
 {
-       struct lm3530_platform_data *pdata = client->dev.platform_data;
+       struct lm3530_platform_data *pdata = dev_get_platdata(&client->dev);
        struct lm3530_data *drvdata;
        int err = 0;
 
index bbf24d0..027ede7 100644 (file)
@@ -671,7 +671,7 @@ static int lm3533_led_probe(struct platform_device *pdev)
        if (!lm3533)
                return -EINVAL;
 
-       pdata = pdev->dev.platform_data;
+       pdata = dev_get_platdata(&pdev->dev);
        if (!pdata) {
                dev_err(&pdev->dev, "no platform data\n");
                return -EINVAL;
index d81a8e7..591eb5e 100644 (file)
@@ -423,7 +423,7 @@ static const struct regmap_config lm355x_regmap = {
 static int lm355x_probe(struct i2c_client *client,
                                  const struct i2c_device_id *id)
 {
-       struct lm355x_platform_data *pdata = client->dev.platform_data;
+       struct lm355x_platform_data *pdata = dev_get_platdata(&client->dev);
        struct lm355x_chip_data *chip;
 
        int err;
index f361bbe..ceb6b3c 100644 (file)
@@ -316,7 +316,7 @@ static const struct regmap_config lm3642_regmap = {
 static int lm3642_probe(struct i2c_client *client,
                                  const struct i2c_device_id *id)
 {
-       struct lm3642_platform_data *pdata = client->dev.platform_data;
+       struct lm3642_platform_data *pdata = dev_get_platdata(&client->dev);
        struct lm3642_chip_data *chip;
 
        int err;
index 0c4386e..8e1abdc 100644 (file)
@@ -289,7 +289,7 @@ static void lp3944_led_set_brightness(struct led_classdev *led_cdev,
        dev_dbg(&led->client->dev, "%s: %s, %d\n",
                __func__, led_cdev->name, brightness);
 
-       led->status = brightness;
+       led->status = !!brightness;
        schedule_work(&led->work);
 }
 
@@ -377,7 +377,8 @@ exit:
 static int lp3944_probe(struct i2c_client *client,
                                  const struct i2c_device_id *id)
 {
-       struct lp3944_platform_data *lp3944_pdata = client->dev.platform_data;
+       struct lp3944_platform_data *lp3944_pdata =
+                       dev_get_platdata(&client->dev);
        struct lp3944_data *data;
        int err;
 
@@ -413,7 +414,7 @@ static int lp3944_probe(struct i2c_client *client,
 
 static int lp3944_remove(struct i2c_client *client)
 {
-       struct lp3944_platform_data *pdata = client->dev.platform_data;
+       struct lp3944_platform_data *pdata = dev_get_platdata(&client->dev);
        struct lp3944_data *data = i2c_get_clientdata(client);
        int i;
 
index 1392feb..0518835 100644 (file)
@@ -220,17 +220,11 @@ static int lp5521_update_program_memory(struct lp55xx_chip *chip,
        };
        unsigned cmd;
        char c[3];
-       int program_size;
        int nrchars;
-       int offset = 0;
        int ret;
-       int i;
-
-       /* clear program memory before updating */
-       for (i = 0; i < LP5521_PROGRAM_LENGTH; i++)
-               lp55xx_write(chip, addr[idx] + i, 0);
+       int offset = 0;
+       int i = 0;
 
-       i = 0;
        while ((offset < size - 1) && (i < LP5521_PROGRAM_LENGTH)) {
                /* separate sscanfs because length is working only for %s */
                ret = sscanf(data + offset, "%2s%n ", c, &nrchars);
@@ -250,11 +244,19 @@ static int lp5521_update_program_memory(struct lp55xx_chip *chip,
        if (i % 2)
                goto err;
 
-       program_size = i;
-       for (i = 0; i < program_size; i++)
-               lp55xx_write(chip, addr[idx] + i, pattern[i]);
+       mutex_lock(&chip->lock);
 
-       return 0;
+       for (i = 0; i < LP5521_PROGRAM_LENGTH; i++) {
+               ret = lp55xx_write(chip, addr[idx] + i, pattern[i]);
+               if (ret) {
+                       mutex_unlock(&chip->lock);
+                       return -EINVAL;
+               }
+       }
+
+       mutex_unlock(&chip->lock);
+
+       return size;
 
 err:
        dev_err(&chip->cl->dev, "wrong pattern format\n");
@@ -365,6 +367,80 @@ static void lp5521_led_brightness_work(struct work_struct *work)
        mutex_unlock(&chip->lock);
 }
 
+static ssize_t show_engine_mode(struct device *dev,
+                               struct device_attribute *attr,
+                               char *buf, int nr)
+{
+       struct lp55xx_led *led = i2c_get_clientdata(to_i2c_client(dev));
+       struct lp55xx_chip *chip = led->chip;
+       enum lp55xx_engine_mode mode = chip->engines[nr - 1].mode;
+
+       switch (mode) {
+       case LP55XX_ENGINE_RUN:
+               return sprintf(buf, "run\n");
+       case LP55XX_ENGINE_LOAD:
+               return sprintf(buf, "load\n");
+       case LP55XX_ENGINE_DISABLED:
+       default:
+               return sprintf(buf, "disabled\n");
+       }
+}
+show_mode(1)
+show_mode(2)
+show_mode(3)
+
+static ssize_t store_engine_mode(struct device *dev,
+                                struct device_attribute *attr,
+                                const char *buf, size_t len, int nr)
+{
+       struct lp55xx_led *led = i2c_get_clientdata(to_i2c_client(dev));
+       struct lp55xx_chip *chip = led->chip;
+       struct lp55xx_engine *engine = &chip->engines[nr - 1];
+
+       mutex_lock(&chip->lock);
+
+       chip->engine_idx = nr;
+
+       if (!strncmp(buf, "run", 3)) {
+               lp5521_run_engine(chip, true);
+               engine->mode = LP55XX_ENGINE_RUN;
+       } else if (!strncmp(buf, "load", 4)) {
+               lp5521_stop_engine(chip);
+               lp5521_load_engine(chip);
+               engine->mode = LP55XX_ENGINE_LOAD;
+       } else if (!strncmp(buf, "disabled", 8)) {
+               lp5521_stop_engine(chip);
+               engine->mode = LP55XX_ENGINE_DISABLED;
+       }
+
+       mutex_unlock(&chip->lock);
+
+       return len;
+}
+store_mode(1)
+store_mode(2)
+store_mode(3)
+
+static ssize_t store_engine_load(struct device *dev,
+                            struct device_attribute *attr,
+                            const char *buf, size_t len, int nr)
+{
+       struct lp55xx_led *led = i2c_get_clientdata(to_i2c_client(dev));
+       struct lp55xx_chip *chip = led->chip;
+
+       mutex_lock(&chip->lock);
+
+       chip->engine_idx = nr;
+       lp5521_load_engine(chip);
+
+       mutex_unlock(&chip->lock);
+
+       return lp5521_update_program_memory(chip, buf, len);
+}
+store_load(1)
+store_load(2)
+store_load(3)
+
 static ssize_t lp5521_selftest(struct device *dev,
                               struct device_attribute *attr,
                               char *buf)
@@ -381,9 +457,21 @@ static ssize_t lp5521_selftest(struct device *dev,
 }
 
 /* device attributes */
-static DEVICE_ATTR(selftest, S_IRUGO, lp5521_selftest, NULL);
+static LP55XX_DEV_ATTR_RW(engine1_mode, show_engine1_mode, store_engine1_mode);
+static LP55XX_DEV_ATTR_RW(engine2_mode, show_engine2_mode, store_engine2_mode);
+static LP55XX_DEV_ATTR_RW(engine3_mode, show_engine3_mode, store_engine3_mode);
+static LP55XX_DEV_ATTR_WO(engine1_load, store_engine1_load);
+static LP55XX_DEV_ATTR_WO(engine2_load, store_engine2_load);
+static LP55XX_DEV_ATTR_WO(engine3_load, store_engine3_load);
+static LP55XX_DEV_ATTR_RO(selftest, lp5521_selftest);
 
 static struct attribute *lp5521_attributes[] = {
+       &dev_attr_engine1_mode.attr,
+       &dev_attr_engine2_mode.attr,
+       &dev_attr_engine3_mode.attr,
+       &dev_attr_engine1_load.attr,
+       &dev_attr_engine2_load.attr,
+       &dev_attr_engine3_load.attr,
        &dev_attr_selftest.attr,
        NULL
 };
@@ -420,7 +508,7 @@ static int lp5521_probe(struct i2c_client *client,
        struct lp55xx_platform_data *pdata;
        struct device_node *np = client->dev.of_node;
 
-       if (!client->dev.platform_data) {
+       if (!dev_get_platdata(&client->dev)) {
                if (np) {
                        ret = lp55xx_of_populate_pdata(&client->dev, np);
                        if (ret < 0)
@@ -430,7 +518,7 @@ static int lp5521_probe(struct i2c_client *client,
                        return -EINVAL;
                }
        }
-       pdata = client->dev.platform_data;
+       pdata = dev_get_platdata(&client->dev);
 
        chip = devm_kzalloc(&client->dev, sizeof(*chip), GFP_KERNEL);
        if (!chip)
index 3979428..fe3bcbb 100644 (file)
@@ -49,6 +49,9 @@
 #define LP5523_REG_RESET               0x3D
 #define LP5523_REG_LED_TEST_CTRL       0x41
 #define LP5523_REG_LED_TEST_ADC                0x42
+#define LP5523_REG_CH1_PROG_START      0x4C
+#define LP5523_REG_CH2_PROG_START      0x4D
+#define LP5523_REG_CH3_PROG_START      0x4E
 #define LP5523_REG_PROG_PAGE_SEL       0x4F
 #define LP5523_REG_PROG_MEM            0x50
 
 #define LP5523_RESET                   0xFF
 #define LP5523_ADC_SHORTCIRC_LIM       80
 #define LP5523_EXT_CLK_USED            0x08
+#define LP5523_ENG_STATUS_MASK         0x07
 
 /* Memory Page Selection */
 #define LP5523_PAGE_ENG1               0
 #define LP5523_PAGE_ENG2               1
 #define LP5523_PAGE_ENG3               2
+#define LP5523_PAGE_MUX1               3
+#define LP5523_PAGE_MUX2               4
+#define LP5523_PAGE_MUX3               5
 
 /* Program Memory Operations */
 #define LP5523_MODE_ENG1_M             0x30    /* Operation Mode Register */
 #define LP5523_RUN_ENG2                        0x08
 #define LP5523_RUN_ENG3                        0x02
 
+#define LED_ACTIVE(mux, led)           (!!(mux & (0x0001 << led)))
+
 enum lp5523_chip_id {
        LP5523,
        LP55231,
 };
 
+static int lp5523_init_program_engine(struct lp55xx_chip *chip);
+
 static inline void lp5523_wait_opmode_done(void)
 {
        usleep_range(1000, 2000);
@@ -134,7 +145,11 @@ static int lp5523_post_init_device(struct lp55xx_chip *chip)
        if (ret)
                return ret;
 
-       return lp55xx_write(chip, LP5523_REG_ENABLE_LEDS_LSB, 0xff);
+       ret = lp55xx_write(chip, LP5523_REG_ENABLE_LEDS_LSB, 0xff);
+       if (ret)
+               return ret;
+
+       return lp5523_init_program_engine(chip);
 }
 
 static void lp5523_load_engine(struct lp55xx_chip *chip)
@@ -152,15 +167,21 @@ static void lp5523_load_engine(struct lp55xx_chip *chip)
                [LP55XX_ENGINE_3] = LP5523_LOAD_ENG3,
        };
 
+       lp55xx_update_bits(chip, LP5523_REG_OP_MODE, mask[idx], val[idx]);
+
+       lp5523_wait_opmode_done();
+}
+
+static void lp5523_load_engine_and_select_page(struct lp55xx_chip *chip)
+{
+       enum lp55xx_engine_index idx = chip->engine_idx;
        u8 page_sel[] = {
                [LP55XX_ENGINE_1] = LP5523_PAGE_ENG1,
                [LP55XX_ENGINE_2] = LP5523_PAGE_ENG2,
                [LP55XX_ENGINE_3] = LP5523_PAGE_ENG3,
        };
 
-       lp55xx_update_bits(chip, LP5523_REG_OP_MODE, mask[idx], val[idx]);
-
-       lp5523_wait_opmode_done();
+       lp5523_load_engine(chip);
 
        lp55xx_write(chip, LP5523_REG_PROG_PAGE_SEL, page_sel[idx]);
 }
@@ -227,23 +248,75 @@ static void lp5523_run_engine(struct lp55xx_chip *chip, bool start)
        lp55xx_update_bits(chip, LP5523_REG_ENABLE, LP5523_EXEC_M, exec);
 }
 
+static int lp5523_init_program_engine(struct lp55xx_chip *chip)
+{
+       int i;
+       int j;
+       int ret;
+       u8 status;
+       /* one pattern per engine setting LED MUX start and stop addresses */
+       static const u8 pattern[][LP5523_PROGRAM_LENGTH] =  {
+               { 0x9c, 0x30, 0x9c, 0xb0, 0x9d, 0x80, 0xd8, 0x00, 0},
+               { 0x9c, 0x40, 0x9c, 0xc0, 0x9d, 0x80, 0xd8, 0x00, 0},
+               { 0x9c, 0x50, 0x9c, 0xd0, 0x9d, 0x80, 0xd8, 0x00, 0},
+       };
+
+       /* hardcode 32 bytes of memory for each engine from program memory */
+       ret = lp55xx_write(chip, LP5523_REG_CH1_PROG_START, 0x00);
+       if (ret)
+               return ret;
+
+       ret = lp55xx_write(chip, LP5523_REG_CH2_PROG_START, 0x10);
+       if (ret)
+               return ret;
+
+       ret = lp55xx_write(chip, LP5523_REG_CH3_PROG_START, 0x20);
+       if (ret)
+               return ret;
+
+       /* write LED MUX address space for each engine */
+       for (i = LP55XX_ENGINE_1; i <= LP55XX_ENGINE_3; i++) {
+               chip->engine_idx = i;
+               lp5523_load_engine_and_select_page(chip);
+
+               for (j = 0; j < LP5523_PROGRAM_LENGTH; j++) {
+                       ret = lp55xx_write(chip, LP5523_REG_PROG_MEM + j,
+                                       pattern[i - 1][j]);
+                       if (ret)
+                               goto out;
+               }
+       }
+
+       lp5523_run_engine(chip, true);
+
+       /* Let the programs run for couple of ms and check the engine status */
+       usleep_range(3000, 6000);
+       lp55xx_read(chip, LP5523_REG_STATUS, &status);
+       status &= LP5523_ENG_STATUS_MASK;
+
+       if (status != LP5523_ENG_STATUS_MASK) {
+               dev_err(&chip->cl->dev,
+                       "cound not configure LED engine, status = 0x%.2x\n",
+                       status);
+               ret = -1;
+       }
+
+out:
+       lp5523_stop_engine(chip);
+       return ret;
+}
+
 static int lp5523_update_program_memory(struct lp55xx_chip *chip,
                                        const u8 *data, size_t size)
 {
        u8 pattern[LP5523_PROGRAM_LENGTH] = {0};
        unsigned cmd;
        char c[3];
-       int update_size;
        int nrchars;
-       int offset = 0;
        int ret;
-       int i;
-
-       /* clear program memory before updating */
-       for (i = 0; i < LP5523_PROGRAM_LENGTH; i++)
-               lp55xx_write(chip, LP5523_REG_PROG_MEM + i, 0);
+       int offset = 0;
+       int i = 0;
 
-       i = 0;
        while ((offset < size - 1) && (i < LP5523_PROGRAM_LENGTH)) {
                /* separate sscanfs because length is working only for %s */
                ret = sscanf(data + offset, "%2s%n ", c, &nrchars);
@@ -263,11 +336,19 @@ static int lp5523_update_program_memory(struct lp55xx_chip *chip,
        if (i % 2)
                goto err;
 
-       update_size = i;
-       for (i = 0; i < update_size; i++)
-               lp55xx_write(chip, LP5523_REG_PROG_MEM + i, pattern[i]);
+       mutex_lock(&chip->lock);
 
-       return 0;
+       for (i = 0; i < LP5523_PROGRAM_LENGTH; i++) {
+               ret = lp55xx_write(chip, LP5523_REG_PROG_MEM + i, pattern[i]);
+               if (ret) {
+                       mutex_unlock(&chip->lock);
+                       return -EINVAL;
+               }
+       }
+
+       mutex_unlock(&chip->lock);
+
+       return size;
 
 err:
        dev_err(&chip->cl->dev, "wrong pattern format\n");
@@ -290,10 +371,196 @@ static void lp5523_firmware_loaded(struct lp55xx_chip *chip)
         *  2) write firmware data into program memory
         */
 
-       lp5523_load_engine(chip);
+       lp5523_load_engine_and_select_page(chip);
        lp5523_update_program_memory(chip, fw->data, fw->size);
 }
 
+static ssize_t show_engine_mode(struct device *dev,
+                               struct device_attribute *attr,
+                               char *buf, int nr)
+{
+       struct lp55xx_led *led = i2c_get_clientdata(to_i2c_client(dev));
+       struct lp55xx_chip *chip = led->chip;
+       enum lp55xx_engine_mode mode = chip->engines[nr - 1].mode;
+
+       switch (mode) {
+       case LP55XX_ENGINE_RUN:
+               return sprintf(buf, "run\n");
+       case LP55XX_ENGINE_LOAD:
+               return sprintf(buf, "load\n");
+       case LP55XX_ENGINE_DISABLED:
+       default:
+               return sprintf(buf, "disabled\n");
+       }
+}
+show_mode(1)
+show_mode(2)
+show_mode(3)
+
+static ssize_t store_engine_mode(struct device *dev,
+                                struct device_attribute *attr,
+                                const char *buf, size_t len, int nr)
+{
+       struct lp55xx_led *led = i2c_get_clientdata(to_i2c_client(dev));
+       struct lp55xx_chip *chip = led->chip;
+       struct lp55xx_engine *engine = &chip->engines[nr - 1];
+
+       mutex_lock(&chip->lock);
+
+       chip->engine_idx = nr;
+
+       if (!strncmp(buf, "run", 3)) {
+               lp5523_run_engine(chip, true);
+               engine->mode = LP55XX_ENGINE_RUN;
+       } else if (!strncmp(buf, "load", 4)) {
+               lp5523_stop_engine(chip);
+               lp5523_load_engine(chip);
+               engine->mode = LP55XX_ENGINE_LOAD;
+       } else if (!strncmp(buf, "disabled", 8)) {
+               lp5523_stop_engine(chip);
+               engine->mode = LP55XX_ENGINE_DISABLED;
+       }
+
+       mutex_unlock(&chip->lock);
+
+       return len;
+}
+store_mode(1)
+store_mode(2)
+store_mode(3)
+
+static int lp5523_mux_parse(const char *buf, u16 *mux, size_t len)
+{
+       u16 tmp_mux = 0;
+       int i;
+
+       len = min_t(int, len, LP5523_MAX_LEDS);
+
+       for (i = 0; i < len; i++) {
+               switch (buf[i]) {
+               case '1':
+                       tmp_mux |= (1 << i);
+                       break;
+               case '0':
+                       break;
+               case '\n':
+                       i = len;
+                       break;
+               default:
+                       return -1;
+               }
+       }
+       *mux = tmp_mux;
+
+       return 0;
+}
+
+static void lp5523_mux_to_array(u16 led_mux, char *array)
+{
+       int i, pos = 0;
+       for (i = 0; i < LP5523_MAX_LEDS; i++)
+               pos += sprintf(array + pos, "%x", LED_ACTIVE(led_mux, i));
+
+       array[pos] = '\0';
+}
+
+static ssize_t show_engine_leds(struct device *dev,
+                           struct device_attribute *attr,
+                           char *buf, int nr)
+{
+       struct lp55xx_led *led = i2c_get_clientdata(to_i2c_client(dev));
+       struct lp55xx_chip *chip = led->chip;
+       char mux[LP5523_MAX_LEDS + 1];
+
+       lp5523_mux_to_array(chip->engines[nr - 1].led_mux, mux);
+
+       return sprintf(buf, "%s\n", mux);
+}
+show_leds(1)
+show_leds(2)
+show_leds(3)
+
+static int lp5523_load_mux(struct lp55xx_chip *chip, u16 mux, int nr)
+{
+       struct lp55xx_engine *engine = &chip->engines[nr - 1];
+       int ret;
+       u8 mux_page[] = {
+               [LP55XX_ENGINE_1] = LP5523_PAGE_MUX1,
+               [LP55XX_ENGINE_2] = LP5523_PAGE_MUX2,
+               [LP55XX_ENGINE_3] = LP5523_PAGE_MUX3,
+       };
+
+       lp5523_load_engine(chip);
+
+       ret = lp55xx_write(chip, LP5523_REG_PROG_PAGE_SEL, mux_page[nr]);
+       if (ret)
+               return ret;
+
+       ret = lp55xx_write(chip, LP5523_REG_PROG_MEM , (u8)(mux >> 8));
+       if (ret)
+               return ret;
+
+       ret = lp55xx_write(chip, LP5523_REG_PROG_MEM + 1, (u8)(mux));
+       if (ret)
+               return ret;
+
+       engine->led_mux = mux;
+       return 0;
+}
+
+static ssize_t store_engine_leds(struct device *dev,
+                            struct device_attribute *attr,
+                            const char *buf, size_t len, int nr)
+{
+       struct lp55xx_led *led = i2c_get_clientdata(to_i2c_client(dev));
+       struct lp55xx_chip *chip = led->chip;
+       struct lp55xx_engine *engine = &chip->engines[nr - 1];
+       u16 mux = 0;
+       ssize_t ret;
+
+       if (lp5523_mux_parse(buf, &mux, len))
+               return -EINVAL;
+
+       mutex_lock(&chip->lock);
+
+       chip->engine_idx = nr;
+       ret = -EINVAL;
+
+       if (engine->mode != LP55XX_ENGINE_LOAD)
+               goto leave;
+
+       if (lp5523_load_mux(chip, mux, nr))
+               goto leave;
+
+       ret = len;
+leave:
+       mutex_unlock(&chip->lock);
+       return ret;
+}
+store_leds(1)
+store_leds(2)
+store_leds(3)
+
+static ssize_t store_engine_load(struct device *dev,
+                            struct device_attribute *attr,
+                            const char *buf, size_t len, int nr)
+{
+       struct lp55xx_led *led = i2c_get_clientdata(to_i2c_client(dev));
+       struct lp55xx_chip *chip = led->chip;
+
+       mutex_lock(&chip->lock);
+
+       chip->engine_idx = nr;
+       lp5523_load_engine_and_select_page(chip);
+
+       mutex_unlock(&chip->lock);
+
+       return lp5523_update_program_memory(chip, buf, len);
+}
+store_load(1)
+store_load(2)
+store_load(3)
+
 static ssize_t lp5523_selftest(struct device *dev,
                               struct device_attribute *attr,
                               char *buf)
@@ -393,9 +660,27 @@ static void lp5523_led_brightness_work(struct work_struct *work)
        mutex_unlock(&chip->lock);
 }
 
-static DEVICE_ATTR(selftest, S_IRUGO, lp5523_selftest, NULL);
+static LP55XX_DEV_ATTR_RW(engine1_mode, show_engine1_mode, store_engine1_mode);
+static LP55XX_DEV_ATTR_RW(engine2_mode, show_engine2_mode, store_engine2_mode);
+static LP55XX_DEV_ATTR_RW(engine3_mode, show_engine3_mode, store_engine3_mode);
+static LP55XX_DEV_ATTR_RW(engine1_leds, show_engine1_leds, store_engine1_leds);
+static LP55XX_DEV_ATTR_RW(engine2_leds, show_engine2_leds, store_engine2_leds);
+static LP55XX_DEV_ATTR_RW(engine3_leds, show_engine3_leds, store_engine3_leds);
+static LP55XX_DEV_ATTR_WO(engine1_load, store_engine1_load);
+static LP55XX_DEV_ATTR_WO(engine2_load, store_engine2_load);
+static LP55XX_DEV_ATTR_WO(engine3_load, store_engine3_load);
+static LP55XX_DEV_ATTR_RO(selftest, lp5523_selftest);
 
 static struct attribute *lp5523_attributes[] = {
+       &dev_attr_engine1_mode.attr,
+       &dev_attr_engine2_mode.attr,
+       &dev_attr_engine3_mode.attr,
+       &dev_attr_engine1_load.attr,
+       &dev_attr_engine2_load.attr,
+       &dev_attr_engine3_load.attr,
+       &dev_attr_engine1_leds.attr,
+       &dev_attr_engine2_leds.attr,
+       &dev_attr_engine3_leds.attr,
        &dev_attr_selftest.attr,
        NULL,
 };
@@ -432,7 +717,7 @@ static int lp5523_probe(struct i2c_client *client,
        struct lp55xx_platform_data *pdata;
        struct device_node *np = client->dev.of_node;
 
-       if (!client->dev.platform_data) {
+       if (!dev_get_platdata(&client->dev)) {
                if (np) {
                        ret = lp55xx_of_populate_pdata(&client->dev, np);
                        if (ret < 0)
@@ -442,7 +727,7 @@ static int lp5523_probe(struct i2c_client *client,
                        return -EINVAL;
                }
        }
-       pdata = client->dev.platform_data;
+       pdata = dev_get_platdata(&client->dev);
 
        chip = devm_kzalloc(&client->dev, sizeof(*chip), GFP_KERNEL);
        if (!chip)
index cbd856d..2585cfd 100644 (file)
@@ -477,8 +477,8 @@ static ssize_t lp5562_store_engine_mux(struct device *dev,
        return len;
 }
 
-static DEVICE_ATTR(led_pattern, S_IWUSR, NULL, lp5562_store_pattern);
-static DEVICE_ATTR(engine_mux, S_IWUSR, NULL, lp5562_store_engine_mux);
+static LP55XX_DEV_ATTR_WO(led_pattern, lp5562_store_pattern);
+static LP55XX_DEV_ATTR_WO(engine_mux, lp5562_store_engine_mux);
 
 static struct attribute *lp5562_attributes[] = {
        &dev_attr_led_pattern.attr,
@@ -518,7 +518,7 @@ static int lp5562_probe(struct i2c_client *client,
        struct lp55xx_platform_data *pdata;
        struct device_node *np = client->dev.of_node;
 
-       if (!client->dev.platform_data) {
+       if (!dev_get_platdata(&client->dev)) {
                if (np) {
                        ret = lp55xx_of_populate_pdata(&client->dev, np);
                        if (ret < 0)
@@ -528,7 +528,7 @@ static int lp5562_probe(struct i2c_client *client,
                        return -EINVAL;
                }
        }
-       pdata = client->dev.platform_data;
+       pdata = dev_get_platdata(&client->dev);
 
        chip = devm_kzalloc(&client->dev, sizeof(*chip), GFP_KERNEL);
        if (!chip)
index c2fecd4..351825b 100644 (file)
@@ -593,6 +593,9 @@ int lp55xx_of_populate_pdata(struct device *dev, struct device_node *np)
        of_property_read_string(np, "label", &pdata->label);
        of_property_read_u8(np, "clock-mode", &pdata->clock_mode);
 
+       /* LP8501 specific */
+       of_property_read_u8(np, "pwr-sel", (u8 *)&pdata->pwr_sel);
+
        dev->platform_data = pdata;
 
        return 0;
index dbbf86d..cceab48 100644 (file)
@@ -20,8 +20,62 @@ enum lp55xx_engine_index {
        LP55XX_ENGINE_1,
        LP55XX_ENGINE_2,
        LP55XX_ENGINE_3,
+       LP55XX_ENGINE_MAX = LP55XX_ENGINE_3,
 };
 
+enum lp55xx_engine_mode {
+       LP55XX_ENGINE_DISABLED,
+       LP55XX_ENGINE_LOAD,
+       LP55XX_ENGINE_RUN,
+};
+
+#define LP55XX_DEV_ATTR_RW(name, show, store)  \
+       DEVICE_ATTR(name, S_IRUGO | S_IWUSR, show, store)
+#define LP55XX_DEV_ATTR_RO(name, show)         \
+       DEVICE_ATTR(name, S_IRUGO, show, NULL)
+#define LP55XX_DEV_ATTR_WO(name, store)                \
+       DEVICE_ATTR(name, S_IWUSR, NULL, store)
+
+#define show_mode(nr)                                                  \
+static ssize_t show_engine##nr##_mode(struct device *dev,              \
+                                   struct device_attribute *attr,      \
+                                   char *buf)                          \
+{                                                                      \
+       return show_engine_mode(dev, attr, buf, nr);                    \
+}
+
+#define store_mode(nr)                                                 \
+static ssize_t store_engine##nr##_mode(struct device *dev,             \
+                                    struct device_attribute *attr,     \
+                                    const char *buf, size_t len)       \
+{                                                                      \
+       return store_engine_mode(dev, attr, buf, len, nr);              \
+}
+
+#define show_leds(nr)                                                  \
+static ssize_t show_engine##nr##_leds(struct device *dev,              \
+                           struct device_attribute *attr,              \
+                           char *buf)                                  \
+{                                                                      \
+       return show_engine_leds(dev, attr, buf, nr);                    \
+}
+
+#define store_leds(nr)                                         \
+static ssize_t store_engine##nr##_leds(struct device *dev,     \
+                            struct device_attribute *attr,     \
+                            const char *buf, size_t len)       \
+{                                                              \
+       return store_engine_leds(dev, attr, buf, len, nr);      \
+}
+
+#define store_load(nr)                                                 \
+static ssize_t store_engine##nr##_load(struct device *dev,             \
+                                    struct device_attribute *attr,     \
+                                    const char *buf, size_t len)       \
+{                                                                      \
+       return store_engine_load(dev, attr, buf, len, nr);              \
+}
+
 struct lp55xx_led;
 struct lp55xx_chip;
 
@@ -71,6 +125,16 @@ struct lp55xx_device_config {
        const struct attribute_group *dev_attr_group;
 };
 
+/*
+ * struct lp55xx_engine
+ * @mode       : Engine mode
+ * @led_mux    : Mux bits for LED selection. Only used in LP5523
+ */
+struct lp55xx_engine {
+       enum lp55xx_engine_mode mode;
+       u16 led_mux;
+};
+
 /*
  * struct lp55xx_chip
  * @cl         : I2C communication for access registers
@@ -79,6 +143,7 @@ struct lp55xx_device_config {
  * @num_leds   : Number of registered LEDs
  * @cfg        : Device specific configuration data
  * @engine_idx : Selected engine number
+ * @engines    : Engine structure for the device attribute R/W interface
  * @fw         : Firmware data for running a LED pattern
  */
 struct lp55xx_chip {
@@ -89,6 +154,7 @@ struct lp55xx_chip {
        int num_leds;
        struct lp55xx_device_config *cfg;
        enum lp55xx_engine_index engine_idx;
+       struct lp55xx_engine engines[LP55XX_ENGINE_MAX];
        const struct firmware *fw;
 };
 
diff --git a/drivers/leds/leds-lp8501.c b/drivers/leds/leds-lp8501.c
new file mode 100644 (file)
index 0000000..8d55a78
--- /dev/null
@@ -0,0 +1,410 @@
+/*
+ * TI LP8501 9 channel LED Driver
+ *
+ * Copyright (C) 2013 Texas Instruments
+ *
+ * Author: Milo(Woogyom) Kim <milo.kim@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/delay.h>
+#include <linux/firmware.h>
+#include <linux/i2c.h>
+#include <linux/init.h>
+#include <linux/leds.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/platform_data/leds-lp55xx.h>
+#include <linux/slab.h>
+
+#include "leds-lp55xx-common.h"
+
+#define LP8501_PROGRAM_LENGTH          32
+#define LP8501_MAX_LEDS                        9
+
+/* Registers */
+#define LP8501_REG_ENABLE              0x00
+#define LP8501_ENABLE                  BIT(6)
+#define LP8501_EXEC_M                  0x3F
+#define LP8501_EXEC_ENG1_M             0x30
+#define LP8501_EXEC_ENG2_M             0x0C
+#define LP8501_EXEC_ENG3_M             0x03
+#define LP8501_RUN_ENG1                        0x20
+#define LP8501_RUN_ENG2                        0x08
+#define LP8501_RUN_ENG3                        0x02
+
+#define LP8501_REG_OP_MODE             0x01
+#define LP8501_MODE_ENG1_M             0x30
+#define LP8501_MODE_ENG2_M             0x0C
+#define LP8501_MODE_ENG3_M             0x03
+#define LP8501_LOAD_ENG1               0x10
+#define LP8501_LOAD_ENG2               0x04
+#define LP8501_LOAD_ENG3               0x01
+
+#define LP8501_REG_PWR_CONFIG          0x05
+#define LP8501_PWR_CONFIG_M            0x03
+
+#define LP8501_REG_LED_PWM_BASE                0x16
+
+#define LP8501_REG_LED_CURRENT_BASE    0x26
+
+#define LP8501_REG_CONFIG              0x36
+#define LP8501_PWM_PSAVE               BIT(7)
+#define LP8501_AUTO_INC                        BIT(6)
+#define LP8501_PWR_SAVE                        BIT(5)
+#define LP8501_CP_AUTO                 0x18
+#define LP8501_INT_CLK                 BIT(0)
+#define LP8501_DEFAULT_CFG     \
+       (LP8501_PWM_PSAVE | LP8501_AUTO_INC | LP8501_PWR_SAVE | LP8501_CP_AUTO)
+
+#define LP8501_REG_RESET               0x3D
+#define LP8501_RESET                   0xFF
+
+#define LP8501_REG_PROG_PAGE_SEL       0x4F
+#define LP8501_PAGE_ENG1               0
+#define LP8501_PAGE_ENG2               1
+#define LP8501_PAGE_ENG3               2
+
+#define LP8501_REG_PROG_MEM            0x50
+
+#define LP8501_ENG1_IS_LOADING(mode)   \
+       ((mode & LP8501_MODE_ENG1_M) == LP8501_LOAD_ENG1)
+#define LP8501_ENG2_IS_LOADING(mode)   \
+       ((mode & LP8501_MODE_ENG2_M) == LP8501_LOAD_ENG2)
+#define LP8501_ENG3_IS_LOADING(mode)   \
+       ((mode & LP8501_MODE_ENG3_M) == LP8501_LOAD_ENG3)
+
+static inline void lp8501_wait_opmode_done(void)
+{
+       usleep_range(1000, 2000);
+}
+
+static void lp8501_set_led_current(struct lp55xx_led *led, u8 led_current)
+{
+       led->led_current = led_current;
+       lp55xx_write(led->chip, LP8501_REG_LED_CURRENT_BASE + led->chan_nr,
+               led_current);
+}
+
+static int lp8501_post_init_device(struct lp55xx_chip *chip)
+{
+       int ret;
+       u8 val = LP8501_DEFAULT_CFG;
+
+       ret = lp55xx_write(chip, LP8501_REG_ENABLE, LP8501_ENABLE);
+       if (ret)
+               return ret;
+
+       /* Chip startup time is 500 us, 1 - 2 ms gives some margin */
+       usleep_range(1000, 2000);
+
+       if (chip->pdata->clock_mode != LP55XX_CLOCK_EXT)
+               val |= LP8501_INT_CLK;
+
+       ret = lp55xx_write(chip, LP8501_REG_CONFIG, val);
+       if (ret)
+               return ret;
+
+       /* Power selection for each output */
+       return lp55xx_update_bits(chip, LP8501_REG_PWR_CONFIG,
+                               LP8501_PWR_CONFIG_M, chip->pdata->pwr_sel);
+}
+
+static void lp8501_load_engine(struct lp55xx_chip *chip)
+{
+       enum lp55xx_engine_index idx = chip->engine_idx;
+       u8 mask[] = {
+               [LP55XX_ENGINE_1] = LP8501_MODE_ENG1_M,
+               [LP55XX_ENGINE_2] = LP8501_MODE_ENG2_M,
+               [LP55XX_ENGINE_3] = LP8501_MODE_ENG3_M,
+       };
+
+       u8 val[] = {
+               [LP55XX_ENGINE_1] = LP8501_LOAD_ENG1,
+               [LP55XX_ENGINE_2] = LP8501_LOAD_ENG2,
+               [LP55XX_ENGINE_3] = LP8501_LOAD_ENG3,
+       };
+
+       u8 page_sel[] = {
+               [LP55XX_ENGINE_1] = LP8501_PAGE_ENG1,
+               [LP55XX_ENGINE_2] = LP8501_PAGE_ENG2,
+               [LP55XX_ENGINE_3] = LP8501_PAGE_ENG3,
+       };
+
+       lp55xx_update_bits(chip, LP8501_REG_OP_MODE, mask[idx], val[idx]);
+
+       lp8501_wait_opmode_done();
+
+       lp55xx_write(chip, LP8501_REG_PROG_PAGE_SEL, page_sel[idx]);
+}
+
+static void lp8501_stop_engine(struct lp55xx_chip *chip)
+{
+       lp55xx_write(chip, LP8501_REG_OP_MODE, 0);
+       lp8501_wait_opmode_done();
+}
+
+static void lp8501_turn_off_channels(struct lp55xx_chip *chip)
+{
+       int i;
+
+       for (i = 0; i < LP8501_MAX_LEDS; i++)
+               lp55xx_write(chip, LP8501_REG_LED_PWM_BASE + i, 0);
+}
+
+static void lp8501_run_engine(struct lp55xx_chip *chip, bool start)
+{
+       int ret;
+       u8 mode;
+       u8 exec;
+
+       /* stop engine */
+       if (!start) {
+               lp8501_stop_engine(chip);
+               lp8501_turn_off_channels(chip);
+               return;
+       }
+
+       /*
+        * To run the engine,
+        * operation mode and enable register should be updated at the same time
+        */
+
+       ret = lp55xx_read(chip, LP8501_REG_OP_MODE, &mode);
+       if (ret)
+               return;
+
+       ret = lp55xx_read(chip, LP8501_REG_ENABLE, &exec);
+       if (ret)
+               return;
+
+       /* change operation mode to RUN only when each engine is loading */
+       if (LP8501_ENG1_IS_LOADING(mode)) {
+               mode = (mode & ~LP8501_MODE_ENG1_M) | LP8501_RUN_ENG1;
+               exec = (exec & ~LP8501_EXEC_ENG1_M) | LP8501_RUN_ENG1;
+       }
+
+       if (LP8501_ENG2_IS_LOADING(mode)) {
+               mode = (mode & ~LP8501_MODE_ENG2_M) | LP8501_RUN_ENG2;
+               exec = (exec & ~LP8501_EXEC_ENG2_M) | LP8501_RUN_ENG2;
+       }
+
+       if (LP8501_ENG3_IS_LOADING(mode)) {
+               mode = (mode & ~LP8501_MODE_ENG3_M) | LP8501_RUN_ENG3;
+               exec = (exec & ~LP8501_EXEC_ENG3_M) | LP8501_RUN_ENG3;
+       }
+
+       lp55xx_write(chip, LP8501_REG_OP_MODE, mode);
+       lp8501_wait_opmode_done();
+
+       lp55xx_update_bits(chip, LP8501_REG_ENABLE, LP8501_EXEC_M, exec);
+}
+
+static int lp8501_update_program_memory(struct lp55xx_chip *chip,
+                                       const u8 *data, size_t size)
+{
+       u8 pattern[LP8501_PROGRAM_LENGTH] = {0};
+       unsigned cmd;
+       char c[3];
+       int update_size;
+       int nrchars;
+       int offset = 0;
+       int ret;
+       int i;
+
+       /* clear program memory before updating */
+       for (i = 0; i < LP8501_PROGRAM_LENGTH; i++)
+               lp55xx_write(chip, LP8501_REG_PROG_MEM + i, 0);
+
+       i = 0;
+       while ((offset < size - 1) && (i < LP8501_PROGRAM_LENGTH)) {
+               /* separate sscanfs because length is working only for %s */
+               ret = sscanf(data + offset, "%2s%n ", c, &nrchars);
+               if (ret != 1)
+                       goto err;
+
+               ret = sscanf(c, "%2x", &cmd);
+               if (ret != 1)
+                       goto err;
+
+               pattern[i] = (u8)cmd;
+               offset += nrchars;
+               i++;
+       }
+
+       /* Each instruction is 16bit long. Check that length is even */
+       if (i % 2)
+               goto err;
+
+       update_size = i;
+       for (i = 0; i < update_size; i++)
+               lp55xx_write(chip, LP8501_REG_PROG_MEM + i, pattern[i]);
+
+       return 0;
+
+err:
+       dev_err(&chip->cl->dev, "wrong pattern format\n");
+       return -EINVAL;
+}
+
+static void lp8501_firmware_loaded(struct lp55xx_chip *chip)
+{
+       const struct firmware *fw = chip->fw;
+
+       if (fw->size > LP8501_PROGRAM_LENGTH) {
+               dev_err(&chip->cl->dev, "firmware data size overflow: %zu\n",
+                       fw->size);
+               return;
+       }
+
+       /*
+        * Program memory sequence
+        *  1) set engine mode to "LOAD"
+        *  2) write firmware data into program memory
+        */
+
+       lp8501_load_engine(chip);
+       lp8501_update_program_memory(chip, fw->data, fw->size);
+}
+
+static void lp8501_led_brightness_work(struct work_struct *work)
+{
+       struct lp55xx_led *led = container_of(work, struct lp55xx_led,
+                                             brightness_work);
+       struct lp55xx_chip *chip = led->chip;
+
+       mutex_lock(&chip->lock);
+       lp55xx_write(chip, LP8501_REG_LED_PWM_BASE + led->chan_nr,
+                    led->brightness);
+       mutex_unlock(&chip->lock);
+}
+
+/* Chip specific configurations */
+static struct lp55xx_device_config lp8501_cfg = {
+       .reset = {
+               .addr = LP8501_REG_RESET,
+               .val  = LP8501_RESET,
+       },
+       .enable = {
+               .addr = LP8501_REG_ENABLE,
+               .val  = LP8501_ENABLE,
+       },
+       .max_channel  = LP8501_MAX_LEDS,
+       .post_init_device   = lp8501_post_init_device,
+       .brightness_work_fn = lp8501_led_brightness_work,
+       .set_led_current    = lp8501_set_led_current,
+       .firmware_cb        = lp8501_firmware_loaded,
+       .run_engine         = lp8501_run_engine,
+};
+
+static int lp8501_probe(struct i2c_client *client,
+                       const struct i2c_device_id *id)
+{
+       int ret;
+       struct lp55xx_chip *chip;
+       struct lp55xx_led *led;
+       struct lp55xx_platform_data *pdata;
+       struct device_node *np = client->dev.of_node;
+
+       if (!dev_get_platdata(&client->dev)) {
+               if (np) {
+                       ret = lp55xx_of_populate_pdata(&client->dev, np);
+                       if (ret < 0)
+                               return ret;
+               } else {
+                       dev_err(&client->dev, "no platform data\n");
+                       return -EINVAL;
+               }
+       }
+       pdata = dev_get_platdata(&client->dev);
+
+       chip = devm_kzalloc(&client->dev, sizeof(*chip), GFP_KERNEL);
+       if (!chip)
+               return -ENOMEM;
+
+       led = devm_kzalloc(&client->dev,
+                       sizeof(*led) * pdata->num_channels, GFP_KERNEL);
+       if (!led)
+               return -ENOMEM;
+
+       chip->cl = client;
+       chip->pdata = pdata;
+       chip->cfg = &lp8501_cfg;
+
+       mutex_init(&chip->lock);
+
+       i2c_set_clientdata(client, led);
+
+       ret = lp55xx_init_device(chip);
+       if (ret)
+               goto err_init;
+
+       dev_info(&client->dev, "%s Programmable led chip found\n", id->name);
+
+       ret = lp55xx_register_leds(led, chip);
+       if (ret)
+               goto err_register_leds;
+
+       ret = lp55xx_register_sysfs(chip);
+       if (ret) {
+               dev_err(&client->dev, "registering sysfs failed\n");
+               goto err_register_sysfs;
+       }
+
+       return 0;
+
+err_register_sysfs:
+       lp55xx_unregister_leds(led, chip);
+err_register_leds:
+       lp55xx_deinit_device(chip);
+err_init:
+       return ret;
+}
+
+static int lp8501_remove(struct i2c_client *client)
+{
+       struct lp55xx_led *led = i2c_get_clientdata(client);
+       struct lp55xx_chip *chip = led->chip;
+
+       lp8501_stop_engine(chip);
+       lp55xx_unregister_sysfs(chip);
+       lp55xx_unregister_leds(led, chip);
+       lp55xx_deinit_device(chip);
+
+       return 0;
+}
+
+static const struct i2c_device_id lp8501_id[] = {
+       { "lp8501",  0 },
+       { }
+};
+MODULE_DEVICE_TABLE(i2c, lp8501_id);
+
+#ifdef CONFIG_OF
+static const struct of_device_id of_lp8501_leds_match[] = {
+       { .compatible = "ti,lp8501", },
+       {},
+};
+
+MODULE_DEVICE_TABLE(of, of_lp8501_leds_match);
+#endif
+
+static struct i2c_driver lp8501_driver = {
+       .driver = {
+               .name   = "lp8501",
+               .of_match_table = of_match_ptr(of_lp8501_leds_match),
+       },
+       .probe          = lp8501_probe,
+       .remove         = lp8501_remove,
+       .id_table       = lp8501_id,
+};
+
+module_i2c_driver(lp8501_driver);
+
+MODULE_DESCRIPTION("Texas Instruments LP8501 LED drvier");
+MODULE_AUTHOR("Milo Kim");
+MODULE_LICENSE("GPL");
index ca48a7d..3417e5b 100644 (file)
@@ -135,7 +135,7 @@ static void delete_lt3593_led(struct lt3593_led_data *led)
 
 static int lt3593_led_probe(struct platform_device *pdev)
 {
-       struct gpio_led_platform_data *pdata = pdev->dev.platform_data;
+       struct gpio_led_platform_data *pdata = dev_get_platdata(&pdev->dev);
        struct lt3593_led_data *leds_data;
        int i, ret = 0;
 
@@ -169,7 +169,7 @@ err:
 static int lt3593_led_remove(struct platform_device *pdev)
 {
        int i;
-       struct gpio_led_platform_data *pdata = pdev->dev.platform_data;
+       struct gpio_led_platform_data *pdata = dev_get_platdata(&pdev->dev);
        struct lt3593_led_data *leds_data;
 
        leds_data = platform_get_drvdata(pdev);
index c61c5eb..2f9f141 100644 (file)
@@ -306,7 +306,7 @@ create_netxbig_led(struct platform_device *pdev,
                   struct netxbig_led_data *led_dat,
                   const struct netxbig_led *template)
 {
-       struct netxbig_led_platform_data *pdata = pdev->dev.platform_data;
+       struct netxbig_led_platform_data *pdata = dev_get_platdata(&pdev->dev);
        int ret;
 
        spin_lock_init(&led_dat->lock);
@@ -354,7 +354,7 @@ create_netxbig_led(struct platform_device *pdev,
 
 static int netxbig_led_probe(struct platform_device *pdev)
 {
-       struct netxbig_led_platform_data *pdata = pdev->dev.platform_data;
+       struct netxbig_led_platform_data *pdata = dev_get_platdata(&pdev->dev);
        struct netxbig_led_data *leds_data;
        int i;
        int ret;
@@ -391,7 +391,7 @@ err_free_leds:
 
 static int netxbig_led_remove(struct platform_device *pdev)
 {
-       struct netxbig_led_platform_data *pdata = pdev->dev.platform_data;
+       struct netxbig_led_platform_data *pdata = dev_get_platdata(&pdev->dev);
        struct netxbig_led_data *leds_data;
        int i;
 
index e7df987..141f134 100644 (file)
@@ -321,7 +321,7 @@ static inline int sizeof_ns2_led_priv(int num_leds)
 
 static int ns2_led_probe(struct platform_device *pdev)
 {
-       struct ns2_led_platform_data *pdata = pdev->dev.platform_data;
+       struct ns2_led_platform_data *pdata = dev_get_platdata(&pdev->dev);
        struct ns2_led_priv *priv;
        int i;
        int ret;
index 0c597bd..4a0e786 100644 (file)
@@ -446,7 +446,8 @@ static int pca9532_probe(struct i2c_client *client,
        const struct i2c_device_id *id)
 {
        struct pca9532_data *data = i2c_get_clientdata(client);
-       struct pca9532_platform_data *pca9532_pdata = client->dev.platform_data;
+       struct pca9532_platform_data *pca9532_pdata =
+                       dev_get_platdata(&client->dev);
 
        if (!pca9532_pdata)
                return -EIO;
index edf485b..c3a08b6 100644 (file)
@@ -267,7 +267,7 @@ static int pca955x_probe(struct i2c_client *client,
 
        chip = &pca955x_chipdefs[id->driver_data];
        adapter = to_i2c_adapter(client->dev.parent);
-       pdata = client->dev.platform_data;
+       pdata = dev_get_platdata(&client->dev);
 
        /* Make sure the slave address / chip type combo given is possible */
        if ((client->addr & ~((1 << chip->slv_addr_shift) - 1)) !=
diff --git a/drivers/leds/leds-pca9633.c b/drivers/leds/leds-pca9633.c
deleted file mode 100644 (file)
index 9aae567..0000000
+++ /dev/null
@@ -1,194 +0,0 @@
-/*
- * Copyright 2011 bct electronic GmbH
- *
- * Author: Peter Meerwald <p.meerwald@bct-electronic.com>
- *
- * Based on leds-pca955x.c
- *
- * This file is subject to the terms and conditions of version 2 of
- * the GNU General Public License.  See the file COPYING in the main
- * directory of this archive for more details.
- *
- * LED driver for the PCA9633 I2C LED driver (7-bit slave address 0x62)
- *
- */
-
-#include <linux/module.h>
-#include <linux/delay.h>
-#include <linux/string.h>
-#include <linux/ctype.h>
-#include <linux/leds.h>
-#include <linux/err.h>
-#include <linux/i2c.h>
-#include <linux/workqueue.h>
-#include <linux/slab.h>
-#include <linux/platform_data/leds-pca9633.h>
-
-/* LED select registers determine the source that drives LED outputs */
-#define PCA9633_LED_OFF                0x0     /* LED driver off */
-#define PCA9633_LED_ON         0x1     /* LED driver on */
-#define PCA9633_LED_PWM                0x2     /* Controlled through PWM */
-#define PCA9633_LED_GRP_PWM    0x3     /* Controlled through PWM/GRPPWM */
-
-#define PCA9633_MODE1          0x00
-#define PCA9633_MODE2          0x01
-#define PCA9633_PWM_BASE       0x02
-#define PCA9633_LEDOUT         0x08
-
-static const struct i2c_device_id pca9633_id[] = {
-       { "pca9633", 0 },
-       { }
-};
-MODULE_DEVICE_TABLE(i2c, pca9633_id);
-
-struct pca9633_led {
-       struct i2c_client *client;
-       struct work_struct work;
-       enum led_brightness brightness;
-       struct led_classdev led_cdev;
-       int led_num; /* 0 .. 3 potentially */
-       char name[32];
-};
-
-static void pca9633_led_work(struct work_struct *work)
-{
-       struct pca9633_led *pca9633 = container_of(work,
-               struct pca9633_led, work);
-       u8 ledout = i2c_smbus_read_byte_data(pca9633->client, PCA9633_LEDOUT);
-       int shift = 2 * pca9633->led_num;
-       u8 mask = 0x3 << shift;
-
-       switch (pca9633->brightness) {
-       case LED_FULL:
-               i2c_smbus_write_byte_data(pca9633->client, PCA9633_LEDOUT,
-                       (ledout & ~mask) | (PCA9633_LED_ON << shift));
-               break;
-       case LED_OFF:
-               i2c_smbus_write_byte_data(pca9633->client, PCA9633_LEDOUT,
-                       ledout & ~mask);
-               break;
-       default:
-               i2c_smbus_write_byte_data(pca9633->client,
-                       PCA9633_PWM_BASE + pca9633->led_num,
-                       pca9633->brightness);
-               i2c_smbus_write_byte_data(pca9633->client, PCA9633_LEDOUT,
-                       (ledout & ~mask) | (PCA9633_LED_PWM << shift));
-               break;
-       }
-}
-
-static void pca9633_led_set(struct led_classdev *led_cdev,
-       enum led_brightness value)
-{
-       struct pca9633_led *pca9633;
-
-       pca9633 = container_of(led_cdev, struct pca9633_led, led_cdev);
-
-       pca9633->brightness = value;
-
-       /*
-        * Must use workqueue for the actual I/O since I2C operations
-        * can sleep.
-        */
-       schedule_work(&pca9633->work);
-}
-
-static int pca9633_probe(struct i2c_client *client,
-                                       const struct i2c_device_id *id)
-{
-       struct pca9633_led *pca9633;
-       struct pca9633_platform_data *pdata;
-       int i, err;
-
-       pdata = client->dev.platform_data;
-
-       if (pdata) {
-               if (pdata->leds.num_leds <= 0 || pdata->leds.num_leds > 4) {
-                       dev_err(&client->dev, "board info must claim at most 4 LEDs");
-                       return -EINVAL;
-               }
-       }
-
-       pca9633 = devm_kzalloc(&client->dev, 4 * sizeof(*pca9633), GFP_KERNEL);
-       if (!pca9633)
-               return -ENOMEM;
-
-       i2c_set_clientdata(client, pca9633);
-
-       for (i = 0; i < 4; i++) {
-               pca9633[i].client = client;
-               pca9633[i].led_num = i;
-
-               /* Platform data can specify LED names and default triggers */
-               if (pdata && i < pdata->leds.num_leds) {
-                       if (pdata->leds.leds[i].name)
-                               snprintf(pca9633[i].name,
-                                        sizeof(pca9633[i].name), "pca9633:%s",
-                                        pdata->leds.leds[i].name);
-                       if (pdata->leds.leds[i].default_trigger)
-                               pca9633[i].led_cdev.default_trigger =
-                                       pdata->leds.leds[i].default_trigger;
-               } else {
-                       snprintf(pca9633[i].name, sizeof(pca9633[i].name),
-                                "pca9633:%d", i);
-               }
-
-               pca9633[i].led_cdev.name = pca9633[i].name;
-               pca9633[i].led_cdev.brightness_set = pca9633_led_set;
-
-               INIT_WORK(&pca9633[i].work, pca9633_led_work);
-
-               err = led_classdev_register(&client->dev, &pca9633[i].led_cdev);
-               if (err < 0)
-                       goto exit;
-       }
-
-       /* Disable LED all-call address and set normal mode */
-       i2c_smbus_write_byte_data(client, PCA9633_MODE1, 0x00);
-
-       /* Configure output: open-drain or totem pole (push-pull) */
-       if (pdata && pdata->outdrv == PCA9633_OPEN_DRAIN)
-               i2c_smbus_write_byte_data(client, PCA9633_MODE2, 0x01);
-
-       /* Turn off LEDs */
-       i2c_smbus_write_byte_data(client, PCA9633_LEDOUT, 0x00);
-
-       return 0;
-
-exit:
-       while (i--) {
-               led_classdev_unregister(&pca9633[i].led_cdev);
-               cancel_work_sync(&pca9633[i].work);
-       }
-
-       return err;
-}
-
-static int pca9633_remove(struct i2c_client *client)
-{
-       struct pca9633_led *pca9633 = i2c_get_clientdata(client);
-       int i;
-
-       for (i = 0; i < 4; i++) {
-               led_classdev_unregister(&pca9633[i].led_cdev);
-               cancel_work_sync(&pca9633[i].work);
-       }
-
-       return 0;
-}
-
-static struct i2c_driver pca9633_driver = {
-       .driver = {
-               .name   = "leds-pca9633",
-               .owner  = THIS_MODULE,
-       },
-       .probe  = pca9633_probe,
-       .remove = pca9633_remove,
-       .id_table = pca9633_id,
-};
-
-module_i2c_driver(pca9633_driver);
-
-MODULE_AUTHOR("Peter Meerwald <p.meerwald@bct-electronic.com>");
-MODULE_DESCRIPTION("PCA9633 LED driver");
-MODULE_LICENSE("GPL v2");
diff --git a/drivers/leds/leds-pca963x.c b/drivers/leds/leds-pca963x.c
new file mode 100644 (file)
index 0000000..82589c0
--- /dev/null
@@ -0,0 +1,461 @@
+/*
+ * Copyright 2011 bct electronic GmbH
+ * Copyright 2013 Qtechnology/AS
+ *
+ * Author: Peter Meerwald <p.meerwald@bct-electronic.com>
+ * Author: Ricardo Ribalda <ricardo.ribalda@gmail.com>
+ *
+ * Based on leds-pca955x.c
+ *
+ * This file is subject to the terms and conditions of version 2 of
+ * the GNU General Public License.  See the file COPYING in the main
+ * directory of this archive for more details.
+ *
+ * LED driver for the PCA9633 I2C LED driver (7-bit slave address 0x62)
+ * LED driver for the PCA9634 I2C LED driver (7-bit slave address set by hw.)
+ *
+ * Note that hardware blinking violates the leds infrastructure driver
+ * interface since the hardware only supports blinking all LEDs with the
+ * same delay_on/delay_off rates.  That is, only the LEDs that are set to
+ * blink will actually blink but all LEDs that are set to blink will blink
+ * in identical fashion.  The delay_on/delay_off values of the last LED
+ * that is set to blink will be used for all of the blinking LEDs.
+ * Hardware blinking is disabled by default but can be enabled by setting
+ * the 'blink_type' member in the platform_data struct to 'PCA963X_HW_BLINK'
+ * or by adding the 'nxp,hw-blink' property to the DTS.
+ */
+
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/string.h>
+#include <linux/ctype.h>
+#include <linux/leds.h>
+#include <linux/err.h>
+#include <linux/i2c.h>
+#include <linux/workqueue.h>
+#include <linux/slab.h>
+#include <linux/of.h>
+#include <linux/platform_data/leds-pca963x.h>
+
+/* LED select registers determine the source that drives LED outputs */
+#define PCA963X_LED_OFF                0x0     /* LED driver off */
+#define PCA963X_LED_ON         0x1     /* LED driver on */
+#define PCA963X_LED_PWM                0x2     /* Controlled through PWM */
+#define PCA963X_LED_GRP_PWM    0x3     /* Controlled through PWM/GRPPWM */
+
+#define PCA963X_MODE2_DMBLNK   0x20    /* Enable blinking */
+
+#define PCA963X_MODE1          0x00
+#define PCA963X_MODE2          0x01
+#define PCA963X_PWM_BASE       0x02
+
+enum pca963x_type {
+       pca9633,
+       pca9634,
+};
+
+struct pca963x_chipdef {
+       u8                      grppwm;
+       u8                      grpfreq;
+       u8                      ledout_base;
+       int                     n_leds;
+};
+
+static struct pca963x_chipdef pca963x_chipdefs[] = {
+       [pca9633] = {
+               .grppwm         = 0x6,
+               .grpfreq        = 0x7,
+               .ledout_base    = 0x8,
+               .n_leds         = 4,
+       },
+       [pca9634] = {
+               .grppwm         = 0xa,
+               .grpfreq        = 0xb,
+               .ledout_base    = 0xc,
+               .n_leds         = 8,
+       },
+};
+
+/* Total blink period in milliseconds */
+#define PCA963X_BLINK_PERIOD_MIN       42
+#define PCA963X_BLINK_PERIOD_MAX       10667
+
+static const struct i2c_device_id pca963x_id[] = {
+       { "pca9632", pca9633 },
+       { "pca9633", pca9633 },
+       { "pca9634", pca9634 },
+       { }
+};
+MODULE_DEVICE_TABLE(i2c, pca963x_id);
+
+enum pca963x_cmd {
+       BRIGHTNESS_SET,
+       BLINK_SET,
+};
+
+struct pca963x_led;
+
+struct pca963x {
+       struct pca963x_chipdef *chipdef;
+       struct mutex mutex;
+       struct i2c_client *client;
+       struct pca963x_led *leds;
+};
+
+struct pca963x_led {
+       struct pca963x *chip;
+       struct work_struct work;
+       enum led_brightness brightness;
+       struct led_classdev led_cdev;
+       int led_num; /* 0 .. 7 potentially */
+       enum pca963x_cmd cmd;
+       char name[32];
+       u8 gdc;
+       u8 gfrq;
+};
+
+static void pca963x_brightness_work(struct pca963x_led *pca963x)
+{
+       u8 ledout_addr = pca963x->chip->chipdef->ledout_base
+               + (pca963x->led_num / 4);
+       u8 ledout;
+       int shift = 2 * (pca963x->led_num % 4);
+       u8 mask = 0x3 << shift;
+
+       mutex_lock(&pca963x->chip->mutex);
+       ledout = i2c_smbus_read_byte_data(pca963x->chip->client, ledout_addr);
+       switch (pca963x->brightness) {
+       case LED_FULL:
+               i2c_smbus_write_byte_data(pca963x->chip->client, ledout_addr,
+                       (ledout & ~mask) | (PCA963X_LED_ON << shift));
+               break;
+       case LED_OFF:
+               i2c_smbus_write_byte_data(pca963x->chip->client, ledout_addr,
+                       ledout & ~mask);
+               break;
+       default:
+               i2c_smbus_write_byte_data(pca963x->chip->client,
+                       PCA963X_PWM_BASE + pca963x->led_num,
+                       pca963x->brightness);
+               i2c_smbus_write_byte_data(pca963x->chip->client, ledout_addr,
+                       (ledout & ~mask) | (PCA963X_LED_PWM << shift));
+               break;
+       }
+       mutex_unlock(&pca963x->chip->mutex);
+}
+
+static void pca963x_blink_work(struct pca963x_led *pca963x)
+{
+       u8 ledout_addr = pca963x->chip->chipdef->ledout_base +
+               (pca963x->led_num / 4);
+       u8 ledout;
+       u8 mode2 = i2c_smbus_read_byte_data(pca963x->chip->client,
+                                                       PCA963X_MODE2);
+       int shift = 2 * (pca963x->led_num % 4);
+       u8 mask = 0x3 << shift;
+
+       i2c_smbus_write_byte_data(pca963x->chip->client,
+                       pca963x->chip->chipdef->grppwm, pca963x->gdc);
+
+       i2c_smbus_write_byte_data(pca963x->chip->client,
+                       pca963x->chip->chipdef->grpfreq, pca963x->gfrq);
+
+       if (!(mode2 & PCA963X_MODE2_DMBLNK))
+               i2c_smbus_write_byte_data(pca963x->chip->client, PCA963X_MODE2,
+                       mode2 | PCA963X_MODE2_DMBLNK);
+
+       mutex_lock(&pca963x->chip->mutex);
+       ledout = i2c_smbus_read_byte_data(pca963x->chip->client, ledout_addr);
+       if ((ledout & mask) != (PCA963X_LED_GRP_PWM << shift))
+               i2c_smbus_write_byte_data(pca963x->chip->client, ledout_addr,
+                       (ledout & ~mask) | (PCA963X_LED_GRP_PWM << shift));
+       mutex_unlock(&pca963x->chip->mutex);
+}
+
+static void pca963x_work(struct work_struct *work)
+{
+       struct pca963x_led *pca963x = container_of(work,
+               struct pca963x_led, work);
+
+       switch (pca963x->cmd) {
+       case BRIGHTNESS_SET:
+               pca963x_brightness_work(pca963x);
+               break;
+       case BLINK_SET:
+               pca963x_blink_work(pca963x);
+               break;
+       }
+}
+
+static void pca963x_led_set(struct led_classdev *led_cdev,
+       enum led_brightness value)
+{
+       struct pca963x_led *pca963x;
+
+       pca963x = container_of(led_cdev, struct pca963x_led, led_cdev);
+
+       pca963x->cmd = BRIGHTNESS_SET;
+       pca963x->brightness = value;
+
+       /*
+        * Must use workqueue for the actual I/O since I2C operations
+        * can sleep.
+        */
+       schedule_work(&pca963x->work);
+}
+
+static int pca963x_blink_set(struct led_classdev *led_cdev,
+               unsigned long *delay_on, unsigned long *delay_off)
+{
+       struct pca963x_led *pca963x;
+       unsigned long time_on, time_off, period;
+       u8 gdc, gfrq;
+
+       pca963x = container_of(led_cdev, struct pca963x_led, led_cdev);
+
+       time_on = *delay_on;
+       time_off = *delay_off;
+
+       /* If both zero, pick reasonable defaults of 500ms each */
+       if (!time_on && !time_off) {
+               time_on = 500;
+               time_off = 500;
+       }
+
+       period = time_on + time_off;
+
+       /* If period is not supported by hardware, default to something sane. */
+       if ((period < PCA963X_BLINK_PERIOD_MIN) ||
+           (period > PCA963X_BLINK_PERIOD_MAX)) {
+               time_on = 500;
+               time_off = 500;
+               period = time_on + time_off;
+       }
+
+       /*
+        * From manual: duty cycle = (GDC / 256) ->
+        *      (time_on / period) = (GDC / 256) ->
+        *              GDC = ((time_on * 256) / period)
+        */
+       gdc = (time_on * 256) / period;
+
+       /*
+        * From manual: period = ((GFRQ + 1) / 24) in seconds.
+        * So, period (in ms) = (((GFRQ + 1) / 24) * 1000) ->
+        *              GFRQ = ((period * 24 / 1000) - 1)
+        */
+       gfrq = (period * 24 / 1000) - 1;
+
+       pca963x->cmd = BLINK_SET;
+       pca963x->gdc = gdc;
+       pca963x->gfrq = gfrq;
+
+       /*
+        * Must use workqueue for the actual I/O since I2C operations
+        * can sleep.
+        */
+       schedule_work(&pca963x->work);
+
+       *delay_on = time_on;
+       *delay_off = time_off;
+
+       return 0;
+}
+
+#if IS_ENABLED(CONFIG_OF)
+static struct pca963x_platform_data *
+pca963x_dt_init(struct i2c_client *client, struct pca963x_chipdef *chip)
+{
+       struct device_node *np = client->dev.of_node, *child;
+       struct pca963x_platform_data *pdata;
+       struct led_info *pca963x_leds;
+       int count;
+
+       count = of_get_child_count(np);
+       if (!count || count > chip->n_leds)
+               return ERR_PTR(-ENODEV);
+
+       pca963x_leds = devm_kzalloc(&client->dev,
+                       sizeof(struct led_info) * chip->n_leds, GFP_KERNEL);
+       if (!pca963x_leds)
+               return ERR_PTR(-ENOMEM);
+
+       for_each_child_of_node(np, child) {
+               struct led_info led;
+               u32 reg;
+               int res;
+
+               res = of_property_read_u32(child, "reg", &reg);
+               if ((res != 0) || (reg >= chip->n_leds))
+                       continue;
+               led.name =
+                       of_get_property(child, "label", NULL) ? : child->name;
+               led.default_trigger =
+                       of_get_property(child, "linux,default-trigger", NULL);
+               pca963x_leds[reg] = led;
+       }
+       pdata = devm_kzalloc(&client->dev,
+                            sizeof(struct pca963x_platform_data), GFP_KERNEL);
+       if (!pdata)
+               return ERR_PTR(-ENOMEM);
+
+       pdata->leds.leds = pca963x_leds;
+       pdata->leds.num_leds = chip->n_leds;
+
+       /* default to open-drain unless totem pole (push-pull) is specified */
+       if (of_property_read_bool(np, "nxp,totem-pole"))
+               pdata->outdrv = PCA963X_TOTEM_POLE;
+       else
+               pdata->outdrv = PCA963X_OPEN_DRAIN;
+
+       /* default to software blinking unless hardware blinking is specified */
+       if (of_property_read_bool(np, "nxp,hw-blink"))
+               pdata->blink_type = PCA963X_HW_BLINK;
+       else
+               pdata->blink_type = PCA963X_SW_BLINK;
+
+       return pdata;
+}
+
+static const struct of_device_id of_pca963x_match[] = {
+       { .compatible = "nxp,pca9632", },
+       { .compatible = "nxp,pca9633", },
+       { .compatible = "nxp,pca9634", },
+       {},
+};
+#else
+static struct pca963x_platform_data *
+pca963x_dt_init(struct i2c_client *client, struct pca963x_chipdef *chip)
+{
+       return ERR_PTR(-ENODEV);
+}
+#endif
+
+static int pca963x_probe(struct i2c_client *client,
+                                       const struct i2c_device_id *id)
+{
+       struct pca963x *pca963x_chip;
+       struct pca963x_led *pca963x;
+       struct pca963x_platform_data *pdata;
+       struct pca963x_chipdef *chip;
+       int i, err;
+
+       chip = &pca963x_chipdefs[id->driver_data];
+       pdata = dev_get_platdata(&client->dev);
+
+       if (!pdata) {
+               pdata = pca963x_dt_init(client, chip);
+               if (IS_ERR(pdata)) {
+                       dev_warn(&client->dev, "could not parse configuration\n");
+                       pdata = NULL;
+               }
+       }
+
+       if (pdata && (pdata->leds.num_leds < 1 ||
+                                pdata->leds.num_leds > chip->n_leds)) {
+               dev_err(&client->dev, "board info must claim 1-%d LEDs",
+                                                               chip->n_leds);
+               return -EINVAL;
+       }
+
+       pca963x_chip = devm_kzalloc(&client->dev, sizeof(*pca963x_chip),
+                                                               GFP_KERNEL);
+       if (!pca963x_chip)
+               return -ENOMEM;
+       pca963x = devm_kzalloc(&client->dev, chip->n_leds * sizeof(*pca963x),
+                                                               GFP_KERNEL);
+       if (!pca963x)
+               return -ENOMEM;
+
+       i2c_set_clientdata(client, pca963x_chip);
+
+       mutex_init(&pca963x_chip->mutex);
+       pca963x_chip->chipdef = chip;
+       pca963x_chip->client = client;
+       pca963x_chip->leds = pca963x;
+
+       /* Turn off LEDs by default */
+       i2c_smbus_write_byte_data(client, chip->ledout_base, 0x00);
+       if (chip->n_leds > 4)
+               i2c_smbus_write_byte_data(client, chip->ledout_base + 1, 0x00);
+
+       for (i = 0; i < chip->n_leds; i++) {
+               pca963x[i].led_num = i;
+               pca963x[i].chip = pca963x_chip;
+
+               /* Platform data can specify LED names and default triggers */
+               if (pdata && i < pdata->leds.num_leds) {
+                       if (pdata->leds.leds[i].name)
+                               snprintf(pca963x[i].name,
+                                        sizeof(pca963x[i].name), "pca963x:%s",
+                                        pdata->leds.leds[i].name);
+                       if (pdata->leds.leds[i].default_trigger)
+                               pca963x[i].led_cdev.default_trigger =
+                                       pdata->leds.leds[i].default_trigger;
+               }
+               if (!pdata || i >= pdata->leds.num_leds ||
+                                               !pdata->leds.leds[i].name)
+                       snprintf(pca963x[i].name, sizeof(pca963x[i].name),
+                                "pca963x:%d:%.2x:%d", client->adapter->nr,
+                                client->addr, i);
+
+               pca963x[i].led_cdev.name = pca963x[i].name;
+               pca963x[i].led_cdev.brightness_set = pca963x_led_set;
+
+               if (pdata && pdata->blink_type == PCA963X_HW_BLINK)
+                       pca963x[i].led_cdev.blink_set = pca963x_blink_set;
+
+               INIT_WORK(&pca963x[i].work, pca963x_work);
+
+               err = led_classdev_register(&client->dev, &pca963x[i].led_cdev);
+               if (err < 0)
+                       goto exit;
+       }
+
+       /* Disable LED all-call address and set normal mode */
+       i2c_smbus_write_byte_data(client, PCA963X_MODE1, 0x00);
+
+       /* Configure output: open-drain or totem pole (push-pull) */
+       if (pdata && pdata->outdrv == PCA963X_OPEN_DRAIN)
+               i2c_smbus_write_byte_data(client, PCA963X_MODE2, 0x01);
+
+       return 0;
+
+exit:
+       while (i--) {
+               led_classdev_unregister(&pca963x[i].led_cdev);
+               cancel_work_sync(&pca963x[i].work);
+       }
+
+       return err;
+}
+
+static int pca963x_remove(struct i2c_client *client)
+{
+       struct pca963x *pca963x = i2c_get_clientdata(client);
+       int i;
+
+       for (i = 0; i < pca963x->chipdef->n_leds; i++) {
+               led_classdev_unregister(&pca963x->leds[i].led_cdev);
+               cancel_work_sync(&pca963x->leds[i].work);
+       }
+
+       return 0;
+}
+
+static struct i2c_driver pca963x_driver = {
+       .driver = {
+               .name   = "leds-pca963x",
+               .owner  = THIS_MODULE,
+               .of_match_table = of_match_ptr(of_pca963x_match),
+       },
+       .probe  = pca963x_probe,
+       .remove = pca963x_remove,
+       .id_table = pca963x_id,
+};
+
+module_i2c_driver(pca963x_driver);
+
+MODULE_AUTHOR("Peter Meerwald <p.meerwald@bct-electronic.com>");
+MODULE_DESCRIPTION("PCA963X LED driver");
+MODULE_LICENSE("GPL v2");
index faf52c0..bb6f948 100644 (file)
@@ -147,7 +147,7 @@ err:
 
 static int led_pwm_probe(struct platform_device *pdev)
 {
-       struct led_pwm_platform_data *pdata = pdev->dev.platform_data;
+       struct led_pwm_platform_data *pdata = dev_get_platdata(&pdev->dev);
        struct led_pwm_priv *priv;
        int i, ret = 0;
 
index 4253a9b..358430d 100644 (file)
@@ -142,7 +142,8 @@ static void regulator_led_brightness_set(struct led_classdev *led_cdev,
 
 static int regulator_led_probe(struct platform_device *pdev)
 {
-       struct led_regulator_platform_data *pdata = pdev->dev.platform_data;
+       struct led_regulator_platform_data *pdata =
+                       dev_get_platdata(&pdev->dev);
        struct regulator_led *led;
        struct regulator *vcc;
        int ret = 0;
index e1a0df6..76483fb 100644 (file)
@@ -71,7 +71,7 @@ static int s3c24xx_led_remove(struct platform_device *dev)
 
 static int s3c24xx_led_probe(struct platform_device *dev)
 {
-       struct s3c24xx_led_platdata *pdata = dev->dev.platform_data;
+       struct s3c24xx_led_platdata *pdata = dev_get_platdata(&dev->dev);
        struct s3c24xx_gpio_led *led;
        int ret;
 
index 64e204e..5b8f938 100644 (file)
@@ -91,7 +91,7 @@ MODULE_PARM_DESC(nodetect, "Skip DMI-based hardware detection");
  * detected as working, but in reality it is not) as low as
  * possible.
  */
-static struct dmi_system_id __initdata nas_led_whitelist[] = {
+static struct dmi_system_id nas_led_whitelist[] __initdata = {
        {
                .callback = ss4200_led_dmi_callback,
                .ident = "Intel SS4200-E",
@@ -197,7 +197,7 @@ static void nasgpio_led_set_attr(struct led_classdev *led_cdev,
        spin_unlock(&nasgpio_gpio_lock);
 }
 
-u32 nasgpio_led_get_attr(struct led_classdev *led_cdev, u32 port)
+static u32 nasgpio_led_get_attr(struct led_classdev *led_cdev, u32 port)
 {
        struct nasgpio_led *led = led_classdev_to_nasgpio_led(led_cdev);
        u32 gpio_in;
index 98fe021..8cc304f 100644 (file)
@@ -737,7 +737,7 @@ static int tca6507_probe(struct i2c_client *client,
        int i = 0;
 
        adapter = to_i2c_adapter(client->dev.parent);
-       pdata = client->dev.platform_data;
+       pdata = dev_get_platdata(&client->dev);
 
        if (!i2c_check_functionality(adapter, I2C_FUNC_I2C))
                return -EIO;
index 120815a..0a1a13f 100644 (file)
@@ -230,9 +230,9 @@ static int wm831x_status_probe(struct platform_device *pdev)
        int id = pdev->id % ARRAY_SIZE(chip_pdata->status);
        int ret;
 
-       res = platform_get_resource(pdev, IORESOURCE_IO, 0);
+       res = platform_get_resource(pdev, IORESOURCE_REG, 0);
        if (res == NULL) {
-               dev_err(&pdev->dev, "No I/O resource\n");
+               dev_err(&pdev->dev, "No register resource\n");
                ret = -EINVAL;
                goto err;
        }
@@ -246,8 +246,8 @@ static int wm831x_status_probe(struct platform_device *pdev)
        drvdata->wm831x = wm831x;
        drvdata->reg = res->start;
 
-       if (wm831x->dev->platform_data)
-               chip_pdata = wm831x->dev->platform_data;
+       if (dev_get_platdata(wm831x->dev))
+               chip_pdata = dev_get_platdata(wm831x->dev);
        else
                chip_pdata = NULL;
 
index 8a181d5..3f75fd2 100644 (file)
@@ -203,7 +203,7 @@ static int wm8350_led_probe(struct platform_device *pdev)
 {
        struct regulator *isink, *dcdc;
        struct wm8350_led *led;
-       struct wm8350_led_platform_data *pdata = pdev->dev.platform_data;
+       struct wm8350_led_platform_data *pdata = dev_get_platdata(&pdev->dev);
        int i;
 
        if (pdata == NULL) {
index 3c9c88a..47e55aa 100644 (file)
@@ -36,26 +36,28 @@ static int fb_notifier_callback(struct notifier_block *p,
                                        struct bl_trig_notifier, notifier);
        struct led_classdev *led = n->led;
        struct fb_event *fb_event = data;
-       int *blank = fb_event->data;
-       int new_status = *blank ? BLANK : UNBLANK;
+       int *blank;
+       int new_status;
 
-       switch (event) {
-       case FB_EVENT_BLANK:
-               if (new_status == n->old_status)
-                       break;
+       /* If we aren't interested in this event, skip it immediately ... */
+       if (event != FB_EVENT_BLANK)
+               return 0;
 
-               if ((n->old_status == UNBLANK) ^ n->invert) {
-                       n->brightness = led->brightness;
-                       __led_set_brightness(led, LED_OFF);
-               } else {
-                       __led_set_brightness(led, n->brightness);
-               }
+       blank = fb_event->data;
+       new_status = *blank ? BLANK : UNBLANK;
 
-               n->old_status = new_status;
+       if (new_status == n->old_status)
+               return 0;
 
-               break;
+       if ((n->old_status == UNBLANK) ^ n->invert) {
+               n->brightness = led->brightness;
+               __led_set_brightness(led, LED_OFF);
+       } else {
+               __led_set_brightness(led, n->brightness);
        }
 
+       n->old_status = new_status;
+
        return 0;
 }
 
index ee37288..f9764e6 100644 (file)
@@ -597,24 +597,19 @@ static int mca_reap(struct btree *b, struct closure *cl, unsigned min_order)
        return 0;
 }
 
-static int bch_mca_shrink(struct shrinker *shrink, struct shrink_control *sc)
+static unsigned long bch_mca_scan(struct shrinker *shrink,
+                                 struct shrink_control *sc)
 {
        struct cache_set *c = container_of(shrink, struct cache_set, shrink);
        struct btree *b, *t;
        unsigned long i, nr = sc->nr_to_scan;
+       unsigned long freed = 0;
 
        if (c->shrinker_disabled)
-               return 0;
+               return SHRINK_STOP;
 
        if (c->try_harder)
-               return 0;
-
-       /*
-        * If nr == 0, we're supposed to return the number of items we have
-        * cached. Not allowed to return -1.
-        */
-       if (!nr)
-               return mca_can_free(c) * c->btree_pages;
+               return SHRINK_STOP;
 
        /* Return -1 if we can't do anything right now */
        if (sc->gfp_mask & __GFP_WAIT)
@@ -634,14 +629,14 @@ static int bch_mca_shrink(struct shrinker *shrink, struct shrink_control *sc)
 
        i = 0;
        list_for_each_entry_safe(b, t, &c->btree_cache_freeable, list) {
-               if (!nr)
+               if (freed >= nr)
                        break;
 
                if (++i > 3 &&
                    !mca_reap(b, NULL, 0)) {
                        mca_data_free(b);
                        rw_unlock(true, b);
-                       --nr;
+                       freed++;
                }
        }
 
@@ -652,7 +647,7 @@ static int bch_mca_shrink(struct shrinker *shrink, struct shrink_control *sc)
        if (list_empty(&c->btree_cache))
                goto out;
 
-       for (i = 0; nr && i < c->bucket_cache_used; i++) {
+       for (i = 0; (nr--) && i < c->bucket_cache_used; i++) {
                b = list_first_entry(&c->btree_cache, struct btree, list);
                list_rotate_left(&c->btree_cache);
 
@@ -661,14 +656,27 @@ static int bch_mca_shrink(struct shrinker *shrink, struct shrink_control *sc)
                        mca_bucket_free(b);
                        mca_data_free(b);
                        rw_unlock(true, b);
-                       --nr;
+                       freed++;
                } else
                        b->accessed = 0;
        }
 out:
-       nr = mca_can_free(c) * c->btree_pages;
        mutex_unlock(&c->bucket_lock);
-       return nr;
+       return freed;
+}
+
+static unsigned long bch_mca_count(struct shrinker *shrink,
+                                  struct shrink_control *sc)
+{
+       struct cache_set *c = container_of(shrink, struct cache_set, shrink);
+
+       if (c->shrinker_disabled)
+               return 0;
+
+       if (c->try_harder)
+               return 0;
+
+       return mca_can_free(c) * c->btree_pages;
 }
 
 void bch_btree_cache_free(struct cache_set *c)
@@ -737,7 +745,8 @@ int bch_btree_cache_alloc(struct cache_set *c)
                c->verify_data = NULL;
 #endif
 
-       c->shrink.shrink = bch_mca_shrink;
+       c->shrink.count_objects = bch_mca_count;
+       c->shrink.scan_objects = bch_mca_scan;
        c->shrink.seeks = 4;
        c->shrink.batch = c->btree_pages * 2;
        register_shrinker(&c->shrink);
index 12a2c28..4fe6ab2 100644 (file)
@@ -556,7 +556,7 @@ STORE(__bch_cache_set)
                struct shrink_control sc;
                sc.gfp_mask = GFP_KERNEL;
                sc.nr_to_scan = strtoul_or_return(buf);
-               c->shrink.shrink(&c->shrink, &sc);
+               c->shrink.scan_objects(&c->shrink, &sc);
        }
 
        sysfs_strtoul(congested_read_threshold_us,
index 5227e07..173cbb2 100644 (file)
@@ -1425,62 +1425,75 @@ static int __cleanup_old_buffer(struct dm_buffer *b, gfp_t gfp,
                                unsigned long max_jiffies)
 {
        if (jiffies - b->last_accessed < max_jiffies)
-               return 1;
+               return 0;
 
        if (!(gfp & __GFP_IO)) {
                if (test_bit(B_READING, &b->state) ||
                    test_bit(B_WRITING, &b->state) ||
                    test_bit(B_DIRTY, &b->state))
-                       return 1;
+                       return 0;
        }
 
        if (b->hold_count)
-               return 1;
+               return 0;
 
        __make_buffer_clean(b);
        __unlink_buffer(b);
        __free_buffer_wake(b);
 
-       return 0;
+       return 1;
 }
 
-static void __scan(struct dm_bufio_client *c, unsigned long nr_to_scan,
-                  struct shrink_control *sc)
+static long __scan(struct dm_bufio_client *c, unsigned long nr_to_scan,
+                  gfp_t gfp_mask)
 {
        int l;
        struct dm_buffer *b, *tmp;
+       long freed = 0;
 
        for (l = 0; l < LIST_SIZE; l++) {
-               list_for_each_entry_safe_reverse(b, tmp, &c->lru[l], lru_list)
-                       if (!__cleanup_old_buffer(b, sc->gfp_mask, 0) &&
-                           !--nr_to_scan)
-                               return;
+               list_for_each_entry_safe_reverse(b, tmp, &c->lru[l], lru_list) {
+                       freed += __cleanup_old_buffer(b, gfp_mask, 0);
+                       if (!--nr_to_scan)
+                               break;
+               }
                dm_bufio_cond_resched();
        }
+       return freed;
 }
 
-static int shrink(struct shrinker *shrinker, struct shrink_control *sc)
+static unsigned long
+dm_bufio_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
 {
-       struct dm_bufio_client *c =
-           container_of(shrinker, struct dm_bufio_client, shrinker);
-       unsigned long r;
-       unsigned long nr_to_scan = sc->nr_to_scan;
+       struct dm_bufio_client *c;
+       unsigned long freed;
 
+       c = container_of(shrink, struct dm_bufio_client, shrinker);
        if (sc->gfp_mask & __GFP_IO)
                dm_bufio_lock(c);
        else if (!dm_bufio_trylock(c))
-               return !nr_to_scan ? 0 : -1;
+               return SHRINK_STOP;
 
-       if (nr_to_scan)
-               __scan(c, nr_to_scan, sc);
+       freed  = __scan(c, sc->nr_to_scan, sc->gfp_mask);
+       dm_bufio_unlock(c);
+       return freed;
+}
 
-       r = c->n_buffers[LIST_CLEAN] + c->n_buffers[LIST_DIRTY];
-       if (r > INT_MAX)
-               r = INT_MAX;
+static unsigned long
+dm_bufio_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
+{
+       struct dm_bufio_client *c;
+       unsigned long count;
 
-       dm_bufio_unlock(c);
+       c = container_of(shrink, struct dm_bufio_client, shrinker);
+       if (sc->gfp_mask & __GFP_IO)
+               dm_bufio_lock(c);
+       else if (!dm_bufio_trylock(c))
+               return 0;
 
-       return r;
+       count = c->n_buffers[LIST_CLEAN] + c->n_buffers[LIST_DIRTY];
+       dm_bufio_unlock(c);
+       return count;
 }
 
 /*
@@ -1582,7 +1595,8 @@ struct dm_bufio_client *dm_bufio_client_create(struct block_device *bdev, unsign
        __cache_size_refresh();
        mutex_unlock(&dm_bufio_clients_lock);
 
-       c->shrinker.shrink = shrink;
+       c->shrinker.count_objects = dm_bufio_shrink_count;
+       c->shrinker.scan_objects = dm_bufio_shrink_scan;
        c->shrinker.seeks = 1;
        c->shrinker.batch = 0;
        register_shrinker(&c->shrinker);
@@ -1669,7 +1683,7 @@ static void cleanup_old_buffers(void)
                        struct dm_buffer *b;
                        b = list_entry(c->lru[LIST_CLEAN].prev,
                                       struct dm_buffer, lru_list);
-                       if (__cleanup_old_buffer(b, 0, max_age * HZ))
+                       if (!__cleanup_old_buffer(b, 0, max_age * HZ))
                                break;
                        dm_bufio_cond_resched();
                }
index 8068d7b..c7caf94 100644 (file)
@@ -203,7 +203,7 @@ config VIDEO_SAMSUNG_EXYNOS_GSC
 
 config VIDEO_SH_VEU
        tristate "SuperH VEU mem2mem video processing driver"
-       depends on VIDEO_DEV && VIDEO_V4L2 && GENERIC_HARDIRQS && HAS_DMA
+       depends on VIDEO_DEV && VIDEO_V4L2 && HAS_DMA
        select VIDEOBUF2_DMA_CONTIG
        select V4L2_MEM2MEM_DEV
        help
index 39882dd..6ecdc39 100644 (file)
@@ -214,7 +214,7 @@ config RADIO_TIMBERDALE
 
 config RADIO_WL1273
        tristate "Texas Instruments WL1273 I2C FM Radio"
-       depends on I2C && VIDEO_V4L2 && GENERIC_HARDIRQS
+       depends on I2C && VIDEO_V4L2
        select MFD_CORE
        select MFD_WL1273_CORE
        select FW_LOADER
index 95f1814..1d38949 100644 (file)
@@ -24,3 +24,15 @@ config MSPRO_BLOCK
          support. This provides a block device driver, which you can use
          to mount the filesystem. Almost everyone wishing MemoryStick
          support should say Y or M here.
+
+config MS_BLOCK
+       tristate "MemoryStick Standard device driver"
+       depends on BLOCK
+       help
+         Say Y here to enable the MemoryStick Standard device driver
+         support. This provides a block device driver, which you can use
+         to mount the filesystem.
+         This driver works with old (bulky) MemoryStick and MemoryStick Duo
+         but not PRO. Say Y if you have such card.
+         Driver is new and not yet well tested, thus it can damage your card
+         (even permanently)
index ecd0299..0d7f90c 100644 (file)
@@ -3,5 +3,5 @@
 #
 
 obj-$(CONFIG_MEMSTICK)         += memstick.o
-
+obj-$(CONFIG_MS_BLOCK)         += ms_block.o
 obj-$(CONFIG_MSPRO_BLOCK)      += mspro_block.o
diff --git a/drivers/memstick/core/ms_block.c b/drivers/memstick/core/ms_block.c
new file mode 100644 (file)
index 0000000..08e7023
--- /dev/null
@@ -0,0 +1,2385 @@
+/*
+ *  ms_block.c - Sony MemoryStick (legacy) storage support
+
+ *  Copyright (C) 2013 Maxim Levitsky <maximlevitsky@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Minor portions of the driver were copied from mspro_block.c which is
+ * Copyright (C) 2007 Alex Dubov <oakad@yahoo.com>
+ *
+ */
+#define DRIVER_NAME "ms_block"
+#define pr_fmt(fmt) DRIVER_NAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/blkdev.h>
+#include <linux/memstick.h>
+#include <linux/idr.h>
+#include <linux/hdreg.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/random.h>
+#include <linux/bitmap.h>
+#include <linux/scatterlist.h>
+#include <linux/jiffies.h>
+#include <linux/workqueue.h>
+#include <linux/mutex.h>
+#include "ms_block.h"
+
+static int debug;
+static int cache_flush_timeout = 1000;
+static bool verify_writes;
+
/*
 * Copies a section of 'sg_from' starting at byte offset 'offset' and with
 * length 'len' to another scatterlist 'sg_to' of 'to_nents' entries.
 * Only the descriptors are copied, not the data itself.
 * Returns the number of bytes mapped into 'sg_to' (0 if 'offset' lies
 * beyond the end of 'sg_from').
 */
static size_t msb_sg_copy(struct scatterlist *sg_from,
	struct scatterlist *sg_to, int to_nents, size_t offset, size_t len)
{
	size_t copied = 0;

	/* Phase 1: skip whole source entries until 'offset' falls inside
	 * one, then emit the (possibly partial) first destination entry. */
	while (offset > 0) {
		if (offset >= sg_from->length) {
			if (sg_is_last(sg_from))
				return 0; /* offset is past end of source */

			offset -= sg_from->length;
			sg_from = sg_next(sg_from);
			continue;
		}

		copied = min(len, sg_from->length - offset);
		sg_set_page(sg_to, sg_page(sg_from),
			copied, sg_from->offset + offset);

		len -= copied;
		offset = 0;

		if (sg_is_last(sg_from) || !len)
			goto out;

		sg_to = sg_next(sg_to);
		to_nents--;
		sg_from = sg_next(sg_from);
	}

	/* Phase 2: copy whole source entries while more than one entry's
	 * worth of data remains and destination slots are available. */
	while (len > sg_from->length && to_nents--) {
		len -= sg_from->length;
		copied += sg_from->length;

		sg_set_page(sg_to, sg_page(sg_from),
				sg_from->length, sg_from->offset);

		if (sg_is_last(sg_from) || !len)
			goto out;

		sg_from = sg_next(sg_from);
		sg_to = sg_next(sg_to);
	}

	/* Phase 3: emit the trailing partial entry, if any room is left. */
	if (len && to_nents) {
		sg_set_page(sg_to, sg_page(sg_from), len, sg_from->offset);
		copied += len;
	}
out:
	sg_mark_end(sg_to);
	return copied;
}
+
/*
 * Compares a section of 'sg' starting at byte offset 'offset' and of length
 * 'len' against a linear buffer of the same length at address 'buffer'.
 * Returns 0 if equal and -1 otherwise (also -1 when the scatterlist runs
 * out before 'len' bytes were compared).
 */
static int msb_sg_compare_to_buffer(struct scatterlist *sg,
					size_t offset, u8 *buffer, size_t len)
{
	int retval = 0, cmplen;
	struct sg_mapping_iter miter;

	/* SG_MITER_ATOMIC: mappings are short-lived kmap_atomic ones */
	sg_miter_start(&miter, sg, sg_nents(sg),
					SG_MITER_ATOMIC | SG_MITER_FROM_SG);

	while (sg_miter_next(&miter) && len > 0) {
		/* Skip mapped chunks that lie entirely below 'offset' */
		if (offset >= miter.length) {
			offset -= miter.length;
			continue;
		}

		cmplen = min(miter.length - offset, len);
		retval = memcmp(miter.addr + offset, buffer, cmplen) ? -1 : 0;
		if (retval)
			break;

		buffer += cmplen;
		len -= cmplen;
		offset = 0;
	}

	/* scatterlist exhausted before all 'len' bytes were compared */
	if (!retval && len)
		retval = -1;

	sg_miter_stop(&miter);
	return retval;
}
+
+
/*
 * Map a logical block address to its zone.
 * Flash is broken into zones: the first zone holds 494 logical blocks
 * (0-493); every subsequent zone holds 496. So zone #1 covers LBAs
 * 494-989, zone #2 covers 990-1485, and so on.
 */
static int msb_get_zone_from_lba(int lba)
{
	if (lba >= 494)
		return (lba - 494) / 496 + 1;
	return 0;
}
+
/* Get zone of a physical block; zones are MS_BLOCKS_IN_ZONE blocks each */
static int msb_get_zone_from_pba(int pba)
{
	return pba / MS_BLOCKS_IN_ZONE;
}
+
/*
 * Debug-only consistency check: the number of blocks not set in the
 * used-blocks bitmap must equal the sum of the per-zone free counters.
 * On mismatch the card is switched to read-only to avoid further damage.
 * Returns 0 when consistent (or when debugging is off), -EINVAL otherwise.
 */
static int msb_validate_used_block_bitmap(struct msb_data *msb)
{
	int total_free_blocks = 0;
	int i;

	/* The bitmap scan is O(block_count); only pay for it when debugging */
	if (!debug)
		return 0;

	for (i = 0; i < msb->zone_count; i++)
		total_free_blocks += msb->free_block_count[i];

	if (msb->block_count - bitmap_weight(msb->used_blocks_bitmap,
					msb->block_count) == total_free_blocks)
		return 0;

	pr_err("BUG: free block counts don't match the bitmap");
	msb->read_only = true;
	return -EINVAL;
}
+
+/* Mark physical block as used */
+static void msb_mark_block_used(struct msb_data *msb, int pba)
+{
+       int zone = msb_get_zone_from_pba(pba);
+
+       if (test_bit(pba, msb->used_blocks_bitmap)) {
+               pr_err(
+               "BUG: attempt to mark already used pba %d as used", pba);
+               msb->read_only = true;
+               return;
+       }
+
+       if (msb_validate_used_block_bitmap(msb))
+               return;
+
+       /* No races because all IO is single threaded */
+       __set_bit(pba, msb->used_blocks_bitmap);
+       msb->free_block_count[zone]--;
+}
+
+/* Mark physical block as free */
+static void msb_mark_block_unused(struct msb_data *msb, int pba)
+{
+       int zone = msb_get_zone_from_pba(pba);
+
+       if (!test_bit(pba, msb->used_blocks_bitmap)) {
+               pr_err("BUG: attempt to mark already unused pba %d as unused" , pba);
+               msb->read_only = true;
+               return;
+       }
+
+       if (msb_validate_used_block_bitmap(msb))
+               return;
+
+       /* No races because all IO is single threaded */
+       __clear_bit(pba, msb->used_blocks_bitmap);
+       msb->free_block_count[zone]++;
+}
+
+/* Invalidate current register window */
+static void msb_invalidate_reg_window(struct msb_data *msb)
+{
+       msb->reg_addr.w_offset = offsetof(struct ms_register, id);
+       msb->reg_addr.w_length = sizeof(struct ms_id_register);
+       msb->reg_addr.r_offset = offsetof(struct ms_register, id);
+       msb->reg_addr.r_length = sizeof(struct ms_id_register);
+       msb->addr_valid = false;
+}
+
/*
 * Run a request state machine to completion.  Installs 'state_func' as the
 * card's next_request handler, kicks the host and sleeps until the handler
 * terminates via msb_exit_state_machine().  Returns the error the state
 * machine exited with.
 */
static int msb_run_state_machine(struct msb_data *msb, int   (*state_func)
		(struct memstick_dev *card, struct memstick_request **req))
{
	struct memstick_dev *card = msb->card;

	/* state == -1 means "no state machine currently running" */
	WARN_ON(msb->state != -1);
	msb->int_polling = false;
	msb->state = 0;
	msb->exit_error = 0;

	memset(&card->current_mrq, 0, sizeof(card->current_mrq));

	card->next_request = state_func;
	memstick_new_req(card->host);
	/* completed by msb_exit_state_machine() from the request thread */
	wait_for_completion(&card->mrq_complete);

	WARN_ON(msb->state != -1);
	return msb->exit_error;
}
+
/*
 * Called by state machine handlers to terminate a run.  Records 'error' for
 * msb_run_state_machine(), restores the catch-all request handler and wakes
 * the waiter.  Always returns -ENXIO so the handler can hand it back to the
 * memstick core, stopping further request processing.
 */
static int msb_exit_state_machine(struct msb_data *msb, int error)
{
	WARN_ON(msb->state == -1);

	msb->state = -1;
	msb->exit_error = error;
	msb->card->next_request = h_msb_default_bad;

	/* Invalidate reg window on errors */
	if (error)
		msb_invalidate_reg_window(msb);

	complete(&msb->card->mrq_complete);
	return -ENXIO;
}
+
/*
 * Issue (or emulate) a read of the card INT register.  'timeout' (ms)
 * bounds the polling cycle; -1 selects a 500 ms default.  Returns 1 when a
 * GET_INT TPC was queued (caller must wait for the reply) and 0 when
 * mrq->data[0] already holds a valid value - either auto-fetched by the
 * host or faked as CMDNAK when the timeout expired.
 */
static int msb_read_int_reg(struct msb_data *msb, long timeout)
{
	struct memstick_request *mrq = &msb->card->current_mrq;

	WARN_ON(msb->state == -1);

	/* First call of a polling cycle arms the deadline... */
	if (!msb->int_polling) {
		msb->int_timeout = jiffies +
			msecs_to_jiffies(timeout == -1 ? 500 : timeout);
		msb->int_polling = true;
	} else if (time_after(jiffies, msb->int_timeout)) {
		/* ...and expiry is reported to the caller as a command NAK */
		mrq->data[0] = MEMSTICK_INT_CMDNAK;
		return 0;
	}

	if ((msb->caps & MEMSTICK_CAP_AUTO_GET_INT) &&
				mrq->need_card_int && !mrq->error) {
		/* Parallel-mode host already latched the INT value for us */
		mrq->data[0] = mrq->int_reg;
		mrq->need_card_int = false;
		return 0;
	} else {
		memstick_init_req(mrq, MS_TPC_GET_INT, NULL, 1);
		return 1;
	}
}
+
/*
 * Prepare a read of 'len' bytes at 'offset' in the card register file.
 * If the cached read window does not match, a SET_RW_REG_ADRS TPC is queued
 * first and 0 is returned - the caller must stay in its current state and
 * retry.  Otherwise the READ_REG TPC is queued and 1 is returned.
 */
static int msb_read_regs(struct msb_data *msb, int offset, int len)
{
	struct memstick_request *req = &msb->card->current_mrq;

	if (msb->reg_addr.r_offset != offset ||
	    msb->reg_addr.r_length != len || !msb->addr_valid) {

		msb->reg_addr.r_offset = offset;
		msb->reg_addr.r_length = len;
		msb->addr_valid = true;

		memstick_init_req(req, MS_TPC_SET_RW_REG_ADRS,
			&msb->reg_addr, sizeof(msb->reg_addr));
		return 0;
	}

	memstick_init_req(req, MS_TPC_READ_REG, NULL, len);
	return 1;
}
+
/*
 * Prepare a write of 'len' bytes from 'buf' at 'offset' in the card
 * register file.  Like msb_read_regs(): returns 0 after queueing a
 * SET_RW_REG_ADRS window update (caller retries the same state), 1 after
 * queueing the actual WRITE_REG TPC.
 */
static int msb_write_regs(struct msb_data *msb, int offset, int len, void *buf)
{
	struct memstick_request *req = &msb->card->current_mrq;

	if (msb->reg_addr.w_offset != offset ||
		msb->reg_addr.w_length != len  || !msb->addr_valid) {

		msb->reg_addr.w_offset = offset;
		msb->reg_addr.w_length = len;
		msb->addr_valid = true;

		memstick_init_req(req, MS_TPC_SET_RW_REG_ADRS,
			&msb->reg_addr, sizeof(msb->reg_addr));
		return 0;
	}

	memstick_init_req(req, MS_TPC_WRITE_REG, buf, len);
	return 1;
}
+
/* Catch-all handler installed while no IO is expected; fails any request */
static int h_msb_default_bad(struct memstick_dev *card,
						struct memstick_request **mrq)
{
	return -ENXIO;
}
+
+/*
+ * This function is a handler for reads of one page from device.
+ * Writes output to msb->current_sg, takes sector address from msb->reg.param
+ * Can also be used to read extra data only. Set params accordintly.
+ */
+static int h_msb_read_page(struct memstick_dev *card,
+                                       struct memstick_request **out_mrq)
+{
+       struct msb_data *msb = memstick_get_drvdata(card);
+       struct memstick_request *mrq = *out_mrq = &card->current_mrq;
+       struct scatterlist sg[2];
+       u8 command, intreg;
+
+       if (mrq->error) {
+               dbg("read_page, unknown error");
+               return msb_exit_state_machine(msb, mrq->error);
+       }
+again:
+       switch (msb->state) {
+       case MSB_RP_SEND_BLOCK_ADDRESS:
+               /* msb_write_regs sometimes "fails" because it needs to update
+                       the reg window, and thus it returns request for that.
+                       Then we stay in this state and retry */
+               if (!msb_write_regs(msb,
+                       offsetof(struct ms_register, param),
+                       sizeof(struct ms_param_register),
+                       (unsigned char *)&msb->regs.param))
+                       return 0;
+
+               msb->state = MSB_RP_SEND_READ_COMMAND;
+               return 0;
+
+       case MSB_RP_SEND_READ_COMMAND:
+               command = MS_CMD_BLOCK_READ;
+               memstick_init_req(mrq, MS_TPC_SET_CMD, &command, 1);
+               msb->state = MSB_RP_SEND_INT_REQ;
+               return 0;
+
+       case MSB_RP_SEND_INT_REQ:
+               msb->state = MSB_RP_RECEIVE_INT_REQ_RESULT;
+               /* If dont actually need to send the int read request (only in
+                       serial mode), then just fall through */
+               if (msb_read_int_reg(msb, -1))
+                       return 0;
+               /* fallthrough */
+
+       case MSB_RP_RECEIVE_INT_REQ_RESULT:
+               intreg = mrq->data[0];
+               msb->regs.status.interrupt = intreg;
+
+               if (intreg & MEMSTICK_INT_CMDNAK)
+                       return msb_exit_state_machine(msb, -EIO);
+
+               if (!(intreg & MEMSTICK_INT_CED)) {
+                       msb->state = MSB_RP_SEND_INT_REQ;
+                       goto again;
+               }
+
+               msb->int_polling = false;
+               msb->state = (intreg & MEMSTICK_INT_ERR) ?
+                       MSB_RP_SEND_READ_STATUS_REG : MSB_RP_SEND_OOB_READ;
+               goto again;
+
+       case MSB_RP_SEND_READ_STATUS_REG:
+                /* read the status register to understand source of the INT_ERR */
+               if (!msb_read_regs(msb,
+                       offsetof(struct ms_register, status),
+                       sizeof(struct ms_status_register)))
+                       return 0;
+
+               msb->state = MSB_RP_RECEIVE_OOB_READ;
+               return 0;
+
+       case MSB_RP_RECIVE_STATUS_REG:
+               msb->regs.status = *(struct ms_status_register *)mrq->data;
+               msb->state = MSB_RP_SEND_OOB_READ;
+               /* fallthrough */
+
+       case MSB_RP_SEND_OOB_READ:
+               if (!msb_read_regs(msb,
+                       offsetof(struct ms_register, extra_data),
+                       sizeof(struct ms_extra_data_register)))
+                       return 0;
+
+               msb->state = MSB_RP_RECEIVE_OOB_READ;
+               return 0;
+
+       case MSB_RP_RECEIVE_OOB_READ:
+               msb->regs.extra_data =
+                       *(struct ms_extra_data_register *) mrq->data;
+               msb->state = MSB_RP_SEND_READ_DATA;
+               /* fallthrough */
+
+       case MSB_RP_SEND_READ_DATA:
+               /* Skip that state if we only read the oob */
+               if (msb->regs.param.cp == MEMSTICK_CP_EXTRA) {
+                       msb->state = MSB_RP_RECEIVE_READ_DATA;
+                       goto again;
+               }
+
+               sg_init_table(sg, ARRAY_SIZE(sg));
+               msb_sg_copy(msb->current_sg, sg, ARRAY_SIZE(sg),
+                       msb->current_sg_offset,
+                       msb->page_size);
+
+               memstick_init_req_sg(mrq, MS_TPC_READ_LONG_DATA, sg);
+               msb->state = MSB_RP_RECEIVE_READ_DATA;
+               return 0;
+
+       case MSB_RP_RECEIVE_READ_DATA:
+               if (!(msb->regs.status.interrupt & MEMSTICK_INT_ERR)) {
+                       msb->current_sg_offset += msb->page_size;
+                       return msb_exit_state_machine(msb, 0);
+               }
+
+               if (msb->regs.status.status1 & MEMSTICK_UNCORR_ERROR) {
+                       dbg("read_page: uncorrectable error");
+                       return msb_exit_state_machine(msb, -EBADMSG);
+               }
+
+               if (msb->regs.status.status1 & MEMSTICK_CORR_ERROR) {
+                       dbg("read_page: correctable error");
+                       msb->current_sg_offset += msb->page_size;
+                       return msb_exit_state_machine(msb, -EUCLEAN);
+               } else {
+                       dbg("read_page: INT error, but no status error bits");
+                       return msb_exit_state_machine(msb, -EIO);
+               }
+       }
+
+       BUG();
+}
+
/*
 * Handler of writes of exactly one block.
 * Takes address from msb->regs.param.
 * Writes same extra data to blocks, also taken
 * from msb->regs.extra
 * Returns -EBADMSG if write fails due to uncorrectable error, or -EIO if
 * device refuses to take the command or something else
 */
static int h_msb_write_block(struct memstick_dev *card,
					struct memstick_request **out_mrq)
{
	struct msb_data *msb = memstick_get_drvdata(card);
	struct memstick_request *mrq = *out_mrq = &card->current_mrq;
	struct scatterlist sg[2];
	u8 intreg, command;

	if (mrq->error)
		return msb_exit_state_machine(msb, mrq->error);

again:
	switch (msb->state) {

	/* HACK: Jmicon handling of TPCs between 8 and
	 *	sizeof(memstick_request.data) is broken due to hardware
	 *	bug in PIO mode that is used for these TPCs
	 *	Therefore split the write
	 */

	case MSB_WB_SEND_WRITE_PARAMS:
		/* 0 return means the reg window had to be updated first;
		 * stay in this state and retry */
		if (!msb_write_regs(msb,
			offsetof(struct ms_register, param),
			sizeof(struct ms_param_register),
			&msb->regs.param))
			return 0;

		msb->state = MSB_WB_SEND_WRITE_OOB;
		return 0;

	case MSB_WB_SEND_WRITE_OOB:
		if (!msb_write_regs(msb,
			offsetof(struct ms_register, extra_data),
			sizeof(struct ms_extra_data_register),
			&msb->regs.extra_data))
			return 0;
		msb->state = MSB_WB_SEND_WRITE_COMMAND;
		return 0;


	case MSB_WB_SEND_WRITE_COMMAND:
		command = MS_CMD_BLOCK_WRITE;
		memstick_init_req(mrq, MS_TPC_SET_CMD, &command, 1);
		msb->state = MSB_WB_SEND_INT_REQ;
		return 0;

	case MSB_WB_SEND_INT_REQ:
		msb->state = MSB_WB_RECEIVE_INT_REQ;
		if (msb_read_int_reg(msb, -1))
			return 0;
		/* fallthrough */

	case MSB_WB_RECEIVE_INT_REQ:
		intreg = mrq->data[0];
		msb->regs.status.interrupt = intreg;

		/* errors mean out of here, and fast... */
		if (intreg & (MEMSTICK_INT_CMDNAK))
			return msb_exit_state_machine(msb, -EIO);

		if (intreg & MEMSTICK_INT_ERR)
			return msb_exit_state_machine(msb, -EBADMSG);


		/* for last page we need to poll CED */
		if (msb->current_page == msb->pages_in_block) {
			if (intreg & MEMSTICK_INT_CED)
				return msb_exit_state_machine(msb, 0);
			msb->state = MSB_WB_SEND_INT_REQ;
			goto again;

		}

		/* for non-last page we need BREQ before writing next chunk */
		if (!(intreg & MEMSTICK_INT_BREQ)) {
			msb->state = MSB_WB_SEND_INT_REQ;
			goto again;
		}

		msb->int_polling = false;
		msb->state = MSB_WB_SEND_WRITE_DATA;
		/* fallthrough */

	case MSB_WB_SEND_WRITE_DATA:
		sg_init_table(sg, ARRAY_SIZE(sg));

		/* a short copy means current_sg can't supply a full page */
		if (msb_sg_copy(msb->current_sg, sg, ARRAY_SIZE(sg),
			msb->current_sg_offset,
			msb->page_size) < msb->page_size)
			return msb_exit_state_machine(msb, -EIO);

		memstick_init_req_sg(mrq, MS_TPC_WRITE_LONG_DATA, sg);
		mrq->need_card_int = 1;
		msb->state = MSB_WB_RECEIVE_WRITE_CONFIRMATION;
		return 0;

	case MSB_WB_RECEIVE_WRITE_CONFIRMATION:
		/* page accepted; advance and poll INT for the next one */
		msb->current_page++;
		msb->current_sg_offset += msb->page_size;
		msb->state = MSB_WB_SEND_INT_REQ;
		goto again;
	default:
		BUG();
	}

	return 0;
}
+
/*
 * This function is used to send simple IO requests to device that consist
 * of register write + command.  The command value comes from
 * msb->command_value; OOB data is written first when msb->command_need_oob
 * is set.  Exits with 0 on success, -EIO on NAK, -EBADMSG on INT_ERR.
 */
static int h_msb_send_command(struct memstick_dev *card,
					struct memstick_request **out_mrq)
{
	struct msb_data *msb = memstick_get_drvdata(card);
	struct memstick_request *mrq = *out_mrq = &card->current_mrq;
	u8 intreg;

	if (mrq->error) {
		dbg("send_command: unknown error");
		return msb_exit_state_machine(msb, mrq->error);
	}
again:
	switch (msb->state) {

	/* HACK: see h_msb_write_block */
	case MSB_SC_SEND_WRITE_PARAMS: /* write param register*/
		/* 0 means a reg-window update was queued; retry this state */
		if (!msb_write_regs(msb,
			offsetof(struct ms_register, param),
			sizeof(struct ms_param_register),
			&msb->regs.param))
			return 0;
		msb->state = MSB_SC_SEND_WRITE_OOB;
		return 0;

	case MSB_SC_SEND_WRITE_OOB:
		if (!msb->command_need_oob) {
			msb->state = MSB_SC_SEND_COMMAND;
			goto again;
		}

		if (!msb_write_regs(msb,
			offsetof(struct ms_register, extra_data),
			sizeof(struct ms_extra_data_register),
			&msb->regs.extra_data))
			return 0;

		msb->state = MSB_SC_SEND_COMMAND;
		return 0;

	case MSB_SC_SEND_COMMAND:
		memstick_init_req(mrq, MS_TPC_SET_CMD, &msb->command_value, 1);
		msb->state = MSB_SC_SEND_INT_REQ;
		return 0;

	case MSB_SC_SEND_INT_REQ:
		msb->state = MSB_SC_RECEIVE_INT_REQ;
		if (msb_read_int_reg(msb, -1))
			return 0;
		/* fallthrough */

	case MSB_SC_RECEIVE_INT_REQ:
		intreg = mrq->data[0];

		if (intreg & MEMSTICK_INT_CMDNAK)
			return msb_exit_state_machine(msb, -EIO);
		if (intreg & MEMSTICK_INT_ERR)
			return msb_exit_state_machine(msb, -EBADMSG);

		/* keep polling until the card reports command completion */
		if (!(intreg & MEMSTICK_INT_CED)) {
			msb->state = MSB_SC_SEND_INT_REQ;
			goto again;
		}

		return msb_exit_state_machine(msb, 0);
	}

	BUG();
}
+
/*
 * Small handler for card reset: sends MS_CMD_RESET (with card interrupt
 * polling disabled, since a resetting card cannot signal INT) and exits on
 * the confirmation pass.
 */
static int h_msb_reset(struct memstick_dev *card,
					struct memstick_request **out_mrq)
{
	u8 command = MS_CMD_RESET;
	struct msb_data *msb = memstick_get_drvdata(card);
	struct memstick_request *mrq = *out_mrq = &card->current_mrq;

	if (mrq->error)
		return msb_exit_state_machine(msb, mrq->error);

	switch (msb->state) {
	case MSB_RS_SEND:
		memstick_init_req(mrq, MS_TPC_SET_CMD, &command, 1);
		mrq->need_card_int = 0;
		msb->state = MSB_RS_CONFIRM;
		return 0;
	case MSB_RS_CONFIRM:
		return msb_exit_state_machine(msb, 0);
	}
	BUG();
}
+
/*
 * This handler is used to do the serial->parallel switch: first tells the
 * card (MEMSTICK_SYS_PAM in the system register), then switches the host
 * interface and sends a dummy GET_INT to confirm the card still responds.
 * On any error the cached PAM bit is rolled back before exiting.
 */
static int h_msb_parallel_switch(struct memstick_dev *card,
					struct memstick_request **out_mrq)
{
	struct msb_data *msb = memstick_get_drvdata(card);
	struct memstick_request *mrq = *out_mrq = &card->current_mrq;
	struct memstick_host *host = card->host;

	if (mrq->error) {
		dbg("parallel_switch: error");
		/* keep the register shadow in sync with the real card mode */
		msb->regs.param.system &= ~MEMSTICK_SYS_PAM;
		return msb_exit_state_machine(msb, mrq->error);
	}

	switch (msb->state) {
	case MSB_PS_SEND_SWITCH_COMMAND:
		/* Set the parallel interface on memstick side */
		msb->regs.param.system |= MEMSTICK_SYS_PAM;

		if (!msb_write_regs(msb,
			offsetof(struct ms_register, param),
			1,
			(unsigned char *)&msb->regs.param))
			return 0;

		msb->state = MSB_PS_SWICH_HOST;
		return 0;

	case MSB_PS_SWICH_HOST:
		 /* Set parallel interface on our side + send a dummy request
			to see if card responds */
		host->set_param(host, MEMSTICK_INTERFACE, MEMSTICK_PAR4);
		memstick_init_req(mrq, MS_TPC_GET_INT, NULL, 1);
		msb->state = MSB_PS_CONFIRM;
		return 0;

	case MSB_PS_CONFIRM:
		return msb_exit_state_machine(msb, 0);
	}

	BUG();
}
+
+static int msb_switch_to_parallel(struct msb_data *msb);
+
/*
 * Reset the card, to guard against hw errors being treated as bad blocks.
 * When 'full' is set the host is also power-cycled and dropped back to
 * serial mode before the card-level reset command is sent.  If the card
 * was in parallel mode it is switched back afterwards.  On failure the
 * driver goes read-only.  Returns 0 on success, -EFAULT/-ENODEV on error.
 */
static int msb_reset(struct msb_data *msb, bool full)
{

	bool was_parallel = msb->regs.param.system & MEMSTICK_SYS_PAM;
	struct memstick_dev *card = msb->card;
	struct memstick_host *host = card->host;
	int error;

	/* Reset the card */
	msb->regs.param.system = MEMSTICK_SYS_BAMD;

	if (full) {
		error =  host->set_param(host,
					MEMSTICK_POWER, MEMSTICK_POWER_OFF);
		if (error)
			goto out_error;

		msb_invalidate_reg_window(msb);

		error = host->set_param(host,
					MEMSTICK_POWER, MEMSTICK_POWER_ON);
		if (error)
			goto out_error;

		error = host->set_param(host,
					MEMSTICK_INTERFACE, MEMSTICK_SERIAL);
		if (error) {
			/* NOTE: label sits inside this branch; earlier gotos
			 * jump in here to share the failure path */
out_error:
			dbg("Failed to reset the host controller");
			msb->read_only = true;
			return -EFAULT;
		}
	}

	error = msb_run_state_machine(msb, h_msb_reset);
	if (error) {
		dbg("Failed to reset the card");
		msb->read_only = true;
		return -ENODEV;
	}

	/* Set parallel mode */
	if (was_parallel)
		msb_switch_to_parallel(msb);
	return 0;
}
+
+/* Attempt to switch the card interface to 4-bit parallel mode */
+static int msb_switch_to_parallel(struct msb_data *msb)
+{
+       int ret = msb_run_state_machine(msb, h_msb_parallel_switch);
+
+       if (!ret) {
+               msb->caps |= MEMSTICK_CAP_AUTO_GET_INT;
+               return 0;
+       }
+
+       pr_err("Switch to parallel failed");
+       /* Revert to serial mode in our register mirror and reset the card */
+       msb->regs.param.system &= ~MEMSTICK_SYS_PAM;
+       msb_reset(msb, true);
+       return -EFAULT;
+}
+
+/* Changes overwrite flag on a page.
+ * Programs a BLOCK_WRITE in overwrite-flag mode (cp = OVERWRITE) and runs
+ * the command state machine. Writes can only clear flag bits, never set
+ * them back (see the comment in msb_write_block). Returns 0 or a negative
+ * error code; refused in read-only mode. */
+static int msb_set_overwrite_flag(struct msb_data *msb,
+                                               u16 pba, u8 page, u8 flag)
+{
+       if (msb->read_only)
+               return -EROFS;
+
+       msb->regs.param.block_address = cpu_to_be16(pba);
+       msb->regs.param.page_address = page;
+       msb->regs.param.cp = MEMSTICK_CP_OVERWRITE;
+       msb->regs.extra_data.overwrite_flag = flag;
+       msb->command_value = MS_CMD_BLOCK_WRITE;
+       msb->command_need_oob = true;
+
+       dbg_verbose("changing overwrite flag to %02x for sector %d, page %d",
+                                                       flag, pba, page);
+       return msb_run_state_machine(msb, h_msb_send_command);
+}
+
+/* Mark a whole physical block as bad by clearing its block-status bit */
+static int msb_mark_bad(struct msb_data *msb, int pba)
+{
+       u8 ov_flag = 0xFF & ~MEMSTICK_OVERWRITE_BKST;
+
+       pr_notice("marking pba %d as bad", pba);
+
+       /* Bring the card into a known state before touching the flags */
+       msb_reset(msb, true);
+       return msb_set_overwrite_flag(msb, pba, 0, ov_flag);
+}
+
+/* Mark one page of a physical block as bad by clearing its page-status bit */
+static int msb_mark_page_bad(struct msb_data *msb, int pba, int page)
+{
+       u8 ov_flag = ~MEMSTICK_OVERWRITE_PGST0;
+
+       dbg("marking page %d of pba %d as bad", page, pba);
+
+       /* Bring the card into a known state before touching the flags */
+       msb_reset(msb, true);
+       return msb_set_overwrite_flag(msb, pba, page, ov_flag);
+}
+
+/* Erases one physical block.
+ * Tries twice, resetting the card between attempts. On success the block
+ * is returned to the free pool and remembered as erased (so later writes
+ * can skip the read-back verify). On failure the block is marked bad and
+ * deliberately NOT returned to the free pool - the old code cleared its
+ * used bit anyway, which made a just-marked-bad block eligible for
+ * reallocation, and also printed "erase success" on the failure path. */
+static int msb_erase_block(struct msb_data *msb, u16 pba)
+{
+       int error, try;
+
+       if (msb->read_only)
+               return -EROFS;
+
+       dbg_verbose("erasing pba %d", pba);
+
+       for (try = 1; try < 3; try++) {
+               msb->regs.param.block_address = cpu_to_be16(pba);
+               msb->regs.param.page_address = 0;
+               msb->regs.param.cp = MEMSTICK_CP_BLOCK;
+               msb->command_value = MS_CMD_BLOCK_ERASE;
+               msb->command_need_oob = false;
+
+               error = msb_run_state_machine(msb, h_msb_send_command);
+               /* Stop retrying on success, or when even the recovery
+                  reset fails */
+               if (!error || msb_reset(msb, true))
+                       break;
+       }
+
+       if (error) {
+               pr_err("erase failed, marking pba %d as bad", pba);
+               msb_mark_bad(msb, pba);
+               return error;
+       }
+
+       dbg_verbose("erase success, marking pba %d as unused", pba);
+       msb_mark_block_unused(msb, pba);
+       __set_bit(pba, msb->erased_blocks_bitmap);
+       return 0;
+}
+
+/* Read a single page (pba, page) into sg starting at byte 'offset'.
+ * If extra is non-NULL the page's OOB data is returned there too.
+ * An unmapped pba (MS_BLOCK_INVALID) reads as all 0xFF, like erased flash.
+ * Returns 0 (also for ECC-corrected reads), -EBADMSG on uncorrectable
+ * data (the page is then flagged bad), or another negative error code. */
+static int msb_read_page(struct msb_data *msb,
+       u16 pba, u8 page, struct ms_extra_data_register *extra,
+                                       struct scatterlist *sg,  int offset)
+{
+       int try, error;
+
+       if (pba == MS_BLOCK_INVALID) {
+               unsigned long flags;
+               struct sg_mapping_iter miter;
+               size_t len = msb->page_size;
+
+               dbg_verbose("read unmapped sector. returning 0xFF");
+
+               /* Fill one page worth of the scatterlist with 0xFF,
+                  skipping the first 'offset' bytes */
+               local_irq_save(flags);
+               sg_miter_start(&miter, sg, sg_nents(sg),
+                               SG_MITER_ATOMIC | SG_MITER_TO_SG);
+
+               while (sg_miter_next(&miter) && len > 0) {
+
+                       int chunklen;
+
+                       /* Still within the region to skip */
+                       if (offset && offset >= miter.length) {
+                               offset -= miter.length;
+                               continue;
+                       }
+
+                       chunklen = min(miter.length - offset, len);
+                       memset(miter.addr + offset, 0xFF, chunklen);
+                       len -= chunklen;
+                       offset = 0;
+               }
+
+               sg_miter_stop(&miter);
+               local_irq_restore(flags);
+
+               /* Leftover offset means sg was too short for the request */
+               if (offset)
+                       return -EFAULT;
+
+               if (extra)
+                       memset(extra, 0xFF, sizeof(*extra));
+               return 0;
+       }
+
+       if (pba >= msb->block_count) {
+               pr_err("BUG: attempt to read beyond the end of the card at pba %d", pba);
+               return -EINVAL;
+       }
+
+       /* Up to two attempts; each failure is followed by a card reset,
+          and if even the reset fails we give up immediately */
+       for (try = 1; try < 3; try++) {
+               msb->regs.param.block_address = cpu_to_be16(pba);
+               msb->regs.param.page_address = page;
+               msb->regs.param.cp = MEMSTICK_CP_PAGE;
+
+               msb->current_sg = sg;
+               msb->current_sg_offset = offset;
+               error = msb_run_state_machine(msb, h_msb_read_page);
+
+
+               /* ECC corrected the data - treat as success */
+               if (error == -EUCLEAN) {
+                       pr_notice("correctable error on pba %d, page %d",
+                               pba, page);
+                       error = 0;
+               }
+
+               if (!error && extra)
+                       *extra = msb->regs.extra_data;
+
+               if (!error || msb_reset(msb, true))
+                       break;
+
+       }
+
+       /* Mark bad pages */
+       if (error == -EBADMSG) {
+               pr_err("uncorrectable error on read of pba %d, page %d",
+                       pba, page);
+
+               /* Only flag the page if the OOB doesn't already say bad */
+               if (msb->regs.extra_data.overwrite_flag &
+                                       MEMSTICK_OVERWRITE_PGST0)
+                       msb_mark_page_bad(msb, pba, page);
+               return -EBADMSG;
+       }
+
+       if (error)
+               pr_err("read of pba %d, page %d failed with error %d",
+                       pba, page, error);
+       return error;
+}
+
+/* Reads oob (extra data) of one page only, without the page data.
+ * A correctable (-EUCLEAN) result is reported as success.
+ * Fixes vs. the old code: valid pba range is [0, block_count), so the
+ * bounds check uses >= (the old '>' let pba == block_count through,
+ * inconsistent with msb_read_page), and the check now runs before the
+ * card registers are programmed. */
+static int msb_read_oob(struct msb_data *msb, u16 pba, u16 page,
+       struct ms_extra_data_register *extra)
+{
+       int error;
+
+       BUG_ON(!extra);
+
+       if (pba >= msb->block_count) {
+               pr_err("BUG: attempt to read beyond the end of card at pba %d", pba);
+               return -EINVAL;
+       }
+
+       msb->regs.param.block_address = cpu_to_be16(pba);
+       msb->regs.param.page_address = page;
+       msb->regs.param.cp = MEMSTICK_CP_EXTRA;
+
+       error = msb_run_state_machine(msb, h_msb_read_page);
+       *extra = msb->regs.extra_data;
+
+       if (error == -EUCLEAN) {
+               pr_notice("correctable error on pba %d, page %d",
+                       pba, page);
+               return 0;
+       }
+
+       return error;
+}
+
+/* Read back a whole block and compare it against the data in orig_sg */
+static int msb_verify_block(struct msb_data *msb, u16 pba,
+                               struct scatterlist *orig_sg,  int offset)
+{
+       struct scatterlist sg;
+       int error, page;
+
+       /* Gather every page of the block into the scratch block buffer */
+       sg_init_one(&sg, msb->block_buffer, msb->block_size);
+
+       for (page = 0; page < msb->pages_in_block; page++) {
+               error = msb_read_page(msb, pba, page,
+                               NULL, &sg, page * msb->page_size);
+               if (error)
+                       return error;
+       }
+
+       /* Any mismatch against the caller's data means verification failed */
+       return msb_sg_compare_to_buffer(orig_sg, offset,
+                       msb->block_buffer, msb->block_size) ? -EIO : 0;
+}
+
+/* Writes exactly one block + oob.
+ * Data comes from sg starting at byte 'offset'; pba must be a free block
+ * in the same zone as lba. The write is verified by reading it back
+ * unless the block is known-erased and verify_writes is off. A failed
+ * write is retried once after erasing the block. */
+static int msb_write_block(struct msb_data *msb,
+                       u16 pba, u32 lba, struct scatterlist *sg, int offset)
+{
+       int error, current_try = 1;
+       BUG_ON(sg->length < msb->page_size);
+
+       if (msb->read_only)
+               return -EROFS;
+
+       /* All of the following indicate caller bugs */
+       if (pba == MS_BLOCK_INVALID) {
+               pr_err(
+                       "BUG: write: attempt to write MS_BLOCK_INVALID block");
+               return -EINVAL;
+       }
+
+       if (pba >= msb->block_count || lba >= msb->logical_block_count) {
+               pr_err(
+               "BUG: write: attempt to write beyond the end of device");
+               return -EINVAL;
+       }
+
+       if (msb_get_zone_from_lba(lba) != msb_get_zone_from_pba(pba)) {
+               pr_err("BUG: write: lba zone mismatch");
+               return -EINVAL;
+       }
+
+       if (pba == msb->boot_block_locations[0] ||
+               pba == msb->boot_block_locations[1]) {
+               pr_err("BUG: write: attempt to write to boot blocks!");
+               return -EINVAL;
+       }
+
+       while (1) {
+
+               /* Helpers called below may switch the driver to read-only */
+               if (msb->read_only)
+                       return -EROFS;
+
+               /* Whole-block write with fresh OOB: overwrite flag 0xF8 and
+                  the logical address stored in the extra data */
+               msb->regs.param.cp = MEMSTICK_CP_BLOCK;
+               msb->regs.param.page_address = 0;
+               msb->regs.param.block_address = cpu_to_be16(pba);
+
+               msb->regs.extra_data.management_flag = 0xFF;
+               msb->regs.extra_data.overwrite_flag = 0xF8;
+               msb->regs.extra_data.logical_address = cpu_to_be16(lba);
+
+               msb->current_sg = sg;
+               msb->current_sg_offset = offset;
+               msb->current_page = 0;
+
+               error = msb_run_state_machine(msb, h_msb_write_block);
+
+               /* Sector we just wrote to is assumed erased since its pba
+                       was erased. If it wasn't erased, write will succeed
+                       and will just clear the bits that were set in the block
+                       thus test that what we have written,
+                       matches what we expect.
+                       We do trust the blocks that we erased */
+               if (!error && (verify_writes ||
+                               !test_bit(pba, msb->erased_blocks_bitmap)))
+                       error = msb_verify_block(msb, pba, sg, offset);
+
+               if (!error)
+                       break;
+
+               /* One retry only; also give up if the recovery reset fails */
+               if (current_try > 1 || msb_reset(msb, true))
+                       break;
+
+               pr_err("write failed, trying to erase the pba %d", pba);
+               error = msb_erase_block(msb, pba);
+               if (error)
+                       break;
+
+               current_try++;
+       }
+       return error;
+}
+
+/* Finds a free block for write replacement.
+ * Picks a random free block within 'zone' (spreads wear across the zone)
+ * and marks it used. Returns its pba, or MS_BLOCK_INVALID when the zone
+ * is exhausted - the driver then switches to read-only mode. */
+static u16 msb_get_free_block(struct msb_data *msb, int zone)
+{
+       u16 pos;
+       int pba = zone * MS_BLOCKS_IN_ZONE;
+       int i;
+
+       /* Random index among the zone's free blocks */
+       get_random_bytes(&pos, sizeof(pos));
+
+       if (!msb->free_block_count[zone]) {
+               pr_err("NO free blocks in the zone %d, to use for a write, (media is WORN out) switching to RO mode", zone);
+               msb->read_only = true;
+               return MS_BLOCK_INVALID;
+       }
+
+       pos %= msb->free_block_count[zone];
+
+       dbg_verbose("have %d choices for a free block, selected randomally: %d",
+               msb->free_block_count[zone], pos);
+
+       /* Walk to the (pos+1)-th clear bit at or after the zone start */
+       pba = find_next_zero_bit(msb->used_blocks_bitmap,
+                                                       msb->block_count, pba);
+       for (i = 0; i < pos; ++i)
+               pba = find_next_zero_bit(msb->used_blocks_bitmap,
+                                               msb->block_count, pba + 1);
+
+       dbg_verbose("result of the free blocks scan: pba %d", pba);
+
+       /* free_block_count promised a free block in this zone; if the scan
+          ran out anyway, the accounting is corrupt - go read-only */
+       if (pba == msb->block_count || (msb_get_zone_from_pba(pba)) != zone) {
+               pr_err("BUG: cant get a free block");
+               msb->read_only = true;
+               return MS_BLOCK_INVALID;
+       }
+
+       msb_mark_block_used(msb, pba);
+       return pba;
+}
+
+/* Write a new version of logical block 'lba' from sg + offset.
+ * Classic FTL update: clear the old copy's UDST flag, write the data to a
+ * freshly allocated block, then erase the old copy and repoint the
+ * mapping. Up to 3 target blocks are tried; total failure switches the
+ * driver to read-only mode. */
+static int msb_update_block(struct msb_data *msb, u16 lba,
+       struct scatterlist *sg, int offset)
+{
+       u16 pba, new_pba;
+       int error, try;
+
+       pba = msb->lba_to_pba_table[lba];
+       dbg_verbose("start of a block update at lba  %d, pba %d", lba, pba);
+
+       if (pba != MS_BLOCK_INVALID) {
+               dbg_verbose("setting the update flag on the block");
+               /* Clear UDST on the old copy before writing the new one, so
+                  a post-crash scan can tell the two copies of this lba
+                  apart (see the collision handling in msb_ftl_scan) */
+               msb_set_overwrite_flag(msb, pba, 0,
+                               0xFF & ~MEMSTICK_OVERWRITE_UDST);
+       }
+
+       for (try = 0; try < 3; try++) {
+               new_pba = msb_get_free_block(msb,
+                       msb_get_zone_from_lba(lba));
+
+               if (new_pba == MS_BLOCK_INVALID) {
+                       error = -EIO;
+                       goto out;
+               }
+
+               dbg_verbose("block update: writing updated block to the pba %d",
+                                                               new_pba);
+               error = msb_write_block(msb, new_pba, lba, sg, offset);
+               if (error == -EBADMSG) {
+                       /* Target block turned out bad - mark it and retry
+                          with another free block */
+                       msb_mark_bad(msb, new_pba);
+                       continue;
+               }
+
+               if (error)
+                       goto out;
+
+               dbg_verbose("block update: erasing the old block");
+               /* Erase result deliberately ignored - the data is already
+                  safe in the new block */
+               msb_erase_block(msb, pba);
+               msb->lba_to_pba_table[lba] = new_pba;
+               return 0;
+       }
+out:
+       if (error) {
+               pr_err("block update error after %d tries,  switching to r/o mode", try);
+               msb->read_only = true;
+       }
+       return error;
+}
+
+/* Converts endianness in the boot block for easy use.
+ * Converts the boot page fields from on-media big-endian to CPU order,
+ * in place (NOTE(review): after this the fields hold CPU-endian values in
+ * nominally big-endian storage - callers must not byte-swap them again). */
+static void msb_fix_boot_page_endianness(struct ms_boot_page *p)
+{
+       p->header.block_id = be16_to_cpu(p->header.block_id);
+       p->header.format_reserved = be16_to_cpu(p->header.format_reserved);
+       p->entry.disabled_block.start_addr
+               = be32_to_cpu(p->entry.disabled_block.start_addr);
+       p->entry.disabled_block.data_size
+               = be32_to_cpu(p->entry.disabled_block.data_size);
+       p->entry.cis_idi.start_addr
+               = be32_to_cpu(p->entry.cis_idi.start_addr);
+       p->entry.cis_idi.data_size
+               = be32_to_cpu(p->entry.cis_idi.data_size);
+       p->attr.block_size = be16_to_cpu(p->attr.block_size);
+       p->attr.number_of_blocks = be16_to_cpu(p->attr.number_of_blocks);
+       p->attr.number_of_effective_blocks
+               = be16_to_cpu(p->attr.number_of_effective_blocks);
+       p->attr.page_size = be16_to_cpu(p->attr.page_size);
+       p->attr.memory_manufacturer_code
+               = be16_to_cpu(p->attr.memory_manufacturer_code);
+       p->attr.memory_device_code = be16_to_cpu(p->attr.memory_device_code);
+       p->attr.implemented_capacity
+               = be16_to_cpu(p->attr.implemented_capacity);
+       p->attr.controller_number = be16_to_cpu(p->attr.controller_number);
+       p->attr.controller_function = be16_to_cpu(p->attr.controller_function);
+}
+
+/* Scan the first MS_BLOCK_MAX_BOOT_ADDR blocks for up to two boot blocks,
+ * recording their locations and contents (converted to CPU endianness) in
+ * msb->boot_page[0..1]. Returns 0 if at least one boot block was found. */
+static int msb_read_boot_blocks(struct msb_data *msb)
+{
+       int pba = 0;
+       struct scatterlist sg;
+       struct ms_extra_data_register extra;
+       struct ms_boot_page *page;
+
+       msb->boot_block_locations[0] = MS_BLOCK_INVALID;
+       msb->boot_block_locations[1] = MS_BLOCK_INVALID;
+       msb->boot_block_count = 0;
+
+       dbg_verbose("Start of a scan for the boot blocks");
+
+       /* Room for both candidate boot pages; allocated once and reused */
+       if (!msb->boot_page) {
+               page = kmalloc(sizeof(struct ms_boot_page)*2, GFP_KERNEL);
+               if (!page)
+                       return -ENOMEM;
+
+               msb->boot_page = page;
+       } else
+               page = msb->boot_page;
+
+       /* Temporary bound so msb_read_page's range check passes before the
+          real block count is known (it is read from the boot page later) */
+       msb->block_count = MS_BLOCK_MAX_BOOT_ADDR;
+
+       for (pba = 0; pba < MS_BLOCK_MAX_BOOT_ADDR; pba++) {
+
+               sg_init_one(&sg, page, sizeof(*page));
+               if (msb_read_page(msb, pba, 0, &extra, &sg, 0)) {
+                       dbg("boot scan: can't read pba %d", pba);
+                       continue;
+               }
+
+               /* Boot blocks have the SYSFLG management bit cleared */
+               if (extra.management_flag & MEMSTICK_MANAGEMENT_SYSFLG) {
+                       dbg("managment flag doesn't indicate boot block %d",
+                                                                       pba);
+                       continue;
+               }
+
+               if (be16_to_cpu(page->header.block_id) != MS_BLOCK_BOOT_ID) {
+                       dbg("the pba at %d doesn' contain boot block ID", pba);
+                       continue;
+               }
+
+               msb_fix_boot_page_endianness(page);
+               msb->boot_block_locations[msb->boot_block_count] = pba;
+
+               /* The next candidate goes into the second half of the buffer */
+               page++;
+               msb->boot_block_count++;
+
+               if (msb->boot_block_count == 2)
+                       break;
+       }
+
+       if (!msb->boot_block_count) {
+               pr_err("media doesn't contain master page, aborting");
+               return -EIO;
+       }
+
+       dbg_verbose("End of scan for boot blocks");
+       return 0;
+}
+
+/* Parse the factory bad-block table referenced from boot block 'block_nr'
+ * and mark every listed block as used so the FTL never allocates it.
+ * An over-long table is truncated (not fatal). Returns 0 or a negative
+ * error code. */
+static int msb_read_bad_block_table(struct msb_data *msb, int block_nr)
+{
+       struct ms_boot_page *boot_block;
+       struct scatterlist sg;
+       u16 *buffer = NULL;
+       int offset = 0;
+       int i, error = 0;
+       int data_size, data_offset, page, page_offset, size_to_read;
+       u16 pba;
+
+       BUG_ON(block_nr > 1);
+       boot_block = &msb->boot_page[block_nr];
+       pba = msb->boot_block_locations[block_nr];
+
+       if (msb->boot_block_locations[block_nr] == MS_BLOCK_INVALID)
+               return -EINVAL;
+
+       /* Table location/size relative to the start of the block; it lives
+          right after the boot page itself */
+       data_size = boot_block->entry.disabled_block.data_size;
+       data_offset = sizeof(struct ms_boot_page) +
+                       boot_block->entry.disabled_block.start_addr;
+       if (!data_size)
+               return 0;
+
+       /* Split the byte offset into a starting page plus an offset within
+          that page, and round the read size up to whole pages */
+       page = data_offset / msb->page_size;
+       page_offset = data_offset % msb->page_size;
+       size_to_read =
+               DIV_ROUND_UP(data_size + page_offset, msb->page_size) *
+                       msb->page_size;
+
+       dbg("reading bad block of boot block at pba %d, offset %d len %d",
+               pba, data_offset, data_size);
+
+       buffer = kzalloc(size_to_read, GFP_KERNEL);
+       if (!buffer)
+               return -ENOMEM;
+
+       /* Read the buffer */
+       sg_init_one(&sg, buffer, size_to_read);
+
+       while (offset < size_to_read) {
+               error = msb_read_page(msb, pba, page, NULL, &sg, offset);
+               if (error)
+                       goto out;
+
+               page++;
+               offset += msb->page_size;
+
+               if (page == msb->pages_in_block) {
+                       pr_err(
+                       "bad block table extends beyond the boot block");
+                       break;
+               }
+       }
+
+       /* Process the bad block table. Entries are big-endian u16 block
+          numbers starting page_offset BYTES into the buffer, so the byte
+          offset must be scaled to a u16 index (the old code used the raw
+          byte offset as the array index and did not add it to the end
+          bound, mis-parsing the table whenever page_offset was non-zero) */
+       for (i = page_offset / sizeof(u16);
+                       i < (page_offset + data_size) / sizeof(u16); i++) {
+
+               u16 bad_block = be16_to_cpu(buffer[i]);
+
+               if (bad_block >= msb->block_count) {
+                       dbg("bad block table contains invalid block %d",
+                                                               bad_block);
+                       continue;
+               }
+
+               if (test_bit(bad_block, msb->used_blocks_bitmap))  {
+                       dbg("duplicate bad block %d in the table",
+                               bad_block);
+                       continue;
+               }
+
+               dbg("block %d is marked as factory bad", bad_block);
+               msb_mark_block_used(msb, bad_block);
+       }
+out:
+       kfree(buffer);
+       return error;
+}
+
+/* Allocate the FTL data structures (used/erased bitmaps, lba->pba map)
+ * once msb->block_count is known. Idempotent - safe to call again. */
+static int msb_ftl_initialize(struct msb_data *msb)
+{
+       int i;
+
+       if (msb->ftl_initialized)
+               return 0;
+
+       /* NOTE(review): 496 usable logical blocks per zone, minus 2
+          reserved - presumably fixed by the MemoryStick layout; confirm */
+       msb->zone_count = msb->block_count / MS_BLOCKS_IN_ZONE;
+       msb->logical_block_count = msb->zone_count * 496 - 2;
+
+       /* NOTE(review): block_count / 8 assumes block_count is a multiple
+          of 8 (and effectively of BITS_PER_LONG, since the bitmap ops
+          access whole longs) - holds when it is a multiple of 512 */
+       msb->used_blocks_bitmap = kzalloc(msb->block_count / 8, GFP_KERNEL);
+       msb->erased_blocks_bitmap = kzalloc(msb->block_count / 8, GFP_KERNEL);
+       msb->lba_to_pba_table =
+               kmalloc(msb->logical_block_count * sizeof(u16), GFP_KERNEL);
+
+       if (!msb->used_blocks_bitmap || !msb->lba_to_pba_table ||
+                                               !msb->erased_blocks_bitmap) {
+               kfree(msb->used_blocks_bitmap);
+               kfree(msb->lba_to_pba_table);
+               kfree(msb->erased_blocks_bitmap);
+               return -ENOMEM;
+       }
+
+       for (i = 0; i < msb->zone_count; i++)
+               msb->free_block_count[i] = MS_BLOCKS_IN_ZONE;
+
+       /* NOTE(review): byte-wise memset fill only marks every entry
+          invalid if MS_BLOCK_INVALID has both bytes equal (0xFFFF) */
+       memset(msb->lba_to_pba_table, MS_BLOCK_INVALID,
+                       msb->logical_block_count * sizeof(u16));
+
+       dbg("initial FTL tables created. Zone count = %d, Logical block count = %d",
+               msb->zone_count, msb->logical_block_count);
+
+       msb->ftl_initialized = true;
+       return 0;
+}
+
+/* Full media scan at initialization: classify every physical block
+ * (boot, factory bad, reserved, temporary, free or mapped), build the
+ * lba->pba table, and resolve duplicate-lba collisions (left over from an
+ * interrupted update) using the UDST bit of the overwrite flags. */
+static int msb_ftl_scan(struct msb_data *msb)
+{
+       u16 pba, lba, other_block;
+       u8 overwrite_flag, managment_flag, other_overwrite_flag;
+       int error;
+       struct ms_extra_data_register extra;
+       /* Overwrite flags of every scanned block, kept for collision handling */
+       u8 *overwrite_flags = kzalloc(msb->block_count, GFP_KERNEL);
+
+       if (!overwrite_flags)
+               return -ENOMEM;
+
+       dbg("Start of media scanning");
+       for (pba = 0; pba < msb->block_count; pba++) {
+
+               if (pba == msb->boot_block_locations[0] ||
+                       pba == msb->boot_block_locations[1]) {
+                       dbg_verbose("pba %05d -> [boot block]", pba);
+                       msb_mark_block_used(msb, pba);
+                       continue;
+               }
+
+               /* Already marked used by the factory bad block table */
+               if (test_bit(pba, msb->used_blocks_bitmap)) {
+                       dbg_verbose("pba %05d -> [factory bad]", pba);
+                       continue;
+               }
+
+               memset(&extra, 0, sizeof(extra));
+               error = msb_read_oob(msb, pba, 0, &extra);
+
+               /* can't trust the page if we can't read the oob */
+               if (error == -EBADMSG) {
+                       pr_notice(
+                       "oob of pba %d damaged, will try to erase it", pba);
+                       msb_mark_block_used(msb, pba);
+                       msb_erase_block(msb, pba);
+                       continue;
+               } else if (error) {
+                       pr_err("unknown error %d on read of oob of pba %d - aborting",
+                               error, pba);
+
+                       kfree(overwrite_flags);
+                       return error;
+               }
+
+               lba = be16_to_cpu(extra.logical_address);
+               managment_flag = extra.management_flag;
+               overwrite_flag = extra.overwrite_flag;
+               overwrite_flags[pba] = overwrite_flag;
+
+               /* Skip bad blocks */
+               if (!(overwrite_flag & MEMSTICK_OVERWRITE_BKST)) {
+                       dbg("pba %05d -> [BAD]", pba);
+                       msb_mark_block_used(msb, pba);
+                       continue;
+               }
+
+               /* Skip system/drm blocks */
+               if ((managment_flag & MEMSTICK_MANAGMENT_FLAG_NORMAL) !=
+                       MEMSTICK_MANAGMENT_FLAG_NORMAL) {
+                       dbg("pba %05d -> [reserved managment flag %02x]",
+                                                       pba, managment_flag);
+                       msb_mark_block_used(msb, pba);
+                       continue;
+               }
+
+               /* Erase temporary tables */
+               if (!(managment_flag & MEMSTICK_MANAGEMENT_ATFLG)) {
+                       dbg("pba %05d -> [temp table] - will erase", pba);
+
+                       msb_mark_block_used(msb, pba);
+                       msb_erase_block(msb, pba);
+                       continue;
+               }
+
+               if (lba == MS_BLOCK_INVALID) {
+                       dbg_verbose("pba %05d -> [free]", pba);
+                       continue;
+               }
+
+               msb_mark_block_used(msb, pba);
+
+               /* Block has LBA not according to zoning*/
+               if (msb_get_zone_from_lba(lba) != msb_get_zone_from_pba(pba)) {
+                       pr_notice("pba %05d -> [bad lba %05d] - will erase",
+                                                               pba, lba);
+                       msb_erase_block(msb, pba);
+                       continue;
+               }
+
+               /* No collisions - great */
+               if (msb->lba_to_pba_table[lba] == MS_BLOCK_INVALID) {
+                       dbg_verbose("pba %05d -> [lba %05d]", pba, lba);
+                       msb->lba_to_pba_table[lba] = pba;
+                       continue;
+               }
+
+               /* Two blocks claim the same lba; keep the copy whose UDST
+                  bit is cleared, erase the other */
+               other_block = msb->lba_to_pba_table[lba];
+               other_overwrite_flag = overwrite_flags[other_block];
+
+               pr_notice("Collision between pba %d and pba %d",
+                       pba, other_block);
+
+               if (!(overwrite_flag & MEMSTICK_OVERWRITE_UDST)) {
+                       pr_notice("pba %d is marked as stable, use it", pba);
+                       msb_erase_block(msb, other_block);
+                       msb->lba_to_pba_table[lba] = pba;
+                       continue;
+               }
+
+               if (!(other_overwrite_flag & MEMSTICK_OVERWRITE_UDST)) {
+                       pr_notice("pba %d is marked as stable, use it",
+                                                               other_block);
+                       msb_erase_block(msb, pba);
+                       continue;
+               }
+
+               /* Neither copy is marked stable - arbitrarily keep the one
+                  found later in the scan */
+               pr_notice("collision between blocks %d and %d, without stable flag set on both, erasing pba %d",
+                               pba, other_block, other_block);
+
+               msb_erase_block(msb, other_block);
+               msb->lba_to_pba_table[lba] = pba;
+       }
+
+       dbg("End of media scanning");
+       kfree(overwrite_flags);
+       return 0;
+}
+
+/* Cache-flush timer expired: defer the actual flush to the I/O workqueue
+ * ('data' is the msb_data pointer registered at setup_timer time) */
+static void msb_cache_flush_timer(unsigned long data)
+{
+       struct msb_data *msb = (struct msb_data *)data;
+       msb->need_flush_cache = true;
+       queue_work(msb->io_queue, &msb->io_work);
+}
+
+
+/* Drop the contents of the write cache without writing them out */
+static void msb_cache_discard(struct msb_data *msb)
+{
+       /* Nothing cached - nothing to do */
+       if (msb->cache_block_lba == MS_BLOCK_INVALID)
+               return;
+
+       /* Cancel a pending delayed flush before invalidating the cache */
+       del_timer_sync(&msb->cache_flush_timer);
+       dbg_verbose("Discarding the write cache");
+
+       msb->cache_block_lba = MS_BLOCK_INVALID;
+       bitmap_zero(&msb->valid_cache_bitmap, msb->pages_in_block);
+}
+
+/* Set up the write cache: arm the flush timer and allocate one block of
+ * cache memory (kept across re-initializations) */
+static int msb_cache_init(struct msb_data *msb)
+{
+       setup_timer(&msb->cache_flush_timer, msb_cache_flush_timer,
+               (unsigned long)msb);
+
+       /* The cache buffer survives reinit - allocate it only once */
+       if (!msb->cache) {
+               msb->cache = kzalloc(msb->block_size, GFP_KERNEL);
+               if (!msb->cache)
+                       return -ENOMEM;
+       }
+
+       /* Start out with an empty cache */
+       msb_cache_discard(msb);
+       return 0;
+}
+
+/* Write the cached block out to the media.
+ * Pages missing from the cache are first read in from the current copy of
+ * the block; then the whole block is written via msb_update_block. Pages
+ * that could not be read are written anyway and afterwards flagged as
+ * damaged in their overwrite flags. */
+static int msb_cache_flush(struct msb_data *msb)
+{
+       struct scatterlist sg;
+       struct ms_extra_data_register extra;
+       int page, offset, error;
+       u16 pba, lba;
+
+       if (msb->read_only)
+               return -EROFS;
+
+       if (msb->cache_block_lba == MS_BLOCK_INVALID)
+               return 0;
+
+       lba = msb->cache_block_lba;
+       pba = msb->lba_to_pba_table[lba];
+
+       dbg_verbose("Flushing the write cache of pba %d (LBA %d)",
+                                               pba, msb->cache_block_lba);
+
+       sg_init_one(&sg, msb->cache , msb->block_size);
+
+       /* Read all missing pages in cache */
+       for (page = 0; page < msb->pages_in_block; page++) {
+
+               if (test_bit(page, &msb->valid_cache_bitmap))
+                       continue;
+
+               offset = page * msb->page_size;
+
+               dbg_verbose("reading non-present sector %d of cache block %d",
+                       page, lba);
+               error = msb_read_page(msb, pba, page, &extra, &sg, offset);
+
+               /* Bad pages are copied with 00 page status */
+               if (error == -EBADMSG) {
+                       pr_err("read error on sector %d, contents probably damaged", page);
+                       continue;
+               }
+
+               if (error)
+                       return error;
+
+               /* Pages already flagged bad in the OOB stay invalid */
+               if ((extra.overwrite_flag & MEMSTICK_OV_PG_NORMAL) !=
+                                                       MEMSTICK_OV_PG_NORMAL) {
+                       dbg("page %d is marked as bad", page);
+                       continue;
+               }
+
+               set_bit(page, &msb->valid_cache_bitmap);
+       }
+
+       /* Write the cache now */
+       error = msb_update_block(msb, msb->cache_block_lba, &sg, 0);
+       /* The update moved the block - fetch its new physical location */
+       pba = msb->lba_to_pba_table[msb->cache_block_lba];
+
+       /* Mark invalid pages */
+       if (!error) {
+               for (page = 0; page < msb->pages_in_block; page++) {
+
+                       if (test_bit(page, &msb->valid_cache_bitmap))
+                               continue;
+
+                       dbg("marking page %d as containing damaged data",
+                               page);
+                       msb_set_overwrite_flag(msb,
+                               pba , page, 0xFF & ~MEMSTICK_OV_PG_NORMAL);
+               }
+       }
+
+       msb_cache_discard(msb);
+       return error;
+}
+
+/* Store one page of sg data into the write cache.
+ * With add_to_cache_only set, the page is stored only when its block is
+ * already the cached one (used to opportunistically populate the cache
+ * during reads). Otherwise a cache holding a different block is flushed
+ * first and the cache is claimed for this lba. */
+static int msb_cache_write(struct msb_data *msb, int lba,
+       int page, bool add_to_cache_only, struct scatterlist *sg, int offset)
+{
+       int error;
+       struct scatterlist sg_tmp[10];
+
+       if (msb->read_only)
+               return -EROFS;
+
+       /* Note the brace-less nesting: this reads 'if the cache is empty or
+          holds another block, AND we were asked only to add to an existing
+          cache entry, then do nothing' */
+       if (msb->cache_block_lba == MS_BLOCK_INVALID ||
+                                               lba != msb->cache_block_lba)
+               if (add_to_cache_only)
+                       return 0;
+
+       /* If we need to write different block */
+       if (msb->cache_block_lba != MS_BLOCK_INVALID &&
+                                               lba != msb->cache_block_lba) {
+               dbg_verbose("first flush the cache");
+               error = msb_cache_flush(msb);
+               if (error)
+                       return error;
+       }
+
+       /* Claim the cache for this block and arm the delayed flush */
+       if (msb->cache_block_lba  == MS_BLOCK_INVALID) {
+               msb->cache_block_lba  = lba;
+               mod_timer(&msb->cache_flush_timer,
+                       jiffies + msecs_to_jiffies(cache_flush_timeout));
+       }
+
+       dbg_verbose("Write of LBA %d page %d to cache ", lba, page);
+
+       /* Clip one page out of sg + offset, then copy it into the cache */
+       sg_init_table(sg_tmp, ARRAY_SIZE(sg_tmp));
+       msb_sg_copy(sg, sg_tmp, ARRAY_SIZE(sg_tmp), offset, msb->page_size);
+
+       sg_copy_to_buffer(sg_tmp, sg_nents(sg_tmp),
+               msb->cache + page * msb->page_size, msb->page_size);
+
+       set_bit(page, &msb->valid_cache_bitmap);
+       return 0;
+}
+
+/* Read one page, preferring the write cache; on a cache miss read from
+ * the device and opportunistically add the page to the cache. */
+static int msb_cache_read(struct msb_data *msb, int lba,
+                               int page, struct scatterlist *sg, int offset)
+{
+       int pba = msb->lba_to_pba_table[lba];
+       struct scatterlist sg_tmp[10];
+       int error = 0;
+
+       if (lba == msb->cache_block_lba &&
+                       test_bit(page, &msb->valid_cache_bitmap)) {
+
+               dbg_verbose("Read of LBA %d (pba %d) sector %d from cache",
+                                                       lba, pba, page);
+
+               /* Clip one page out of sg + offset and fill it from cache */
+               sg_init_table(sg_tmp, ARRAY_SIZE(sg_tmp));
+               msb_sg_copy(sg, sg_tmp, ARRAY_SIZE(sg_tmp),
+                       offset, msb->page_size);
+               sg_copy_from_buffer(sg_tmp, sg_nents(sg_tmp),
+                       msb->cache + msb->page_size * page,
+                                                       msb->page_size);
+       } else {
+               dbg_verbose("Read of LBA %d (pba %d) sector %d from device",
+                                                       lba, pba, page);
+
+               error = msb_read_page(msb, pba, page, NULL, sg, offset);
+               if (error)
+                       return error;
+
+               /* Best effort - failure to cache is not an error */
+               msb_cache_write(msb, lba, page, true, sg, offset);
+       }
+       return error;
+}
+
+/* Emulated geometry table
+ * This table content isn't that important,
+ * One could put here different values, providing that they still
+ * cover whole disk.
+ * 64 MB entry is what windows reports for my 64M memstick */
+
+static const struct chs_entry chs_table[] = {
+/*        size sectors cylinders  heads */
+       { 4,    16,    247,       2  },
+       { 8,    16,    495,       2  },
+       { 16,   16,    495,       4  },
+       { 32,   16,    991,       4  },
+       { 64,   16,    991,       8  },
+       {128,   16,    991,       16 },
+       { 0 }   /* sentinel - size 0 terminates lookups */
+};
+
+/* Load information about the card:
+ * reset it, read the boot page, extract media geometry and attributes,
+ * allocate the block buffer, init the cache and FTL tables, read the
+ * bad-block table and finally scan the media.
+ * Returns 0 on success or a negative errno. */
+static int msb_init_card(struct memstick_dev *card)
+{
+       struct msb_data *msb = memstick_get_drvdata(card);
+       struct memstick_host *host = card->host;
+       struct ms_boot_page *boot_block;
+       int error = 0, i, raw_size_in_megs;
+
+       msb->caps = 0;
+
+       /* NOTE(review): '>= MEMSTICK_CLASS_ROM && <= MEMSTICK_CLASS_ROM'
+        * reduces to '== MEMSTICK_CLASS_ROM'; the id table below also
+        * matches RO/WP classes - was a range ending at MEMSTICK_CLASS_WP
+        * intended?  TODO: confirm against the MemoryStick spec. */
+       if (card->id.class >= MEMSTICK_CLASS_ROM &&
+                               card->id.class <= MEMSTICK_CLASS_ROM)
+               msb->read_only = true;
+
+       msb->state = -1;
+       error = msb_reset(msb, false);
+       if (error)
+               return error;
+
+       /* Due to a bug in Jmicron driver written by Alex Dubov,
+        its serial mode barely works,
+        so we switch to parallel mode right away */
+       if (host->caps & MEMSTICK_CAP_PAR4)
+               msb_switch_to_parallel(msb);
+
+       /* Provisional page size, so the boot page itself can be read */
+       msb->page_size = sizeof(struct ms_boot_page);
+
+       /* Read the boot page */
+       error = msb_read_boot_blocks(msb);
+       if (error)
+               return -EIO;
+
+       boot_block = &msb->boot_page[0];
+
+       /* Save interesting attributes from boot page */
+       msb->block_count = boot_block->attr.number_of_blocks;
+       msb->page_size = boot_block->attr.page_size;
+
+       msb->pages_in_block = boot_block->attr.block_size * 2;
+       msb->block_size = msb->page_size * msb->pages_in_block;
+
+       if (msb->page_size > PAGE_SIZE) {
+               /* this isn't supported by linux at all, anyway*/
+               dbg("device page %d size isn't supported", msb->page_size);
+               return -EINVAL;
+       }
+
+       /* NOTE(review): no matching kfree for block_buffer is visible in
+        * this file's teardown paths - check for a memory leak. */
+       msb->block_buffer = kzalloc(msb->block_size, GFP_KERNEL);
+       if (!msb->block_buffer)
+               return -ENOMEM;
+
+       raw_size_in_megs = (msb->block_size * msb->block_count) >> 20;
+
+       /* Pick an emulated CHS geometry matching the raw capacity */
+       for (i = 0; chs_table[i].size; i++) {
+
+               if (chs_table[i].size != raw_size_in_megs)
+                       continue;
+
+               msb->geometry.cylinders = chs_table[i].cyl;
+               msb->geometry.heads = chs_table[i].head;
+               msb->geometry.sectors = chs_table[i].sec;
+               break;
+       }
+
+       if (boot_block->attr.transfer_supporting == 1)
+               msb->caps |= MEMSTICK_CAP_PAR4;
+
+       if (boot_block->attr.device_type & 0x03)
+               msb->read_only = true;
+
+       dbg("Total block count = %d", msb->block_count);
+       dbg("Each block consists of %d pages", msb->pages_in_block);
+       dbg("Page size = %d bytes", msb->page_size);
+       dbg("Parallel mode supported: %d", !!(msb->caps & MEMSTICK_CAP_PAR4));
+       dbg("Read only: %d", msb->read_only);
+
+#if 0
+       /* Now we can switch the interface */
+       /* NOTE(review): dead code - appears redundant with the PAR4 switch
+        * near the top of this function; confirm before removing. */
+       if (host->caps & msb->caps & MEMSTICK_CAP_PAR4)
+               msb_switch_to_parallel(msb);
+#endif
+
+       error = msb_cache_init(msb);
+       if (error)
+               return error;
+
+       error = msb_ftl_initialize(msb);
+       if (error)
+               return error;
+
+
+       /* Read the bad block table */
+       error = msb_read_bad_block_table(msb, 0);
+
+       if (error && error != -ENOMEM) {
+               dbg("failed to read bad block table from primary boot block, trying from backup");
+               error = msb_read_bad_block_table(msb, 1);
+       }
+
+       if (error)
+               return error;
+
+       /* *drum roll* Scan the media */
+       error = msb_ftl_scan(msb);
+       if (error) {
+               pr_err("Scan of media failed");
+               return error;
+       }
+
+       return 0;
+
+}
+
+/*
+ * msb_do_write_request - service the write part of a block request.
+ * @msb:    driver state
+ * @lba:    first logical block to write
+ * @page:   first page within @lba
+ * @sg:     source scatterlist (the request payload)
+ * @len:    total number of bytes to write
+ * @successfully_written: out - bytes written before any error (0 on entry)
+ *
+ * Block-aligned, block-sized spans bypass the cache and are written with
+ * msb_update_block() (discarding a stale cache entry for that lba first);
+ * everything else is buffered one page at a time via msb_cache_write().
+ * Returns 0 on success or the first error encountered.
+ */
+static int msb_do_write_request(struct msb_data *msb, int lba,
+       int page, struct scatterlist *sg, size_t len, int *successfully_written)
+{
+       int error = 0;
+       off_t offset = 0;
+       *successfully_written = 0;
+
+       while (offset < len) {
+               /* Fast path: a whole block starting at page 0 */
+               if (page == 0 && len - offset >= msb->block_size) {
+
+                       if (msb->cache_block_lba == lba)
+                               msb_cache_discard(msb);
+
+                       dbg_verbose("Writing whole lba %d", lba);
+                       error = msb_update_block(msb, lba, sg, offset);
+                       if (error)
+                               return error;
+
+                       offset += msb->block_size;
+                       *successfully_written += msb->block_size;
+                       lba++;
+                       continue;
+               }
+
+               /* Slow path: buffer a single page in the write cache */
+               error = msb_cache_write(msb, lba, page, false, sg, offset);
+               if (error)
+                       return error;
+
+               offset += msb->page_size;
+               *successfully_written += msb->page_size;
+
+               page++;
+               if (page == msb->pages_in_block) {
+                       page = 0;
+                       lba++;
+               }
+       }
+       return 0;
+}
+
+/*
+ * msb_do_read_request - service the read part of a block request.
+ * @msb:    driver state
+ * @lba:    first logical block to read
+ * @page:   first page within @lba
+ * @sg:     destination scatterlist
+ * @len:    total number of bytes to read
+ * @successfully_read: out - bytes read before any error (0 on entry)
+ *
+ * Reads page by page through msb_cache_read(), advancing to the next
+ * logical block when the page index wraps.  Returns 0 on success or the
+ * first error encountered.
+ */
+static int msb_do_read_request(struct msb_data *msb, int lba,
+               int page, struct scatterlist *sg, int len, int *successfully_read)
+{
+       int error = 0;
+       int offset = 0;
+       *successfully_read = 0;
+
+       while (offset < len) {
+
+               error = msb_cache_read(msb, lba, page, sg, offset);
+               if (error)
+                       return error;
+
+               offset += msb->page_size;
+               *successfully_read += msb->page_size;
+
+               page++;
+               if (page == msb->pages_in_block) {
+                       page = 0;
+                       lba++;
+               }
+       }
+       return 0;
+}
+
+/*
+ * msb_io_work - workqueue handler that drains the block request queue.
+ *
+ * Runs in process context on msb->io_queue.  Loops until no request is
+ * pending: first handles a deferred cache flush, then fetches and
+ * services requests, completing them sector by sector.  msb->req and the
+ * queue itself are only touched under msb->q_lock.
+ */
+static void msb_io_work(struct work_struct *work)
+{
+       struct msb_data *msb = container_of(work, struct msb_data, io_work);
+       int page, error, len;
+       sector_t lba;
+       unsigned long flags;
+       struct scatterlist *sg = msb->prealloc_sg;
+
+       dbg_verbose("IO: work started");
+
+       while (1) {
+               spin_lock_irqsave(&msb->q_lock, flags);
+
+               /* A cache flush was requested (e.g. by the flush timer) */
+               if (msb->need_flush_cache) {
+                       msb->need_flush_cache = false;
+                       spin_unlock_irqrestore(&msb->q_lock, flags);
+                       msb_cache_flush(msb);
+                       continue;
+               }
+
+               if (!msb->req) {
+                       msb->req = blk_fetch_request(msb->queue);
+                       if (!msb->req) {
+                               dbg_verbose("IO: no more requests exiting");
+                               spin_unlock_irqrestore(&msb->q_lock, flags);
+                               return;
+                       }
+               }
+
+               spin_unlock_irqrestore(&msb->q_lock, flags);
+
+               /* If card was removed meanwhile */
+               if (!msb->req)
+                       return;
+
+               /* process the request */
+               dbg_verbose("IO: processing new request");
+               blk_rq_map_sg(msb->queue, msb->req, sg);
+
+               lba = blk_rq_pos(msb->req);
+
+               /* Convert the 512-byte sector position into a (logical
+                * block, page) pair; assumes page_size is a multiple of
+                * 512 - TODO confirm that the media guarantees this. */
+               sector_div(lba, msb->page_size / 512);
+               page = do_div(lba, msb->pages_in_block);
+
+               if (rq_data_dir(msb->req) == READ)
+                       error = msb_do_read_request(msb, lba, page, sg,
+                               blk_rq_bytes(msb->req), &len);
+               else
+                       error = msb_do_write_request(msb, lba, page, sg,
+                               blk_rq_bytes(msb->req), &len);
+
+               spin_lock_irqsave(&msb->q_lock, flags);
+
+               /* Complete the bytes that were transferred successfully */
+               if (len)
+                       if (!__blk_end_request(msb->req, 0, len))
+                               msb->req = NULL;
+
+               /* On a partial failure, fail one more sector with 'error' */
+               if (error && msb->req) {
+                       dbg_verbose("IO: ending one sector of the request with error");
+                       if (!__blk_end_request(msb->req, error, msb->page_size))
+                               msb->req = NULL;
+               }
+
+               if (msb->req)
+                       dbg_verbose("IO: request still pending");
+
+               spin_unlock_irqrestore(&msb->q_lock, flags);
+       }
+}
+
+static DEFINE_IDR(msb_disk_idr); /*set of used disk numbers */
+static DEFINE_MUTEX(msb_disk_lock); /* protects against races in open/release */
+
+/*
+ * msb_bd_open - block_device_operations .open hook.
+ * Takes a usage reference on the driver data while the card is present.
+ * Always succeeds; a vanished card simply gets no extra reference.
+ */
+static int msb_bd_open(struct block_device *bdev, fmode_t mode)
+{
+       struct gendisk *disk = bdev->bd_disk;
+       struct msb_data *msb = disk->private_data;
+
+       dbg_verbose("block device open");
+
+       mutex_lock(&msb_disk_lock);
+
+       /* private_data is cleared on final release, hence the NULL check */
+       if (msb && msb->card)
+               msb->usage_count++;
+
+       mutex_unlock(&msb_disk_lock);
+       return 0;
+}
+
+/*
+ * msb_data_clear - free all media/FTL state owned by msb and detach the
+ * card pointer.  Called on probe failure and from the teardown paths;
+ * kfree(NULL) is a no-op, so partially initialized state is fine.
+ */
+static void msb_data_clear(struct msb_data *msb)
+{
+       kfree(msb->boot_page);
+       kfree(msb->used_blocks_bitmap);
+       /* these two were previously leaked - nothing else frees them */
+       kfree(msb->erased_blocks_bitmap);
+       kfree(msb->block_buffer);
+       kfree(msb->lba_to_pba_table);
+       kfree(msb->cache);
+       msb->card = NULL;
+}
+
+/*
+ * msb_disk_release - drop one usage reference; on the last one, release
+ * the disk id, put the gendisk and free the driver data.
+ * Shared by the block-device release hook and msb_remove().
+ */
+static int msb_disk_release(struct gendisk *disk)
+{
+       struct msb_data *msb = disk->private_data;
+
+       dbg_verbose("block device release");
+       mutex_lock(&msb_disk_lock);
+
+       if (msb) {
+               if (msb->usage_count)
+                       msb->usage_count--;
+
+               /* Last reference: tear everything down */
+               if (!msb->usage_count) {
+                       disk->private_data = NULL;
+                       idr_remove(&msb_disk_idr, msb->disk_id);
+                       put_disk(disk);
+                       kfree(msb);
+               }
+       }
+       mutex_unlock(&msb_disk_lock);
+       return 0;
+}
+
+/* block_device_operations .release hook - just drops one reference */
+static void msb_bd_release(struct gendisk *disk, fmode_t mode)
+{
+       msb_disk_release(disk);
+}
+
+/* Report the emulated CHS geometry chosen in msb_init_card() */
+static int msb_bd_getgeo(struct block_device *bdev,
+                                struct hd_geometry *geo)
+{
+       struct msb_data *msb = bdev->bd_disk->private_data;
+       *geo = msb->geometry;
+       return 0;
+}
+
+/*
+ * msb_prepare_req - request-queue prep_rq hook.
+ * Rejects anything that is not a filesystem or block-PC request and marks
+ * accepted requests so they are not prepared twice.
+ */
+static int msb_prepare_req(struct request_queue *q, struct request *req)
+{
+       if (req->cmd_type != REQ_TYPE_FS &&
+                               req->cmd_type != REQ_TYPE_BLOCK_PC) {
+               blk_dump_rq_flags(req, "MS unsupported request");
+               return BLKPREP_KILL;
+       }
+       req->cmd_flags |= REQ_DONTPREP;
+       return BLKPREP_OK;
+}
+
+/*
+ * msb_submit_req - request_fn for the queue; the block layer invokes it
+ * with the queue lock (msb->q_lock) held.
+ * For a dead card every queued request is failed with -ENODEV; otherwise
+ * actual processing is deferred to msb_io_work on the ordered workqueue.
+ */
+static void msb_submit_req(struct request_queue *q)
+{
+       struct memstick_dev *card = q->queuedata;
+       struct msb_data *msb = memstick_get_drvdata(card);
+       struct request *req = NULL;
+
+       dbg_verbose("Submit request");
+
+       if (msb->card_dead) {
+               dbg("Refusing requests on removed card");
+
+               WARN_ON(!msb->io_queue_stopped);
+
+               while ((req = blk_fetch_request(q)) != NULL)
+                       __blk_end_request_all(req, -ENODEV);
+               return;
+       }
+
+       /* The worker is already busy with a request; it will refetch */
+       if (msb->req)
+               return;
+
+       if (!msb->io_queue_stopped)
+               queue_work(msb->io_queue, &msb->io_work);
+}
+
+/* memstick core .check callback: nonzero while the card is still alive */
+static int msb_check_card(struct memstick_dev *card)
+{
+       struct msb_data *msb = memstick_get_drvdata(card);
+       return (msb->card_dead == 0);
+}
+
+/*
+ * msb_stop - quiesce all IO (memstick core .stop callback, also used on
+ * suspend/remove): stop the block queue, cancel the cache-flush timer,
+ * wait for the worker to drain, and requeue any half-done request.
+ */
+static void msb_stop(struct memstick_dev *card)
+{
+       struct msb_data *msb = memstick_get_drvdata(card);
+       unsigned long flags;
+
+       dbg("Stopping all msblock IO");
+
+       /* blk_stop_queue() must be called with the queue lock held */
+       spin_lock_irqsave(&msb->q_lock, flags);
+       blk_stop_queue(msb->queue);
+       msb->io_queue_stopped = true;
+       spin_unlock_irqrestore(&msb->q_lock, flags);
+
+       del_timer_sync(&msb->cache_flush_timer);
+       flush_workqueue(msb->io_queue);
+
+       /* Put back a request the worker had fetched but not finished */
+       if (msb->req) {
+               spin_lock_irqsave(&msb->q_lock, flags);
+               blk_requeue_request(msb->queue, msb->req);
+               msb->req = NULL;
+               spin_unlock_irqrestore(&msb->q_lock, flags);
+       }
+
+}
+
+/*
+ * msb_start - resume IO after msb_stop() (memstick core .start callback,
+ * also called from probe/resume).  No-op if IO was never stopped or the
+ * card is dead.  Schedules a cache flush and restarts the block queue.
+ */
+static void msb_start(struct memstick_dev *card)
+{
+       struct msb_data *msb = memstick_get_drvdata(card);
+       unsigned long flags;
+
+       dbg("Resuming IO from msblock");
+
+       msb_invalidate_reg_window(msb);
+
+       spin_lock_irqsave(&msb->q_lock, flags);
+       if (!msb->io_queue_stopped || msb->card_dead) {
+               spin_unlock_irqrestore(&msb->q_lock, flags);
+               return;
+       }
+       spin_unlock_irqrestore(&msb->q_lock, flags);
+
+       /* Kick cache flush anyway, its harmless */
+       /* NOTE(review): these two flags are written without q_lock held,
+        * between the two locked regions - confirm this cannot race with
+        * a concurrent msb_stop(). */
+       msb->need_flush_cache = true;
+       msb->io_queue_stopped = false;
+
+       spin_lock_irqsave(&msb->q_lock, flags);
+       blk_start_queue(msb->queue);
+       spin_unlock_irqrestore(&msb->q_lock, flags);
+
+       queue_work(msb->io_queue, &msb->io_work);
+
+}
+
+/* Block device operations exposed to the block layer */
+static const struct block_device_operations msb_bdops = {
+       .open    = msb_bd_open,
+       .release = msb_bd_release,
+       .getgeo  = msb_bd_getgeo,
+       .owner   = THIS_MODULE
+};
+
+/*
+ * msb_init_disk - allocate and register the gendisk, request queue and
+ * IO workqueue for a freshly probed card, then start IO and add_disk().
+ * Returns 0 on success or a negative errno, undoing all partial setup.
+ */
+static int msb_init_disk(struct memstick_dev *card)
+{
+       struct msb_data *msb = memstick_get_drvdata(card);
+       struct memstick_host *host = card->host;
+       int rc;
+       u64 limit = BLK_BOUNCE_HIGH;
+       unsigned long capacity;
+
+       /* Bounce-buffer limit follows the host controller's DMA mask */
+       if (host->dev.dma_mask && *(host->dev.dma_mask))
+               limit = *(host->dev.dma_mask);
+
+       mutex_lock(&msb_disk_lock);
+       msb->disk_id = idr_alloc(&msb_disk_idr, card, 0, 256, GFP_KERNEL);
+       mutex_unlock(&msb_disk_lock);
+
+       if (msb->disk_id  < 0)
+               return msb->disk_id;
+
+       msb->disk = alloc_disk(0);
+       if (!msb->disk) {
+               rc = -ENOMEM;
+               goto out_release_id;
+       }
+
+       msb->queue = blk_init_queue(msb_submit_req, &msb->q_lock);
+       if (!msb->queue) {
+               rc = -ENOMEM;
+               goto out_put_disk;
+       }
+
+       msb->queue->queuedata = card;
+       blk_queue_prep_rq(msb->queue, msb_prepare_req);
+
+       blk_queue_bounce_limit(msb->queue, limit);
+       blk_queue_max_hw_sectors(msb->queue, MS_BLOCK_MAX_PAGES);
+       blk_queue_max_segments(msb->queue, MS_BLOCK_MAX_SEGS);
+       blk_queue_max_segment_size(msb->queue,
+                                  MS_BLOCK_MAX_PAGES * msb->page_size);
+       blk_queue_logical_block_size(msb->queue, msb->page_size);
+
+       sprintf(msb->disk->disk_name, "msblk%d", msb->disk_id);
+       msb->disk->fops = &msb_bdops;
+       msb->disk->private_data = msb;
+       msb->disk->queue = msb->queue;
+       msb->disk->driverfs_dev = &card->dev;
+       msb->disk->flags |= GENHD_FL_EXT_DEVT;
+
+       /* Capacity in 512-byte sectors, as the block layer expects */
+       capacity = msb->pages_in_block * msb->logical_block_count;
+       capacity *= (msb->page_size / 512);
+       set_capacity(msb->disk, capacity);
+       dbg("Set total disk size to %lu sectors", capacity);
+
+       msb->usage_count = 1;
+       msb->io_queue = alloc_ordered_workqueue("ms_block", WQ_MEM_RECLAIM);
+       if (!msb->io_queue) {
+               /* was unchecked: the first queue_work() would oops on NULL */
+               rc = -ENOMEM;
+               goto out_cleanup_queue;
+       }
+       INIT_WORK(&msb->io_work, msb_io_work);
+       sg_init_table(msb->prealloc_sg, MS_BLOCK_MAX_SEGS+1);
+
+       if (msb->read_only)
+               set_disk_ro(msb->disk, 1);
+
+       msb_start(card);
+       add_disk(msb->disk);
+       dbg("Disk added");
+       return 0;
+
+out_cleanup_queue:
+       blk_cleanup_queue(msb->queue);
+       msb->queue = NULL;
+out_put_disk:
+       put_disk(msb->disk);
+out_release_id:
+       mutex_lock(&msb_disk_lock);
+       idr_remove(&msb_disk_idr, msb->disk_id);
+       mutex_unlock(&msb_disk_lock);
+       return rc;
+}
+
+/*
+ * msb_probe - memstick core .probe callback.
+ * Allocates driver state, initializes the card and registers the disk;
+ * on success wires up the check/stop/start callbacks.  All partial state
+ * is unwound on failure.
+ */
+static int msb_probe(struct memstick_dev *card)
+{
+       struct msb_data *msb;
+       int rc = 0;
+
+       msb = kzalloc(sizeof(struct msb_data), GFP_KERNEL);
+       if (!msb)
+               return -ENOMEM;
+       memstick_set_drvdata(card, msb);
+       msb->card = card;
+       spin_lock_init(&msb->q_lock);
+
+       rc = msb_init_card(card);
+       if (rc)
+               goto out_free;
+
+       rc = msb_init_disk(card);
+       if (!rc) {
+               card->check = msb_check_card;
+               card->stop = msb_stop;
+               card->start = msb_start;
+               return 0;
+       }
+out_free:
+       memstick_set_drvdata(card, NULL);
+       msb_data_clear(msb);
+       kfree(msb);
+       return rc;
+}
+
+/*
+ * msb_remove - memstick core .remove callback.
+ * Stops IO, marks the card dead (so queued requests fail with -ENODEV),
+ * unregisters the disk and queue, frees the media state and drops the
+ * probe-time usage reference.
+ */
+static void msb_remove(struct memstick_dev *card)
+{
+       struct msb_data *msb = memstick_get_drvdata(card);
+       unsigned long flags;
+
+       if (!msb->io_queue_stopped)
+               msb_stop(card);
+
+       dbg("Removing the disk device");
+
+       /* Take care of unhandled + new requests from now on */
+       spin_lock_irqsave(&msb->q_lock, flags);
+       msb->card_dead = true;
+       /* Restart the queue so msb_submit_req can drain it with errors */
+       blk_start_queue(msb->queue);
+       spin_unlock_irqrestore(&msb->q_lock, flags);
+
+       /* Remove the disk */
+       del_gendisk(msb->disk);
+       blk_cleanup_queue(msb->queue);
+       msb->queue = NULL;
+
+       mutex_lock(&msb_disk_lock);
+       msb_data_clear(msb);
+       mutex_unlock(&msb_disk_lock);
+
+       /* Drops the reference taken in msb_init_disk() */
+       msb_disk_release(msb->disk);
+       memstick_set_drvdata(card, NULL);
+}
+
+#ifdef CONFIG_PM
+
+/* PM suspend hook: just quiesce all IO; state is revalidated on resume */
+static int msb_suspend(struct memstick_dev *card, pm_message_t state)
+{
+       msb_stop(card);
+       return 0;
+}
+
+/*
+ * msb_resume - PM resume hook.
+ * Re-initializes the card into a throwaway msb_data and compares boot
+ * page, FTL tables and bitmaps with the pre-suspend state; any mismatch
+ * means the card was removed or replaced, so it is marked dead.
+ * Without CONFIG_MEMSTICK_UNSAFE_RESUME the card is always treated as
+ * replaced.
+ */
+static int msb_resume(struct memstick_dev *card)
+{
+       struct msb_data *msb = memstick_get_drvdata(card);
+       struct msb_data *new_msb = NULL;
+       bool card_dead = true;
+
+#ifndef CONFIG_MEMSTICK_UNSAFE_RESUME
+       /* NOTE(review): with this config unset, everything below is dead
+        * code and some compilers will warn about it - confirm this
+        * early-return pattern is intentional. */
+       msb->card_dead = true;
+       return 0;
+#endif
+       mutex_lock(&card->host->lock);
+
+       new_msb = kzalloc(sizeof(struct msb_data), GFP_KERNEL);
+       if (!new_msb)
+               goto out;
+
+       new_msb->card = card;
+       memstick_set_drvdata(card, new_msb);
+       spin_lock_init(&new_msb->q_lock);
+       sg_init_table(msb->prealloc_sg, MS_BLOCK_MAX_SEGS+1);
+
+       if (msb_init_card(card))
+               goto out;
+
+       /* Any difference from the pre-suspend state => different card */
+       if (msb->block_size != new_msb->block_size)
+               goto out;
+
+       if (memcmp(msb->boot_page, new_msb->boot_page,
+                                       sizeof(struct ms_boot_page)))
+               goto out;
+
+       if (msb->logical_block_count != new_msb->logical_block_count ||
+               memcmp(msb->lba_to_pba_table, new_msb->lba_to_pba_table,
+                                               msb->logical_block_count))
+               goto out;
+
+       if (msb->block_count != new_msb->block_count ||
+               memcmp(msb->used_blocks_bitmap, new_msb->used_blocks_bitmap,
+                                                       msb->block_count / 8))
+               goto out;
+
+       card_dead = false;
+out:
+       if (card_dead)
+               dbg("Card was removed/replaced during suspend");
+
+       /* Restore the original driver data regardless of outcome */
+       msb->card_dead = card_dead;
+       memstick_set_drvdata(card, msb);
+
+       if (new_msb) {
+               msb_data_clear(new_msb);
+               kfree(new_msb);
+       }
+
+       msb_start(card);
+       mutex_unlock(&card->host->lock);
+       return 0;
+}
+#else
+
+#define msb_suspend NULL
+#define msb_resume NULL
+
+#endif /* CONFIG_PM */
+
+/* Cards handled by this driver: every legacy storage class (flash, ROM,
+ * read-only, write-protected) plus Duo storage cards */
+static struct memstick_device_id msb_id_tbl[] = {
+       {MEMSTICK_MATCH_ALL, MEMSTICK_TYPE_LEGACY, MEMSTICK_CATEGORY_STORAGE,
+        MEMSTICK_CLASS_FLASH},
+
+       {MEMSTICK_MATCH_ALL, MEMSTICK_TYPE_LEGACY, MEMSTICK_CATEGORY_STORAGE,
+        MEMSTICK_CLASS_ROM},
+
+       {MEMSTICK_MATCH_ALL, MEMSTICK_TYPE_LEGACY, MEMSTICK_CATEGORY_STORAGE,
+        MEMSTICK_CLASS_RO},
+
+       {MEMSTICK_MATCH_ALL, MEMSTICK_TYPE_LEGACY, MEMSTICK_CATEGORY_STORAGE,
+        MEMSTICK_CLASS_WP},
+
+       {MEMSTICK_MATCH_ALL, MEMSTICK_TYPE_DUO, MEMSTICK_CATEGORY_STORAGE_DUO,
+        MEMSTICK_CLASS_DUO},
+       {}
+};
+MODULE_DEVICE_TABLE(memstick, msb_id_tbl);
+
+
+/* Driver registration with the memstick core */
+static struct memstick_driver msb_driver = {
+       .driver = {
+               .name  = DRIVER_NAME,
+               .owner = THIS_MODULE
+       },
+       .id_table = msb_id_tbl,
+       .probe    = msb_probe,
+       .remove   = msb_remove,
+       .suspend  = msb_suspend,
+       .resume   = msb_resume
+};
+
+/* Dynamically allocated block major number */
+static int major;
+
+/*
+ * Module init: reserve a block major, then register with the memstick
+ * core; the major is released again if driver registration fails.
+ */
+static int __init msb_init(void)
+{
+       int rc = register_blkdev(0, DRIVER_NAME);
+
+       if (rc < 0) {
+               pr_err("failed to register major (error %d)\n", rc);
+               return rc;
+       }
+
+       major = rc;
+       rc = memstick_register_driver(&msb_driver);
+       if (rc) {
+               unregister_blkdev(major, DRIVER_NAME);
+               pr_err("failed to register memstick driver (error %d)\n", rc);
+       }
+
+       return rc;
+}
+
+/* Module exit: unregister in reverse order and drop the disk-id idr */
+static void __exit msb_exit(void)
+{
+       memstick_unregister_driver(&msb_driver);
+       unregister_blkdev(major, DRIVER_NAME);
+       idr_destroy(&msb_disk_idr);
+}
+
+module_init(msb_init);
+module_exit(msb_exit);
+
+/* Module parameters (read-only via sysfs except 'debug', which is
+ * writable at runtime by root) */
+module_param(cache_flush_timeout, int, S_IRUGO);
+MODULE_PARM_DESC(cache_flush_timeout,
+                               "Cache flush timeout in msec (1000 default)");
+module_param(debug, int, S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(debug, "Debug level (0-2)");
+
+module_param(verify_writes, bool, S_IRUGO);
+MODULE_PARM_DESC(verify_writes, "Read back and check all data that is written");
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Maxim Levitsky");
+MODULE_DESCRIPTION("Sony MemoryStick block device driver");
diff --git a/drivers/memstick/core/ms_block.h b/drivers/memstick/core/ms_block.h
new file mode 100644 (file)
index 0000000..96e6375
--- /dev/null
@@ -0,0 +1,290 @@
+/*
+ *  ms_block.h - Sony MemoryStick (legacy) storage support
+ *
+ *  Copyright (C) 2013 Maxim Levitsky <maximlevitsky@gmail.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Minor portions of the driver are copied from mspro_block.c which is
+ * Copyright (C) 2007 Alex Dubov <oakad@yahoo.com>
+ *
+ * Also ms structures were copied from old broken driver by same author
+ * These probably come from MS spec
+ *
+ */
+
+#ifndef MS_BLOCK_NEW_H
+#define MS_BLOCK_NEW_H
+
+/* Transfer limits for the request queue */
+#define MS_BLOCK_MAX_SEGS      32
+#define MS_BLOCK_MAX_PAGES     ((2 << 16) - 1)
+
+/* Boot-block discovery and FTL constants */
+#define MS_BLOCK_MAX_BOOT_ADDR 0x000c
+#define MS_BLOCK_BOOT_ID       0x0001
+#define MS_BLOCK_INVALID       0xffff
+#define MS_MAX_ZONES           16
+#define MS_BLOCKS_IN_ZONE      512
+
+#define MS_BLOCK_MAP_LINE_SZ   16
+#define MS_BLOCK_PART_SHIFT    3
+
+
+/* Status bits signalling an uncorrectable media error */
+#define MEMSTICK_UNCORR_ERROR (MEMSTICK_STATUS1_UCFG | \
+               MEMSTICK_STATUS1_UCEX | MEMSTICK_STATUS1_UCDT)
+
+/* Status bits signalling a corrected (recoverable) error */
+#define MEMSTICK_CORR_ERROR (MEMSTICK_STATUS1_FGER | MEMSTICK_STATUS1_EXER | \
+       MEMSTICK_STATUS1_DTER)
+
+#define MEMSTICK_INT_ERROR (MEMSTICK_INT_CMDNAK | MEMSTICK_INT_ERR)
+
+/* Overwrite-flag value of a healthy, in-use block/page */
+#define MEMSTICK_OVERWRITE_FLAG_NORMAL \
+       (MEMSTICK_OVERWRITE_PGST1 | \
+       MEMSTICK_OVERWRITE_PGST0  | \
+       MEMSTICK_OVERWRITE_BKST)
+
+#define MEMSTICK_OV_PG_NORMAL \
+       (MEMSTICK_OVERWRITE_PGST1 | MEMSTICK_OVERWRITE_PGST0)
+
+/* 'MANAGMENT' spelling kept: this is the name ms_block.c uses.
+ * The stray trailing '\' after the last line was removed - it silently
+ * continued the macro onto the following (blank) line. */
+#define MEMSTICK_MANAGMENT_FLAG_NORMAL \
+       (MEMSTICK_MANAGEMENT_SYSFLG |  \
+       MEMSTICK_MANAGEMENT_SCMS1   |  \
+       MEMSTICK_MANAGEMENT_SCMS0)
+
+/* On-media boot-page header; layouts below mirror the MemoryStick spec,
+ * hence __packed everywhere and the exact reserved-byte padding */
+struct ms_boot_header {
+       unsigned short block_id;
+       unsigned short format_reserved;
+       unsigned char  reserved0[184];
+       unsigned char  data_entry;
+       unsigned char  reserved1[179];
+} __packed;
+
+
+/* One entry of the boot-page system area (location + size of a table) */
+struct ms_system_item {
+       unsigned int  start_addr;
+       unsigned int  data_size;
+       unsigned char data_type_id;
+       unsigned char reserved[3];
+} __packed;
+
+/* System area: where the disabled-block table and CIS/IDI live */
+struct ms_system_entry {
+       struct ms_system_item disabled_block;
+       struct ms_system_item cis_idi;
+       unsigned char         reserved[24];
+} __packed;
+
+/* Media attributes from the boot page (geometry, sizes, capabilities) */
+struct ms_boot_attr_info {
+       unsigned char      memorystick_class;
+       unsigned char      format_unique_value1;
+       unsigned short     block_size;
+       unsigned short     number_of_blocks;
+       unsigned short     number_of_effective_blocks;
+       unsigned short     page_size;
+       unsigned char      extra_data_size;
+       unsigned char      format_unique_value2;
+       unsigned char      assembly_time[8];
+       unsigned char      format_unique_value3;
+       unsigned char      serial_number[3];
+       unsigned char      assembly_manufacturer_code;
+       unsigned char      assembly_model_code[3];
+       unsigned short     memory_manufacturer_code;
+       unsigned short     memory_device_code;
+       unsigned short     implemented_capacity;
+       unsigned char      format_unique_value4[2];
+       unsigned char      vcc;
+       unsigned char      vpp;
+       unsigned short     controller_number;
+       unsigned short     controller_function;
+       unsigned char      reserved0[9];
+       unsigned char      transfer_supporting;
+       unsigned short     format_unique_value5;
+       unsigned char      format_type;
+       unsigned char      memorystick_application;
+       unsigned char      device_type;
+       unsigned char      reserved1[22];
+       unsigned char      format_uniqure_value6[2];
+       unsigned char      reserved2[15];
+} __packed;
+
+/* CIS/IDI area: ATA-IDENTIFY-like description of the medium */
+struct ms_cis_idi {
+       unsigned short general_config;
+       unsigned short logical_cylinders;
+       unsigned short reserved0;
+       unsigned short logical_heads;
+       unsigned short track_size;
+       unsigned short page_size;
+       unsigned short pages_per_track;
+       unsigned short msw;
+       unsigned short lsw;
+       unsigned short reserved1;
+       unsigned char  serial_number[20];
+       unsigned short buffer_type;
+       unsigned short buffer_size_increments;
+       unsigned short long_command_ecc;
+       unsigned char  firmware_version[28];
+       unsigned char  model_name[18];
+       unsigned short reserved2[5];
+       unsigned short pio_mode_number;
+       unsigned short dma_mode_number;
+       unsigned short field_validity;
+       unsigned short current_logical_cylinders;
+       unsigned short current_logical_heads;
+       unsigned short current_pages_per_track;
+       unsigned int   current_page_capacity;
+       unsigned short mutiple_page_setting;
+       unsigned int   addressable_pages;
+       unsigned short single_word_dma;
+       unsigned short multi_word_dma;
+       unsigned char  reserved3[128];
+} __packed;
+
+
+/* Complete boot page as read from the media */
+struct ms_boot_page {
+       struct ms_boot_header    header;
+       struct ms_system_entry   entry;
+       struct ms_boot_attr_info attr;
+} __packed;
+
+/* Per-card driver state; one instance per probed MemoryStick */
+struct msb_data {
+       unsigned int                    usage_count;    /* refs (open + probe) */
+       struct memstick_dev             *card;
+       struct gendisk                  *disk;
+       struct request_queue            *queue;
+       spinlock_t                      q_lock;         /* protects req/queue */
+       struct hd_geometry              geometry;
+       struct attribute_group          attr_group;
+       struct request                  *req;           /* in-flight request */
+       int                             caps;
+       int                             disk_id;
+
+       /* IO */
+       struct workqueue_struct         *io_queue;
+       bool                            io_queue_stopped;
+       struct work_struct              io_work;
+       bool                            card_dead;
+
+       /* Media properties */
+       struct ms_boot_page             *boot_page;
+       u16                             boot_block_locations[2];
+       int                             boot_block_count;
+
+       bool                            read_only;
+       unsigned short                  page_size;
+       int                             block_size;
+       int                             pages_in_block;
+       int                             zone_count;
+       int                             block_count;
+       int                             logical_block_count;
+
+       /* FTL tables */
+       unsigned long                   *used_blocks_bitmap;
+       unsigned long                   *erased_blocks_bitmap;
+       u16                             *lba_to_pba_table;
+       int                             free_block_count[MS_MAX_ZONES];
+       bool                            ftl_initialized;
+
+       /* Cache (buffers writes for one logical block at a time) */
+       unsigned char                   *cache;
+       unsigned long                   valid_cache_bitmap;
+       int                             cache_block_lba;
+       bool                            need_flush_cache;
+       struct timer_list               cache_flush_timer;
+
+       /* Preallocated buffers */
+       unsigned char                   *block_buffer;
+       struct scatterlist              prealloc_sg[MS_BLOCK_MAX_SEGS+1];
+
+
+       /* handler's local data (state machine scratch space) */
+       struct ms_register_addr         reg_addr;
+       bool                            addr_valid;
+
+       u8                              command_value;
+       bool                            command_need_oob;
+       struct scatterlist              *current_sg;
+       int                             current_sg_offset;
+
+       struct ms_register              regs;
+       int                             current_page;
+
+       int                             state;
+       int                             exit_error;
+       bool                            int_polling;
+       unsigned long                   int_timeout;
+
+};
+
+/* States of the read-page request state machine.
+ * (Misspelled names like RECIVE/SWICH below are kept as-is: they are the
+ * identifiers ms_block.c references.) */
+enum msb_readpage_states {
+       MSB_RP_SEND_BLOCK_ADDRESS = 0,
+       MSB_RP_SEND_READ_COMMAND,
+
+       MSB_RP_SEND_INT_REQ,
+       MSB_RP_RECEIVE_INT_REQ_RESULT,
+
+       MSB_RP_SEND_READ_STATUS_REG,
+       MSB_RP_RECIVE_STATUS_REG,
+
+       MSB_RP_SEND_OOB_READ,
+       MSB_RP_RECEIVE_OOB_READ,
+
+       MSB_RP_SEND_READ_DATA,
+       MSB_RP_RECEIVE_READ_DATA,
+};
+
+/* States of the write-block state machine */
+enum msb_write_block_states {
+       MSB_WB_SEND_WRITE_PARAMS = 0,
+       MSB_WB_SEND_WRITE_OOB,
+       MSB_WB_SEND_WRITE_COMMAND,
+
+       MSB_WB_SEND_INT_REQ,
+       MSB_WB_RECEIVE_INT_REQ,
+
+       MSB_WB_SEND_WRITE_DATA,
+       MSB_WB_RECEIVE_WRITE_CONFIRMATION,
+};
+
+/* States of the generic command state machine */
+enum msb_send_command_states {
+       MSB_SC_SEND_WRITE_PARAMS,
+       MSB_SC_SEND_WRITE_OOB,
+       MSB_SC_SEND_COMMAND,
+
+       MSB_SC_SEND_INT_REQ,
+       MSB_SC_RECEIVE_INT_REQ,
+
+};
+
+/* States of the reset state machine */
+enum msb_reset_states {
+       MSB_RS_SEND,
+       MSB_RS_CONFIRM,
+};
+
+/* States of the serial-to-parallel switch state machine */
+enum msb_par_switch_states {
+       MSB_PS_SEND_SWITCH_COMMAND,
+       MSB_PS_SWICH_HOST,
+       MSB_PS_CONFIRM,
+};
+
+/* One row of the emulated CHS geometry table (size in megabytes) */
+struct chs_entry {
+       unsigned long size;
+       unsigned char sec;
+       unsigned short cyl;
+       unsigned char head;
+};
+
+/* Forward declarations used before their definitions in ms_block.c */
+static int msb_reset(struct msb_data *msb, bool full);
+
+static int h_msb_default_bad(struct memstick_dev *card,
+                                               struct memstick_request **mrq);
+
+/* Level-gated debug output.  NOTE(review): prints at pr_err severity for
+ * every level, including verbose - presumably so messages always reach
+ * the console; confirm this is intentional. */
+#define __dbg(level, format, ...) \
+       do { \
+               if (debug >= level) \
+                       pr_err(format "\n", ## __VA_ARGS__); \
+       } while (0)
+
+
+#define dbg(format, ...)               __dbg(1, format, ## __VA_ARGS__)
+#define dbg_verbose(format, ...)       __dbg(2, format, ## __VA_ARGS__)
+
+#endif
index cf8bd72..25f8f93 100644 (file)
@@ -612,8 +612,6 @@ static int rtsx_pci_ms_drv_remove(struct platform_device *pdev)
        memstick_remove_host(msh);
        memstick_free_host(msh);
 
-       platform_set_drvdata(pdev, NULL);
-
        dev_dbg(&(pdev->dev),
                ": Realtek PCI-E Memstick controller has been removed\n");
 
index e0e46f5..914c3d1 100644 (file)
@@ -23,7 +23,7 @@ config MFD_AS3711
        select MFD_CORE
        select REGMAP_I2C
        select REGMAP_IRQ
-       depends on I2C=y && GENERIC_HARDIRQS
+       depends on I2C=y
        help
          Support for the AS3711 PMIC from AMS
 
@@ -40,7 +40,7 @@ config PMIC_ADP5520
 config MFD_AAT2870_CORE
        bool "AnalogicTech AAT2870"
        select MFD_CORE
-       depends on I2C=y && GPIOLIB && GENERIC_HARDIRQS
+       depends on I2C=y && GPIOLIB
        help
          If you say yes here you get support for the AAT2870.
          This driver provides common support for accessing the device,
@@ -78,7 +78,7 @@ config MFD_CROS_EC_SPI
 
 config MFD_ASIC3
        bool "Compaq ASIC3"
-       depends on GENERIC_HARDIRQS && GPIOLIB && ARM
+       depends on GPIOLIB && ARM
        select MFD_CORE
         ---help---
          This driver supports the ASIC3 multifunction chip found on many
@@ -104,7 +104,7 @@ config MFD_DA9052_SPI
        select REGMAP_SPI
        select REGMAP_IRQ
        select PMIC_DA9052
-       depends on SPI_MASTER=y && GENERIC_HARDIRQS
+       depends on SPI_MASTER=y
        help
          Support for the Dialog Semiconductor DA9052 PMIC
          when controlled using SPI. This driver provides common support
@@ -116,7 +116,7 @@ config MFD_DA9052_I2C
        select REGMAP_I2C
        select REGMAP_IRQ
        select PMIC_DA9052
-       depends on I2C=y && GENERIC_HARDIRQS
+       depends on I2C=y
        help
          Support for the Dialog Semiconductor DA9052 PMIC
          when controlled using I2C. This driver provides common support
@@ -128,7 +128,7 @@ config MFD_DA9055
        select REGMAP_I2C
        select REGMAP_IRQ
        select MFD_CORE
-       depends on I2C=y && GENERIC_HARDIRQS
+       depends on I2C=y
        help
          Say yes here for support of Dialog Semiconductor DA9055. This is
          a Power Management IC. This driver provides common support for
@@ -144,7 +144,7 @@ config MFD_DA9063
        select MFD_CORE
        select REGMAP_I2C
        select REGMAP_IRQ
-       depends on I2C=y && GENERIC_HARDIRQS
+       depends on I2C=y
        help
          Say yes here for support for the Dialog Semiconductor DA9063 PMIC.
          This includes the I2C driver and core APIs.
@@ -156,7 +156,7 @@ config MFD_MC13783
 
 config MFD_MC13XXX
        tristate
-       depends on (SPI_MASTER || I2C) && GENERIC_HARDIRQS
+       depends on (SPI_MASTER || I2C)
        select MFD_CORE
        select MFD_MC13783
        help
@@ -167,7 +167,7 @@ config MFD_MC13XXX
 
 config MFD_MC13XXX_SPI
        tristate "Freescale MC13783 and MC13892 SPI interface"
-       depends on SPI_MASTER && GENERIC_HARDIRQS
+       depends on SPI_MASTER
        select REGMAP_SPI
        select MFD_MC13XXX
        help
@@ -175,7 +175,7 @@ config MFD_MC13XXX_SPI
 
 config MFD_MC13XXX_I2C
        tristate "Freescale MC13892 I2C interface"
-       depends on I2C && GENERIC_HARDIRQS
+       depends on I2C
        select REGMAP_I2C
        select MFD_MC13XXX
        help
@@ -183,7 +183,7 @@ config MFD_MC13XXX_I2C
 
 config HTC_EGPIO
        bool "HTC EGPIO support"
-       depends on GENERIC_HARDIRQS && GPIOLIB && ARM
+       depends on GPIOLIB && ARM
        help
            This driver supports the CPLD egpio chip present on
            several HTC phones.  It provides basic support for input
@@ -192,7 +192,6 @@ config HTC_EGPIO
 config HTC_PASIC3
        tristate "HTC PASIC3 LED/DS1WM chip support"
        select MFD_CORE
-       depends on GENERIC_HARDIRQS
        help
          This core driver provides register access for the LED/DS1WM
          chips labeled "AIC2" and "AIC3", found on HTC Blueangel and
@@ -210,7 +209,7 @@ config HTC_I2CPLD
 
 config LPC_ICH
        tristate "Intel ICH LPC"
-       depends on PCI && GENERIC_HARDIRQS
+       depends on PCI
        select MFD_CORE
        help
          The LPC bridge function of the Intel ICH provides support for
@@ -220,7 +219,7 @@ config LPC_ICH
 
 config LPC_SCH
        tristate "Intel SCH LPC"
-       depends on PCI && GENERIC_HARDIRQS
+       depends on PCI
        select MFD_CORE
        help
          LPC bridge function of the Intel SCH provides support for
@@ -238,7 +237,7 @@ config MFD_INTEL_MSIC
 config MFD_JANZ_CMODIO
        tristate "Janz CMOD-IO PCI MODULbus Carrier Board"
        select MFD_CORE
-       depends on PCI && GENERIC_HARDIRQS
+       depends on PCI
        help
          This is the core driver for the Janz CMOD-IO PCI MODULbus
          carrier board. This device is a PCI to MODULbus bridge which may
@@ -277,7 +276,7 @@ config MFD_KEMPLD
 
 config MFD_88PM800
        tristate "Marvell 88PM800"
-       depends on I2C=y && GENERIC_HARDIRQS
+       depends on I2C=y
        select REGMAP_I2C
        select REGMAP_IRQ
        select MFD_CORE
@@ -289,7 +288,7 @@ config MFD_88PM800
 
 config MFD_88PM805
        tristate "Marvell 88PM805"
-       depends on I2C=y && GENERIC_HARDIRQS
+       depends on I2C=y
        select REGMAP_I2C
        select REGMAP_IRQ
        select MFD_CORE
@@ -301,7 +300,7 @@ config MFD_88PM805
 
 config MFD_88PM860X
        bool "Marvell 88PM8606/88PM8607"
-       depends on I2C=y && GENERIC_HARDIRQS
+       depends on I2C=y
        select REGMAP_I2C
        select MFD_CORE
        help
@@ -312,7 +311,7 @@ config MFD_88PM860X
 
 config MFD_MAX77686
        bool "Maxim Semiconductor MAX77686 PMIC Support"
-       depends on I2C=y && GENERIC_HARDIRQS
+       depends on I2C=y
        select MFD_CORE
        select REGMAP_I2C
        select IRQ_DOMAIN
@@ -325,7 +324,7 @@ config MFD_MAX77686
 
 config MFD_MAX77693
        bool "Maxim Semiconductor MAX77693 PMIC Support"
-       depends on I2C=y && GENERIC_HARDIRQS
+       depends on I2C=y
        select MFD_CORE
        select REGMAP_I2C
        help
@@ -339,7 +338,7 @@ config MFD_MAX77693
 config MFD_MAX8907
        tristate "Maxim Semiconductor MAX8907 PMIC Support"
        select MFD_CORE
-       depends on I2C=y && GENERIC_HARDIRQS
+       depends on I2C=y
        select REGMAP_I2C
        select REGMAP_IRQ
        help
@@ -350,7 +349,7 @@ config MFD_MAX8907
 
 config MFD_MAX8925
        bool "Maxim Semiconductor MAX8925 PMIC Support"
-       depends on I2C=y && GENERIC_HARDIRQS
+       depends on I2C=y
        select MFD_CORE
        help
          Say yes here to support for Maxim Semiconductor MAX8925. This is
@@ -360,7 +359,7 @@ config MFD_MAX8925
 
 config MFD_MAX8997
        bool "Maxim Semiconductor MAX8997/8966 PMIC Support"
-       depends on I2C=y && GENERIC_HARDIRQS
+       depends on I2C=y
        select MFD_CORE
        select IRQ_DOMAIN
        help
@@ -373,7 +372,7 @@ config MFD_MAX8997
 
 config MFD_MAX8998
        bool "Maxim Semiconductor MAX8998/National LP3974 PMIC Support"
-       depends on I2C=y && GENERIC_HARDIRQS
+       depends on I2C=y
        select MFD_CORE
        select IRQ_DOMAIN
        help
@@ -385,7 +384,7 @@ config MFD_MAX8998
 
 config EZX_PCAP
        bool "Motorola EZXPCAP Support"
-       depends on GENERIC_HARDIRQS && SPI_MASTER
+       depends on SPI_MASTER
        help
          This enables the PCAP ASIC present on EZX Phones. This is
          needed for MMC, TouchScreen, Sound, USB, etc..
@@ -393,7 +392,7 @@ config EZX_PCAP
 config MFD_VIPERBOARD
         tristate "Nano River Technologies Viperboard"
        select MFD_CORE
-       depends on USB && GENERIC_HARDIRQS
+       depends on USB
        default n
        help
          Say yes here if you want support for Nano River Technologies
@@ -407,7 +406,7 @@ config MFD_VIPERBOARD
 config MFD_RETU
        tristate "Nokia Retu and Tahvo multi-function device"
        select MFD_CORE
-       depends on I2C && GENERIC_HARDIRQS
+       depends on I2C
        select REGMAP_IRQ
        help
          Retu and Tahvo are a multi-function devices found on Nokia
@@ -480,7 +479,7 @@ config MFD_PM8XXX_IRQ
 config MFD_RDC321X
        tristate "RDC R-321x southbridge"
        select MFD_CORE
-       depends on PCI && GENERIC_HARDIRQS
+       depends on PCI
        help
          Say yes here if you want to have support for the RDC R-321x SoC
          southbridge which provides access to GPIOs and Watchdog using the
@@ -488,7 +487,7 @@ config MFD_RDC321X
 
 config MFD_RTSX_PCI
        tristate "Realtek PCI-E card reader"
-       depends on PCI && GENERIC_HARDIRQS
+       depends on PCI
        select MFD_CORE
        help
          This supports for Realtek PCI-Express card reader including rts5209,
@@ -498,7 +497,7 @@ config MFD_RTSX_PCI
 
 config MFD_RC5T583
        bool "Ricoh RC5T583 Power Management system device"
-       depends on I2C=y && GENERIC_HARDIRQS
+       depends on I2C=y
        select MFD_CORE
        select REGMAP_I2C
        help
@@ -512,7 +511,7 @@ config MFD_RC5T583
 
 config MFD_SEC_CORE
        bool "SAMSUNG Electronics PMIC Series Support"
-       depends on I2C=y && GENERIC_HARDIRQS
+       depends on I2C=y
        select MFD_CORE
        select REGMAP_I2C
        select REGMAP_IRQ
@@ -555,7 +554,7 @@ config MFD_SM501_GPIO
 
 config MFD_SMSC
        bool "SMSC ECE1099 series chips"
-       depends on I2C=y && GENERIC_HARDIRQS
+       depends on I2C=y
        select MFD_CORE
        select REGMAP_I2C
        help
@@ -577,7 +576,7 @@ config ABX500_CORE
 
 config AB3100_CORE
        bool "ST-Ericsson AB3100 Mixed Signal Circuit core functions"
-       depends on I2C=y && ABX500_CORE && GENERIC_HARDIRQS
+       depends on I2C=y && ABX500_CORE
        select MFD_CORE
        default y if ARCH_U300
        help
@@ -601,7 +600,7 @@ config AB3100_OTP
 
 config AB8500_CORE
        bool "ST-Ericsson AB8500 Mixed Signal Power Management chip"
-       depends on GENERIC_HARDIRQS && ABX500_CORE && MFD_DB8500_PRCMU
+       depends on ABX500_CORE && MFD_DB8500_PRCMU
        select POWER_SUPPLY
        select MFD_CORE
        select IRQ_DOMAIN
@@ -639,7 +638,7 @@ config MFD_DB8500_PRCMU
 
 config MFD_STMPE
        bool "STMicroelectronics STMPE"
-       depends on (I2C=y || SPI_MASTER=y) && GENERIC_HARDIRQS
+       depends on (I2C=y || SPI_MASTER=y)
        select MFD_CORE
        help
          Support for the STMPE family of I/O Expanders from
@@ -680,7 +679,7 @@ endmenu
 
 config MFD_STA2X11
        bool "STMicroelectronics STA2X11"
-       depends on STA2X11 && GENERIC_HARDIRQS
+       depends on STA2X11
        select MFD_CORE
        select REGMAP_MMIO
 
@@ -700,7 +699,6 @@ config MFD_TI_AM335X_TSCADC
        select MFD_CORE
        select REGMAP
        select REGMAP_MMIO
-       depends on GENERIC_HARDIRQS
        help
          If you say yes here you get support for Texas Instruments series
          of Touch Screen /ADC chips.
@@ -717,7 +715,7 @@ config MFD_DM355EVM_MSP
 
 config MFD_LP8788
        bool "TI LP8788 Power Management Unit Driver"
-       depends on I2C=y && GENERIC_HARDIRQS
+       depends on I2C=y
        select MFD_CORE
        select REGMAP_I2C
        select IRQ_DOMAIN
@@ -739,14 +737,14 @@ config MFD_PALMAS
        select MFD_CORE
        select REGMAP_I2C
        select REGMAP_IRQ
-       depends on I2C=y && GENERIC_HARDIRQS
+       depends on I2C=y
        help
          If you say yes here you get support for the Palmas
          series of PMIC chips from Texas Instruments.
 
 config MFD_TI_SSP
        tristate "TI Sequencer Serial Port support"
-       depends on ARCH_DAVINCI_TNETV107X && GENERIC_HARDIRQS
+       depends on ARCH_DAVINCI_TNETV107X
        select MFD_CORE
        ---help---
          Say Y here if you want support for the Sequencer Serial Port
@@ -761,7 +759,6 @@ config TPS6105X
        select REGULATOR
        select MFD_CORE
        select REGULATOR_FIXED_VOLTAGE
-       depends on GENERIC_HARDIRQS
        help
          This option enables a driver for the TP61050/TPS61052
          high-power "white LED driver". This boost converter is
@@ -784,7 +781,7 @@ config TPS65010
 config TPS6507X
        tristate "TI TPS6507x Power Management / Touch Screen chips"
        select MFD_CORE
-       depends on I2C && GENERIC_HARDIRQS
+       depends on I2C
        help
          If you say yes here you get support for the TPS6507x series of
          Power Management / Touch Screen chips.  These include voltage
@@ -798,7 +795,7 @@ config TPS65911_COMPARATOR
 
 config MFD_TPS65090
        bool "TI TPS65090 Power Management chips"
-       depends on I2C=y && GENERIC_HARDIRQS
+       depends on I2C=y
        select MFD_CORE
        select REGMAP_I2C
        select REGMAP_IRQ
@@ -811,7 +808,7 @@ config MFD_TPS65090
 
 config MFD_TPS65217
        tristate "TI TPS65217 Power Management / White LED chips"
-       depends on I2C && GENERIC_HARDIRQS
+       depends on I2C
        select MFD_CORE
        select REGMAP_I2C
        help
@@ -826,7 +823,7 @@ config MFD_TPS65217
 
 config MFD_TPS6586X
        bool "TI TPS6586x Power Management chips"
-       depends on I2C=y && GENERIC_HARDIRQS
+       depends on I2C=y
        select MFD_CORE
        select REGMAP_I2C
        help
@@ -841,7 +838,7 @@ config MFD_TPS6586X
 
 config MFD_TPS65910
        bool "TI TPS65910 Power Management chip"
-       depends on I2C=y && GPIOLIB && GENERIC_HARDIRQS
+       depends on I2C=y && GPIOLIB
        select MFD_CORE
        select REGMAP_I2C
        select REGMAP_IRQ
@@ -862,7 +859,7 @@ config MFD_TPS65912_I2C
        bool "TI TPS65912 Power Management chip with I2C"
        select MFD_CORE
        select MFD_TPS65912
-       depends on I2C=y && GPIOLIB && GENERIC_HARDIRQS
+       depends on I2C=y && GPIOLIB
        help
          If you say yes here you get support for the TPS65912 series of
          PM chips with I2C interface.
@@ -871,14 +868,14 @@ config MFD_TPS65912_SPI
        bool "TI TPS65912 Power Management chip with SPI"
        select MFD_CORE
        select MFD_TPS65912
-       depends on SPI_MASTER && GPIOLIB && GENERIC_HARDIRQS
+       depends on SPI_MASTER && GPIOLIB
        help
          If you say yes here you get support for the TPS65912 series of
          PM chips with SPI interface.
 
 config MFD_TPS80031
        bool "TI TPS80031/TPS80032 Power Management chips"
-       depends on I2C=y && GENERIC_HARDIRQS
+       depends on I2C=y
        select MFD_CORE
        select REGMAP_I2C
        select REGMAP_IRQ
@@ -892,7 +889,7 @@ config MFD_TPS80031
 
 config TWL4030_CORE
        bool "TI TWL4030/TWL5030/TWL6030/TPS659x0 Support"
-       depends on I2C=y && GENERIC_HARDIRQS
+       depends on I2C=y
        select IRQ_DOMAIN
        select REGMAP_I2C
        help
@@ -931,13 +928,13 @@ config TWL4030_POWER
 
 config MFD_TWL4030_AUDIO
        bool "TI TWL4030 Audio"
-       depends on TWL4030_CORE && GENERIC_HARDIRQS
+       depends on TWL4030_CORE
        select MFD_CORE
        default n
 
 config TWL6040_CORE
        bool "TI TWL6040 audio codec"
-       depends on I2C=y && GENERIC_HARDIRQS
+       depends on I2C=y
        select MFD_CORE
        select REGMAP_I2C
        select REGMAP_IRQ
@@ -961,7 +958,7 @@ config MENELAUS
 
 config MFD_WL1273_CORE
        tristate "TI WL1273 FM radio"
-       depends on I2C && GENERIC_HARDIRQS
+       depends on I2C
        select MFD_CORE
        default n
        help
@@ -974,7 +971,6 @@ config MFD_LM3533
        depends on I2C
        select MFD_CORE
        select REGMAP_I2C
-       depends on GENERIC_HARDIRQS
        help
          Say yes here to enable support for National Semiconductor / TI
          LM3533 Lighting Power chips.
@@ -996,7 +992,7 @@ config MFD_TIMBERDALE
 
 config MFD_TC3589X
        bool "Toshiba TC35892 and variants"
-       depends on I2C=y && GENERIC_HARDIRQS
+       depends on I2C=y
        select MFD_CORE
        help
          Support for the Toshiba TC35892 and variants I/O Expander.
@@ -1011,7 +1007,7 @@ config MFD_TMIO
 
 config MFD_T7L66XB
        bool "Toshiba T7L66XB"
-       depends on ARM && HAVE_CLK && GENERIC_HARDIRQS
+       depends on ARM && HAVE_CLK
        select MFD_CORE
        select MFD_TMIO
        help
@@ -1036,7 +1032,7 @@ config MFD_TC6393XB
 
 config MFD_VX855
        tristate "VIA VX855/VX875 integrated south bridge"
-       depends on PCI && GENERIC_HARDIRQS
+       depends on PCI
        select MFD_CORE
        help
          Say yes here to enable support for various functions of the
@@ -1054,7 +1050,7 @@ config MFD_ARIZONA_I2C
        select MFD_ARIZONA
        select MFD_CORE
        select REGMAP_I2C
-       depends on I2C && GENERIC_HARDIRQS
+       depends on I2C
        help
          Support for the Wolfson Microelectronics Arizona platform audio SoC
          core functionality controlled via I2C.
@@ -1064,7 +1060,7 @@ config MFD_ARIZONA_SPI
        select MFD_ARIZONA
        select MFD_CORE
        select REGMAP_SPI
-       depends on SPI_MASTER && GENERIC_HARDIRQS
+       depends on SPI_MASTER
        help
          Support for the Wolfson Microelectronics Arizona platform audio SoC
          core functionality controlled via I2C.
@@ -1090,7 +1086,7 @@ config MFD_WM8997
 config MFD_WM8400
        bool "Wolfson Microelectronics WM8400"
        select MFD_CORE
-       depends on I2C=y && GENERIC_HARDIRQS
+       depends on I2C=y
        select REGMAP_I2C
        help
          Support for the Wolfson Microelecronics WM8400 PMIC and audio
@@ -1100,7 +1096,6 @@ config MFD_WM8400
 
 config MFD_WM831X
        bool
-       depends on GENERIC_HARDIRQS
 
 config MFD_WM831X_I2C
        bool "Wolfson Microelectronics WM831x/2x PMICs with I2C"
@@ -1108,7 +1103,7 @@ config MFD_WM831X_I2C
        select MFD_WM831X
        select REGMAP_I2C
        select IRQ_DOMAIN
-       depends on I2C=y && GENERIC_HARDIRQS
+       depends on I2C=y
        help
          Support for the Wolfson Microelecronics WM831x and WM832x PMICs
          when controlled using I2C.  This driver provides common support
@@ -1121,7 +1116,7 @@ config MFD_WM831X_SPI
        select MFD_WM831X
        select REGMAP_SPI
        select IRQ_DOMAIN
-       depends on SPI_MASTER && GENERIC_HARDIRQS
+       depends on SPI_MASTER
        help
          Support for the Wolfson Microelecronics WM831x and WM832x PMICs
          when controlled using SPI.  This driver provides common support
@@ -1130,12 +1125,11 @@ config MFD_WM831X_SPI
 
 config MFD_WM8350
        bool
-       depends on GENERIC_HARDIRQS
 
 config MFD_WM8350_I2C
        bool "Wolfson Microelectronics WM8350 with I2C"
        select MFD_WM8350
-       depends on I2C=y && GENERIC_HARDIRQS
+       depends on I2C=y
        help
          The WM8350 is an integrated audio and power management
          subsystem with watchdog and RTC functionality for embedded
@@ -1148,7 +1142,7 @@ config MFD_WM8994
        select MFD_CORE
        select REGMAP_I2C
        select REGMAP_IRQ
-       depends on I2C=y && GENERIC_HARDIRQS
+       depends on I2C=y
        help
          The WM8994 is a highly integrated hi-fi CODEC designed for
          smartphone applicatiosn.  As well as audio functionality it
index 5acb9c5..22429b8 100644 (file)
@@ -1,6 +1,6 @@
 config CB710_CORE
        tristate "ENE CB710/720 Flash memory card reader support"
-       depends on PCI && GENERIC_HARDIRQS
+       depends on PCI
        help
          This option enables support for PCI ENE CB710/720 Flash memory card
          reader found in some laptops (ie. some versions of HP Compaq nx9500).
index b7fd5ab..7fc5099 100644 (file)
@@ -487,7 +487,7 @@ config MMC_SDHI
 
 config MMC_CB710
        tristate "ENE CB710 MMC/SD Interface support"
-       depends on PCI && GENERIC_HARDIRQS
+       depends on PCI
        select CB710_CORE
        help
          This option enables support for MMC/SD part of ENE CB710/720 Flash
index 1542751..f5aa4b0 100644 (file)
@@ -1343,7 +1343,7 @@ out:
 static int invalidate_fastmap(struct ubi_device *ubi,
                              struct ubi_fastmap_layout *fm)
 {
-       int ret, i;
+       int ret;
        struct ubi_vid_hdr *vh;
 
        ret = erase_block(ubi, fm->e[0]->pnum);
@@ -1360,9 +1360,6 @@ static int invalidate_fastmap(struct ubi_device *ubi,
        vh->sqnum = cpu_to_be64(ubi_next_sqnum(ubi));
        ret = ubi_io_write_vid_hdr(ubi, fm->e[0]->pnum, vh);
 
-       for (i = 0; i < fm->used_blocks; i++)
-               ubi_wl_put_fm_peb(ubi, fm->e[i], i, fm->to_be_tortured[i]);
-
        return ret;
 }
 
index 5df49d3..c95bfb1 100644 (file)
@@ -1069,6 +1069,9 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
                if (!(e2->ec - e1->ec >= UBI_WL_THRESHOLD)) {
                        dbg_wl("no WL needed: min used EC %d, max free EC %d",
                               e1->ec, e2->ec);
+
+                       /* Give the unused PEB back */
+                       wl_tree_add(e2, &ubi->free);
                        goto out_cancel;
                }
                self_check_in_wl_tree(ubi, e1, &ubi->used);
index 39e5b1c..72df399 100644 (file)
@@ -2404,8 +2404,8 @@ static void bond_validate_arp(struct bonding *bond, struct slave *slave, __be32
        slave->target_last_arp_rx[i] = jiffies;
 }
 
-static int bond_arp_rcv(const struct sk_buff *skb, struct bonding *bond,
-                       struct slave *slave)
+int bond_arp_rcv(const struct sk_buff *skb, struct bonding *bond,
+                struct slave *slave)
 {
        struct arphdr *arp = (struct arphdr *)skb->data;
        unsigned char *arp_ptr;
index ce46776..eeab40b 100644 (file)
@@ -349,6 +349,8 @@ static ssize_t bonding_store_mode(struct device *d,
                goto out;
        }
 
+       /* don't cache arp_validate between modes */
+       bond->params.arp_validate = BOND_ARP_VALIDATE_NONE;
        bond->params.mode = new_value;
        bond_set_mode_ops(bond, bond->params.mode);
        pr_info("%s: setting mode to %s (%d).\n",
@@ -419,27 +421,39 @@ static ssize_t bonding_store_arp_validate(struct device *d,
                                          struct device_attribute *attr,
                                          const char *buf, size_t count)
 {
-       int new_value;
        struct bonding *bond = to_bond(d);
+       int new_value, ret = count;
 
+       if (!rtnl_trylock())
+               return restart_syscall();
        new_value = bond_parse_parm(buf, arp_validate_tbl);
        if (new_value < 0) {
                pr_err("%s: Ignoring invalid arp_validate value %s\n",
                       bond->dev->name, buf);
-               return -EINVAL;
+               ret = -EINVAL;
+               goto out;
        }
-       if (new_value && (bond->params.mode != BOND_MODE_ACTIVEBACKUP)) {
+       if (bond->params.mode != BOND_MODE_ACTIVEBACKUP) {
                pr_err("%s: arp_validate only supported in active-backup mode.\n",
                       bond->dev->name);
-               return -EINVAL;
+               ret = -EINVAL;
+               goto out;
        }
        pr_info("%s: setting arp_validate to %s (%d).\n",
                bond->dev->name, arp_validate_tbl[new_value].modename,
                new_value);
 
+       if (bond->dev->flags & IFF_UP) {
+               if (!new_value)
+                       bond->recv_probe = NULL;
+               else if (bond->params.arp_interval)
+                       bond->recv_probe = bond_arp_rcv;
+       }
        bond->params.arp_validate = new_value;
+out:
+       rtnl_unlock();
 
-       return count;
+       return ret;
 }
 
 static DEVICE_ATTR(arp_validate, S_IRUGO | S_IWUSR, bonding_show_arp_validate,
@@ -555,8 +569,8 @@ static ssize_t bonding_store_arp_interval(struct device *d,
                                          struct device_attribute *attr,
                                          const char *buf, size_t count)
 {
-       int new_value, ret = count;
        struct bonding *bond = to_bond(d);
+       int new_value, ret = count;
 
        if (!rtnl_trylock())
                return restart_syscall();
@@ -599,8 +613,13 @@ static ssize_t bonding_store_arp_interval(struct device *d,
                 * is called.
                 */
                if (!new_value) {
+                       if (bond->params.arp_validate)
+                               bond->recv_probe = NULL;
                        cancel_delayed_work_sync(&bond->arp_work);
                } else {
+                       /* arp_validate can be set only in active-backup mode */
+                       if (bond->params.arp_validate)
+                               bond->recv_probe = bond_arp_rcv;
                        cancel_delayed_work_sync(&bond->mii_work);
                        queue_delayed_work(bond->wq, &bond->arp_work, 0);
                }
index f7ab161..7ad8bd5 100644 (file)
@@ -430,6 +430,7 @@ static inline bool slave_can_tx(struct slave *slave)
 
 struct bond_net;
 
+int bond_arp_rcv(const struct sk_buff *skb, struct bonding *bond, struct slave *slave);
 struct vlan_entry *bond_next_vlan(struct bonding *bond, struct vlan_entry *curr);
 int bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb, struct net_device *slave_dev);
 void bond_xmit_slave_id(struct bonding *bond, struct sk_buff *skb, int slave_id);
index 3d86ffe..94edc9c 100644 (file)
@@ -725,6 +725,7 @@ static irqreturn_t lance_dma_merr_int(int irq, void *dev_id)
 {
        struct net_device *dev = dev_id;
 
+       clear_ioasic_dma_irq(irq);
        printk(KERN_ERR "%s: DMA error\n", dev->name);
        return IRQ_HANDLED;
 }
index 8ac48fb..b9a5fb6 100644 (file)
@@ -926,13 +926,13 @@ static int bcm_enet_open(struct net_device *dev)
        if (ret)
                goto out_phy_disconnect;
 
-       ret = request_irq(priv->irq_rx, bcm_enet_isr_dma, IRQF_DISABLED,
+       ret = request_irq(priv->irq_rx, bcm_enet_isr_dma, 0,
                          dev->name, dev);
        if (ret)
                goto out_freeirq;
 
        ret = request_irq(priv->irq_tx, bcm_enet_isr_dma,
-                         IRQF_DISABLED, dev->name, dev);
+                         0, dev->name, dev);
        if (ret)
                goto out_freeirq_rx;
 
@@ -2156,13 +2156,13 @@ static int bcm_enetsw_open(struct net_device *dev)
        enet_dmac_writel(priv, 0, ENETDMAC_IRMASK, priv->tx_chan);
 
        ret = request_irq(priv->irq_rx, bcm_enet_isr_dma,
-                         IRQF_DISABLED, dev->name, dev);
+                         0, dev->name, dev);
        if (ret)
                goto out_freeirq;
 
        if (priv->irq_tx != -1) {
                ret = request_irq(priv->irq_tx, bcm_enet_isr_dma,
-                                 IRQF_DISABLED, dev->name, dev);
+                                 0, dev->name, dev);
                if (ret)
                        goto out_freeirq_rx;
        }
index 2361bf2..90045c9 100644 (file)
@@ -490,10 +490,10 @@ static void bnx2x_set_gro_params(struct sk_buff *skb, u16 parsing_flags,
        NAPI_GRO_CB(skb)->count = num_of_coalesced_segs;
 }
 
-static int bnx2x_alloc_rx_sge(struct bnx2x *bp,
-                             struct bnx2x_fastpath *fp, u16 index)
+static int bnx2x_alloc_rx_sge(struct bnx2x *bp, struct bnx2x_fastpath *fp,
+                             u16 index, gfp_t gfp_mask)
 {
-       struct page *page = alloc_pages(GFP_ATOMIC, PAGES_PER_SGE_SHIFT);
+       struct page *page = alloc_pages(gfp_mask, PAGES_PER_SGE_SHIFT);
        struct sw_rx_page *sw_buf = &fp->rx_page_ring[index];
        struct eth_rx_sge *sge = &fp->rx_sge_ring[index];
        dma_addr_t mapping;
@@ -572,7 +572,7 @@ static int bnx2x_fill_frag_skb(struct bnx2x *bp, struct bnx2x_fastpath *fp,
 
                /* If we fail to allocate a substitute page, we simply stop
                   where we are and drop the whole packet */
-               err = bnx2x_alloc_rx_sge(bp, fp, sge_idx);
+               err = bnx2x_alloc_rx_sge(bp, fp, sge_idx, GFP_ATOMIC);
                if (unlikely(err)) {
                        bnx2x_fp_qstats(bp, fp)->rx_skb_alloc_failed++;
                        return err;
@@ -616,12 +616,17 @@ static void bnx2x_frag_free(const struct bnx2x_fastpath *fp, void *data)
                kfree(data);
 }
 
-static void *bnx2x_frag_alloc(const struct bnx2x_fastpath *fp)
+static void *bnx2x_frag_alloc(const struct bnx2x_fastpath *fp, gfp_t gfp_mask)
 {
-       if (fp->rx_frag_size)
+       if (fp->rx_frag_size) {
+               /* GFP_KERNEL allocations are used only during initialization */
+               if (unlikely(gfp_mask & __GFP_WAIT))
+                       return (void *)__get_free_page(gfp_mask);
+
                return netdev_alloc_frag(fp->rx_frag_size);
+       }
 
-       return kmalloc(fp->rx_buf_size + NET_SKB_PAD, GFP_ATOMIC);
+       return kmalloc(fp->rx_buf_size + NET_SKB_PAD, gfp_mask);
 }
 
 #ifdef CONFIG_INET
@@ -701,7 +706,7 @@ static void bnx2x_tpa_stop(struct bnx2x *bp, struct bnx2x_fastpath *fp,
                goto drop;
 
        /* Try to allocate the new data */
-       new_data = bnx2x_frag_alloc(fp);
+       new_data = bnx2x_frag_alloc(fp, GFP_ATOMIC);
        /* Unmap skb in the pool anyway, as we are going to change
           pool entry status to BNX2X_TPA_STOP even if new skb allocation
           fails. */
@@ -752,15 +757,15 @@ drop:
        bnx2x_fp_stats(bp, fp)->eth_q_stats.rx_skb_alloc_failed++;
 }
 
-static int bnx2x_alloc_rx_data(struct bnx2x *bp,
-                              struct bnx2x_fastpath *fp, u16 index)
+static int bnx2x_alloc_rx_data(struct bnx2x *bp, struct bnx2x_fastpath *fp,
+                              u16 index, gfp_t gfp_mask)
 {
        u8 *data;
        struct sw_rx_bd *rx_buf = &fp->rx_buf_ring[index];
        struct eth_rx_bd *rx_bd = &fp->rx_desc_ring[index];
        dma_addr_t mapping;
 
-       data = bnx2x_frag_alloc(fp);
+       data = bnx2x_frag_alloc(fp, gfp_mask);
        if (unlikely(data == NULL))
                return -ENOMEM;
 
@@ -953,7 +958,8 @@ int bnx2x_rx_int(struct bnx2x_fastpath *fp, int budget)
                        memcpy(skb->data, data + pad, len);
                        bnx2x_reuse_rx_data(fp, bd_cons, bd_prod);
                } else {
-                       if (likely(bnx2x_alloc_rx_data(bp, fp, bd_prod) == 0)) {
+                       if (likely(bnx2x_alloc_rx_data(bp, fp, bd_prod,
+                                                      GFP_ATOMIC) == 0)) {
                                dma_unmap_single(&bp->pdev->dev,
                                                 dma_unmap_addr(rx_buf, mapping),
                                                 fp->rx_buf_size,
@@ -1313,7 +1319,8 @@ void bnx2x_init_rx_rings(struct bnx2x *bp)
                                struct sw_rx_bd *first_buf =
                                        &tpa_info->first_buf;
 
-                               first_buf->data = bnx2x_frag_alloc(fp);
+                               first_buf->data =
+                                       bnx2x_frag_alloc(fp, GFP_KERNEL);
                                if (!first_buf->data) {
                                        BNX2X_ERR("Failed to allocate TPA skb pool for queue[%d] - disabling TPA on this queue!\n",
                                                  j);
@@ -1335,7 +1342,8 @@ void bnx2x_init_rx_rings(struct bnx2x *bp)
                        for (i = 0, ring_prod = 0;
                             i < MAX_RX_SGE_CNT*NUM_RX_SGE_PAGES; i++) {
 
-                               if (bnx2x_alloc_rx_sge(bp, fp, ring_prod) < 0) {
+                               if (bnx2x_alloc_rx_sge(bp, fp, ring_prod,
+                                                      GFP_KERNEL) < 0) {
                                        BNX2X_ERR("was only able to allocate %d rx sges\n",
                                                  i);
                                        BNX2X_ERR("disabling TPA for queue[%d]\n",
@@ -4221,7 +4229,7 @@ static int bnx2x_alloc_rx_bds(struct bnx2x_fastpath *fp,
         * fp->eth_q_stats.rx_skb_alloc_failed = 0
         */
        for (i = 0; i < rx_ring_size; i++) {
-               if (bnx2x_alloc_rx_data(bp, fp, ring_prod) < 0) {
+               if (bnx2x_alloc_rx_data(bp, fp, ring_prod, GFP_KERNEL) < 0) {
                        failure_cnt++;
                        continue;
                }
index 634a793..2f8dbbb 100644 (file)
@@ -7645,6 +7645,7 @@ static int bnx2x_init_hw_func(struct bnx2x *bp)
 
        bnx2x_init_block(bp, BLOCK_TM, init_phase);
        bnx2x_init_block(bp, BLOCK_DORQ, init_phase);
+       REG_WR(bp, DORQ_REG_MODE_ACT, 1); /* no dpm */
 
        bnx2x_iov_init_dq(bp);
 
index b26eb83..2604b62 100644 (file)
@@ -1756,9 +1756,6 @@ void bnx2x_iov_init_dq(struct bnx2x *bp)
        REG_WR(bp, DORQ_REG_VF_TYPE_MIN_MCID_0, 0);
        REG_WR(bp, DORQ_REG_VF_TYPE_MAX_MCID_0, 0x1ffff);
 
-       /* set the number of VF allowed doorbells to the full DQ range */
-       REG_WR(bp, DORQ_REG_VF_NORM_MAX_CID_COUNT, 0x20000);
-
        /* set the VF doorbell threshold */
        REG_WR(bp, DORQ_REG_VF_USAGE_CT_LIMIT, 4);
 }
index 8030cc0..751d5c7 100644 (file)
@@ -22,7 +22,7 @@ if NET_CADENCE
 
 config ARM_AT91_ETHER
        tristate "AT91RM9200 Ethernet support"
-       depends on GENERIC_HARDIRQS && HAS_DMA
+       depends on HAS_DMA
        select MACB
        ---help---
          If you wish to compile a kernel for the AT91RM9200 and enable
index f0e7ed2..149ac85 100644 (file)
@@ -241,4 +241,22 @@ config IXGBEVF
          will be called ixgbevf.  MSI-X interrupt support is required
          for this driver to work correctly.
 
+config I40E
+       tristate "Intel(R) Ethernet Controller XL710 Family support"
+       depends on PCI
+       ---help---
+         This driver supports Intel(R) Ethernet Controller XL710 Family of
+         devices.  For more information on how to identify your adapter, go
+         to the Adapter & Driver ID Guide at:
+
+         <http://support.intel.com/support/network/adapter/pro100/21397.htm>
+
+         For general information and support, go to the Intel support
+         website at:
+
+         <http://support.intel.com>
+
+         To compile this driver as a module, choose M here. The module
+         will be called i40e.
+
 endif # NET_VENDOR_INTEL
index c8210e6..5bae933 100644 (file)
@@ -9,4 +9,5 @@ obj-$(CONFIG_IGB) += igb/
 obj-$(CONFIG_IGBVF) += igbvf/
 obj-$(CONFIG_IXGBE) += ixgbe/
 obj-$(CONFIG_IXGBEVF) += ixgbevf/
+obj-$(CONFIG_I40E) += i40e/
 obj-$(CONFIG_IXGB) += ixgb/
diff --git a/drivers/net/ethernet/intel/i40e/Makefile b/drivers/net/ethernet/intel/i40e/Makefile
new file mode 100644 (file)
index 0000000..479b2c4
--- /dev/null
@@ -0,0 +1,44 @@
+################################################################################
+#
+# Intel Ethernet Controller XL710 Family Linux Driver
+# Copyright(c) 2013 Intel Corporation.
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms and conditions of the GNU General Public License,
+# version 2, as published by the Free Software Foundation.
+#
+# This program is distributed in the hope it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along with
+# this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# The full GNU General Public License is included in this distribution in
+# the file called "COPYING".
+#
+# Contact Information:
+# e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+# Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+#
+################################################################################
+
+#
+# Makefile for the Intel(R) Ethernet Connection XL710 (i40e.ko) driver
+#
+
+obj-$(CONFIG_I40E) += i40e.o
+
+i40e-objs := i40e_main.o \
+       i40e_ethtool.o  \
+       i40e_adminq.o   \
+       i40e_common.o   \
+       i40e_hmc.o      \
+       i40e_lan_hmc.o  \
+       i40e_nvm.o      \
+       i40e_debugfs.o  \
+       i40e_diag.o     \
+       i40e_txrx.o     \
+       i40e_virtchnl_pf.o
diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h
new file mode 100644 (file)
index 0000000..b5252eb
--- /dev/null
@@ -0,0 +1,558 @@
+/*******************************************************************************
+ *
+ * Intel Ethernet Controller XL710 Family Linux Driver
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Contact Information:
+ * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ *
+ ******************************************************************************/
+
+#ifndef _I40E_H_
+#define _I40E_H_
+
+#include <net/tcp.h>
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/aer.h>
+#include <linux/netdevice.h>
+#include <linux/ioport.h>
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/string.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <linux/sctp.h>
+#include <linux/pkt_sched.h>
+#include <linux/ipv6.h>
+#include <linux/version.h>
+#include <net/checksum.h>
+#include <net/ip6_checksum.h>
+#include <linux/ethtool.h>
+#include <linux/if_vlan.h>
+#include "i40e_type.h"
+#include "i40e_prototype.h"
+#include "i40e_virtchnl.h"
+#include "i40e_virtchnl_pf.h"
+#include "i40e_txrx.h"
+
+/* Useful i40e defaults */
+#define I40E_BASE_PF_SEID     16
+#define I40E_BASE_VSI_SEID    512
+#define I40E_BASE_VEB_SEID    288
+#define I40E_MAX_VEB          16
+
+#define I40E_MAX_NUM_DESCRIPTORS      4096
+#define I40E_MAX_REGISTER     0x0038FFFF
+#define I40E_DEFAULT_NUM_DESCRIPTORS  512
+#define I40E_REQ_DESCRIPTOR_MULTIPLE  32
+#define I40E_MIN_NUM_DESCRIPTORS      64
+#define I40E_MIN_MSIX                 2
+#define I40E_DEFAULT_NUM_VMDQ_VSI     8 /* max 256 VSIs */
+#define I40E_DEFAULT_QUEUES_PER_VMDQ  2 /* max 16 qps */
+#define I40E_DEFAULT_QUEUES_PER_VF    4
+#define I40E_DEFAULT_QUEUES_PER_TC    1 /* should be a power of 2 */
+#define I40E_FDIR_RING                0
+#define I40E_FDIR_RING_COUNT          32
+#define I40E_MAX_AQ_BUF_SIZE          4096
+#define I40E_AQ_LEN                   32
+#define I40E_AQ_WORK_LIMIT            16
+#define I40E_MAX_USER_PRIORITY        8
+#define I40E_DEFAULT_MSG_ENABLE       4
+
+#define I40E_NVM_VERSION_LO_SHIFT  0
+#define I40E_NVM_VERSION_LO_MASK   (0xf << I40E_NVM_VERSION_LO_SHIFT)
+#define I40E_NVM_VERSION_MID_SHIFT 4
+#define I40E_NVM_VERSION_MID_MASK  (0xff << I40E_NVM_VERSION_MID_SHIFT)
+#define I40E_NVM_VERSION_HI_SHIFT  12
+#define I40E_NVM_VERSION_HI_MASK   (0xf << I40E_NVM_VERSION_HI_SHIFT)
+
+/* magic for getting defines into strings */
+#define STRINGIFY(foo)  #foo
+#define XSTRINGIFY(bar) STRINGIFY(bar)
+
+#ifndef ARCH_HAS_PREFETCH
+#define prefetch(X)
+#endif
+
+#define I40E_RX_DESC(R, i)                     \
+       ((ring_is_16byte_desc_enabled(R))       \
+               ? (union i40e_32byte_rx_desc *) \
+                       (&(((union i40e_16byte_rx_desc *)((R)->desc))[i])) \
+               : (&(((union i40e_32byte_rx_desc *)((R)->desc))[i])))
+#define I40E_TX_DESC(R, i)                     \
+       (&(((struct i40e_tx_desc *)((R)->desc))[i]))
+#define I40E_TX_CTXTDESC(R, i)                 \
+       (&(((struct i40e_tx_context_desc *)((R)->desc))[i]))
+#define I40E_TX_FDIRDESC(R, i)                 \
+       (&(((struct i40e_filter_program_desc *)((R)->desc))[i]))
+
+/* default to trying for four seconds */
+#define I40E_TRY_LINK_TIMEOUT (4 * HZ)
+
+/* driver state flags */
+enum i40e_state_t {
+       __I40E_TESTING,
+       __I40E_CONFIG_BUSY,
+       __I40E_CONFIG_DONE,
+       __I40E_DOWN,
+       __I40E_NEEDS_RESTART,
+       __I40E_SERVICE_SCHED,
+       __I40E_ADMINQ_EVENT_PENDING,
+       __I40E_MDD_EVENT_PENDING,
+       __I40E_VFLR_EVENT_PENDING,
+       __I40E_RESET_RECOVERY_PENDING,
+       __I40E_RESET_INTR_RECEIVED,
+       __I40E_REINIT_REQUESTED,
+       __I40E_PF_RESET_REQUESTED,
+       __I40E_CORE_RESET_REQUESTED,
+       __I40E_GLOBAL_RESET_REQUESTED,
+       __I40E_FILTER_OVERFLOW_PROMISC,
+};
+
+enum i40e_interrupt_policy {
+       I40E_INTERRUPT_BEST_CASE,
+       I40E_INTERRUPT_MEDIUM,
+       I40E_INTERRUPT_LOWEST
+};
+
+struct i40e_lump_tracking {
+       u16 num_entries;
+       u16 search_hint;
+       u16 list[0];
+#define I40E_PILE_VALID_BIT  0x8000
+};
+
+#define I40E_DEFAULT_ATR_SAMPLE_RATE   20
+#define I40E_FDIR_MAX_RAW_PACKET_LOOKUP 512
+struct i40e_fdir_data {
+       u16 q_index;
+       u8  flex_off;
+       u8  pctype;
+       u16 dest_vsi;
+       u8  dest_ctl;
+       u8  fd_status;
+       u16 cnt_index;
+       u32 fd_id;
+       u8  *raw_packet;
+};
+
+#define I40E_DCB_PRIO_TYPE_STRICT      0
+#define I40E_DCB_PRIO_TYPE_ETS         1
+#define I40E_DCB_STRICT_PRIO_CREDITS   127
+#define I40E_MAX_USER_PRIORITY 8
+/* DCB per TC information data structure */
+struct i40e_tc_info {
+       u16     qoffset;        /* Queue offset from base queue */
+       u16     qcount;         /* Total Queues */
+       u8      netdev_tc;      /* Netdev TC index if netdev associated */
+};
+
+/* TC configuration data structure */
+struct i40e_tc_configuration {
+       u8      numtc;          /* Total number of enabled TCs */
+       u8      enabled_tc;     /* TC map */
+       struct i40e_tc_info tc_info[I40E_MAX_TRAFFIC_CLASS];
+};
+
+/* struct that defines the Ethernet device */
+struct i40e_pf {
+       struct pci_dev *pdev;
+       struct i40e_hw hw;
+       unsigned long state;
+       unsigned long link_check_timeout;
+       struct msix_entry *msix_entries;
+       u16 num_msix_entries;
+       bool fc_autoneg_status;
+
+       u16 eeprom_version;
+       u16 num_vmdq_vsis;         /* num vmdq pools this pf has set up */
+       u16 num_vmdq_qps;          /* num queue pairs per vmdq pool */
+       u16 num_vmdq_msix;         /* num queue vectors per vmdq pool */
+       u16 num_req_vfs;           /* num vfs requested for this vf */
+       u16 num_vf_qps;            /* num queue pairs per vf */
+       u16 num_tc_qps;            /* num queue pairs per TC */
+       u16 num_lan_qps;           /* num lan queues this pf has set up */
+       u16 num_lan_msix;          /* num queue vectors for the base pf vsi */
+       u16 rss_size;              /* num queues in the RSS array */
+       u16 rss_size_max;          /* HW defined max RSS queues */
+       u16 fdir_pf_filter_count;  /* num of guaranteed filters for this PF */
+       u8 atr_sample_rate;
+
+       enum i40e_interrupt_policy int_policy;
+       u16 rx_itr_default;
+       u16 tx_itr_default;
+       u16 msg_enable;
+       char misc_int_name[IFNAMSIZ + 9];
+       u16 adminq_work_limit; /* num of admin receive queue desc to process */
+       int service_timer_period;
+       struct timer_list service_timer;
+       struct work_struct service_task;
+
+       u64 flags;
+#define I40E_FLAG_RX_CSUM_ENABLED              (u64)(1 << 1)
+#define I40E_FLAG_MSI_ENABLED                  (u64)(1 << 2)
+#define I40E_FLAG_MSIX_ENABLED                 (u64)(1 << 3)
+#define I40E_FLAG_RX_1BUF_ENABLED              (u64)(1 << 4)
+#define I40E_FLAG_RX_PS_ENABLED                (u64)(1 << 5)
+#define I40E_FLAG_RSS_ENABLED                  (u64)(1 << 6)
+#define I40E_FLAG_MQ_ENABLED                   (u64)(1 << 7)
+#define I40E_FLAG_VMDQ_ENABLED                 (u64)(1 << 8)
+#define I40E_FLAG_FDIR_REQUIRES_REINIT         (u64)(1 << 9)
+#define I40E_FLAG_NEED_LINK_UPDATE             (u64)(1 << 10)
+#define I40E_FLAG_IN_NETPOLL                   (u64)(1 << 13)
+#define I40E_FLAG_16BYTE_RX_DESC_ENABLED       (u64)(1 << 14)
+#define I40E_FLAG_CLEAN_ADMINQ                 (u64)(1 << 15)
+#define I40E_FLAG_FILTER_SYNC                  (u64)(1 << 16)
+#define I40E_FLAG_PROCESS_MDD_EVENT            (u64)(1 << 18)
+#define I40E_FLAG_PROCESS_VFLR_EVENT           (u64)(1 << 19)
+#define I40E_FLAG_SRIOV_ENABLED                (u64)(1 << 20)
+#define I40E_FLAG_DCB_ENABLED                  (u64)(1 << 21)
+#define I40E_FLAG_FDIR_ENABLED                 (u64)(1 << 22)
+#define I40E_FLAG_FDIR_ATR_ENABLED             (u64)(1 << 23)
+#define I40E_FLAG_MFP_ENABLED                  (u64)(1 << 27)
+
+       u16 num_tx_queues;
+       u16 num_rx_queues;
+
+       bool stat_offsets_loaded;
+       struct i40e_hw_port_stats stats;
+       struct i40e_hw_port_stats stats_offsets;
+       u32 tx_timeout_count;
+       u32 tx_timeout_recovery_level;
+       unsigned long tx_timeout_last_recovery;
+       u32 hw_csum_rx_error;
+       u32 led_status;
+       u16 corer_count; /* Core reset count */
+       u16 globr_count; /* Global reset count */
+       u16 empr_count; /* EMP reset count */
+       u16 pfr_count; /* PF reset count */
+
+       struct mutex switch_mutex;
+       u16 lan_vsi;       /* our default LAN VSI */
+       u16 lan_veb;       /* initial relay, if exists */
+#define I40E_NO_VEB   0xffff
+#define I40E_NO_VSI   0xffff
+       u16 next_vsi;      /* Next unallocated VSI - 0-based! */
+       struct i40e_vsi **vsi;
+       struct i40e_veb *veb[I40E_MAX_VEB];
+
+       struct i40e_lump_tracking *qp_pile;
+       struct i40e_lump_tracking *irq_pile;
+
+       /* switch config info */
+       u16 pf_seid;
+       u16 main_vsi_seid;
+       u16 mac_seid;
+       struct i40e_aqc_get_switch_config_data *sw_config;
+       struct kobject *switch_kobj;
+#ifdef CONFIG_DEBUG_FS
+       struct dentry *i40e_dbg_pf;
+#endif /* CONFIG_DEBUG_FS */
+
+       /* sr-iov config info */
+       struct i40e_vf *vf;
+       int num_alloc_vfs;      /* actual number of VFs allocated */
+       u32 vf_aq_requests;
+
+       /* DCBx/DCBNL capability for PF that indicates
+        * whether DCBx is managed by firmware or host
+        * based agent (LLDPAD). Also, indicates what
+        * flavor of DCBx protocol (IEEE/CEE) is supported
+        * by the device. For now we're supporting IEEE
+        * mode only.
+        */
+       u16 dcbx_cap;
+
+       u32     fcoe_hmc_filt_num;
+       u32     fcoe_hmc_cntx_num;
+       struct i40e_filter_control_settings filter_settings;
+};
+
+struct i40e_mac_filter {
+       struct list_head list;
+       u8 macaddr[ETH_ALEN];
+#define I40E_VLAN_ANY -1
+       s16 vlan;
+       u8 counter;             /* number of instances of this filter */
+       bool is_vf;             /* filter belongs to a VF */
+       bool is_netdev;         /* filter belongs to a netdev */
+       bool changed;           /* filter needs to be sync'd to the HW */
+};
+
+struct i40e_veb {
+       struct i40e_pf *pf;
+       u16 idx;
+       u16 veb_idx;           /* index of VEB parent */
+       u16 seid;
+       u16 uplink_seid;
+       u16 stats_idx;           /* index of VEB parent */
+       u8  enabled_tc;
+       u16 flags;
+       u16 bw_limit;
+       u8  bw_max_quanta;
+       bool is_abs_credits;
+       u8  bw_tc_share_credits[I40E_MAX_TRAFFIC_CLASS];
+       u16 bw_tc_limit_credits[I40E_MAX_TRAFFIC_CLASS];
+       u8  bw_tc_max_quanta[I40E_MAX_TRAFFIC_CLASS];
+       struct kobject *kobj;
+       bool stat_offsets_loaded;
+       struct i40e_eth_stats stats;
+       struct i40e_eth_stats stats_offsets;
+};
+
+/* struct that defines a VSI, associated with a dev */
+struct i40e_vsi {
+       struct net_device *netdev;
+       unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)];
+       bool netdev_registered;
+       bool stat_offsets_loaded;
+
+       u32 current_netdev_flags;
+       unsigned long state;
+#define I40E_VSI_FLAG_FILTER_CHANGED  (1<<0)
+#define I40E_VSI_FLAG_VEB_OWNER       (1<<1)
+       unsigned long flags;
+
+       struct list_head mac_filter_list;
+
+       /* VSI stats */
+       struct rtnl_link_stats64 net_stats;
+       struct rtnl_link_stats64 net_stats_offsets;
+       struct i40e_eth_stats eth_stats;
+       struct i40e_eth_stats eth_stats_offsets;
+       u32 tx_restart;
+       u32 tx_busy;
+       u32 rx_buf_failed;
+       u32 rx_page_failed;
+
+       /* These are arrays of rings, allocated at run-time */
+       struct i40e_ring *rx_rings;
+       struct i40e_ring *tx_rings;
+
+       u16 work_limit;
+       /* high bit set means dynamic, use accessor routines to read/write.
+        * hardware only supports 2us resolution for the ITR registers.
+        * these values always store the USER setting, and must be converted
+        * before programming to a register.
+        */
+       u16 rx_itr_setting;
+       u16 tx_itr_setting;
+
+       u16 max_frame;
+       u16 rx_hdr_len;
+       u16 rx_buf_len;
+       u8  dtype;
+
+       /* List of q_vectors allocated to this VSI */
+       struct i40e_q_vector *q_vectors;
+       int num_q_vectors;
+       int base_vector;
+
+       u16 seid;            /* HW index of this VSI (absolute index) */
+       u16 id;              /* VSI number */
+       u16 uplink_seid;
+
+       u16 base_queue;      /* vsi's first queue in hw array */
+       u16 alloc_queue_pairs; /* Allocated Tx/Rx queues */
+       u16 num_queue_pairs; /* Used tx and rx pairs */
+       u16 num_desc;
+       enum i40e_vsi_type type;  /* VSI type, e.g., LAN, FCoE, etc */
+       u16 vf_id;              /* Virtual function ID for SRIOV VSIs */
+
+       struct i40e_tc_configuration tc_config;
+       struct i40e_aqc_vsi_properties_data info;
+
+       /* VSI BW limit (absolute across all TCs) */
+       u16 bw_limit;           /* VSI BW Limit (0 = disabled) */
+       u8  bw_max_quanta;      /* Max Quanta when BW limit is enabled */
+
+       /* Relative TC credits across VSIs */
+       u8  bw_ets_share_credits[I40E_MAX_TRAFFIC_CLASS];
+       /* TC BW limit credits within VSI */
+       u16  bw_ets_limit_credits[I40E_MAX_TRAFFIC_CLASS];
+       /* TC BW limit max quanta within VSI */
+       u8  bw_ets_max_quanta[I40E_MAX_TRAFFIC_CLASS];
+
+       struct i40e_pf *back;  /* Backreference to associated PF */
+       u16 idx;               /* index in pf->vsi[] */
+       u16 veb_idx;           /* index of VEB parent */
+       struct kobject *kobj;  /* sysfs object */
+
+       /* VSI specific handlers */
+       irqreturn_t (*irq_handler)(int irq, void *data);
+} ____cacheline_internodealigned_in_smp;
+
+struct i40e_netdev_priv {
+       struct i40e_vsi *vsi;
+};
+
+/* struct that defines an interrupt vector */
+struct i40e_q_vector {
+       struct i40e_vsi *vsi;
+
+       u16 v_idx;              /* index in the vsi->q_vector array. */
+       u16 reg_idx;            /* register index of the interrupt */
+
+       struct napi_struct napi;
+
+       struct i40e_ring_container rx;
+       struct i40e_ring_container tx;
+
+       u8 num_ringpairs;       /* total number of ring pairs in vector */
+
+       char name[IFNAMSIZ + 9];
+       cpumask_t affinity_mask;
+} ____cacheline_internodealigned_in_smp;
+
+/* lan device */
+struct i40e_device {
+       struct list_head list;
+       struct i40e_pf *pf;
+};
+
+/**
+ * i40e_fw_version_str - format the FW and NVM version strings
+ * @hw: ptr to the hardware info
+ **/
+static inline char *i40e_fw_version_str(struct i40e_hw *hw)
+{
+       static char buf[32];
+
+       snprintf(buf, sizeof(buf),
+                "f%d.%d a%d.%d n%02d.%02d.%02d e%08x",
+                hw->aq.fw_maj_ver, hw->aq.fw_min_ver,
+                hw->aq.api_maj_ver, hw->aq.api_min_ver,
+                (hw->nvm.version & I40E_NVM_VERSION_HI_MASK)
+                                               >> I40E_NVM_VERSION_HI_SHIFT,
+                (hw->nvm.version & I40E_NVM_VERSION_MID_MASK)
+                                               >> I40E_NVM_VERSION_MID_SHIFT,
+                (hw->nvm.version & I40E_NVM_VERSION_LO_MASK)
+                                               >> I40E_NVM_VERSION_LO_SHIFT,
+                hw->nvm.eetrack);
+
+       return buf;
+}
+
+/**
+ * i40e_netdev_to_pf: Retrieve the PF struct for given netdev
+ * @netdev: the corresponding netdev
+ *
+ * Return the PF struct for the given netdev
+ **/
+static inline struct i40e_pf *i40e_netdev_to_pf(struct net_device *netdev)
+{
+       struct i40e_netdev_priv *np = netdev_priv(netdev);
+       struct i40e_vsi *vsi = np->vsi;
+
+       return vsi->back;
+}
+
+static inline void i40e_vsi_setup_irqhandler(struct i40e_vsi *vsi,
+                               irqreturn_t (*irq_handler)(int, void *))
+{
+       vsi->irq_handler = irq_handler;
+}
+
+/**
+ * i40e_rx_is_programming_status - check for programming status descriptor
+ * @qw: the first quad word of the program status descriptor
+ *
+ * The value of in the descriptor length field indicate if this
+ * is a programming status descriptor for flow director or FCoE
+ * by the value of I40E_RX_PROG_STATUS_DESC_LENGTH, otherwise
+ * it is a packet descriptor.
+ **/
+static inline bool i40e_rx_is_programming_status(u64 qw)
+{
+       return I40E_RX_PROG_STATUS_DESC_LENGTH ==
+               (qw >> I40E_RX_PROG_STATUS_DESC_LENGTH_SHIFT);
+}
+
+/* needed by i40e_ethtool.c */
+int i40e_up(struct i40e_vsi *vsi);
+void i40e_down(struct i40e_vsi *vsi);
+extern const char i40e_driver_name[];
+extern const char i40e_driver_version_str[];
+void i40e_do_reset(struct i40e_pf *pf, u32 reset_flags);
+void i40e_update_stats(struct i40e_vsi *vsi);
+void i40e_update_eth_stats(struct i40e_vsi *vsi);
+struct rtnl_link_stats64 *i40e_get_vsi_stats_struct(struct i40e_vsi *vsi);
+int i40e_fetch_switch_configuration(struct i40e_pf *pf,
+                                   bool printconfig);
+
+/* needed by i40e_main.c */
+void i40e_add_fdir_filter(struct i40e_fdir_data fdir_data,
+                         struct i40e_ring *tx_ring);
+void i40e_add_remove_filter(struct i40e_fdir_data fdir_data,
+                           struct i40e_ring *tx_ring);
+void i40e_update_fdir_filter(struct i40e_fdir_data fdir_data,
+                            struct i40e_ring *tx_ring);
+int i40e_program_fdir_filter(struct i40e_fdir_data *fdir_data,
+                            struct i40e_pf *pf, bool add);
+
+void i40e_set_ethtool_ops(struct net_device *netdev);
+struct i40e_mac_filter *i40e_add_filter(struct i40e_vsi *vsi,
+                                       u8 *macaddr, s16 vlan,
+                                       bool is_vf, bool is_netdev);
+void i40e_del_filter(struct i40e_vsi *vsi, u8 *macaddr, s16 vlan,
+                    bool is_vf, bool is_netdev);
+int i40e_sync_vsi_filters(struct i40e_vsi *vsi);
+struct i40e_vsi *i40e_vsi_setup(struct i40e_pf *pf, u8 type,
+                               u16 uplink, u32 param1);
+int i40e_vsi_release(struct i40e_vsi *vsi);
+struct i40e_vsi *i40e_vsi_lookup(struct i40e_pf *pf, enum i40e_vsi_type type,
+                                struct i40e_vsi *start_vsi);
+struct i40e_veb *i40e_veb_setup(struct i40e_pf *pf, u16 flags, u16 uplink_seid,
+                               u16 downlink_seid, u8 enabled_tc);
+void i40e_veb_release(struct i40e_veb *veb);
+
+i40e_status i40e_vsi_add_pvid(struct i40e_vsi *vsi, u16 vid);
+void i40e_vsi_remove_pvid(struct i40e_vsi *vsi);
+void i40e_vsi_reset_stats(struct i40e_vsi *vsi);
+void i40e_pf_reset_stats(struct i40e_pf *pf);
+#ifdef CONFIG_DEBUG_FS
+void i40e_dbg_pf_init(struct i40e_pf *pf);
+void i40e_dbg_pf_exit(struct i40e_pf *pf);
+void i40e_dbg_init(void);
+void i40e_dbg_exit(void);
+#else
+static inline void i40e_dbg_pf_init(struct i40e_pf *pf) {}
+static inline void i40e_dbg_pf_exit(struct i40e_pf *pf) {}
+static inline void i40e_dbg_init(void) {}
+static inline void i40e_dbg_exit(void) {}
+#endif /* CONFIG_DEBUG_FS*/
+void i40e_irq_dynamic_enable(struct i40e_vsi *vsi, int vector);
+int i40e_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd);
+void i40e_vlan_stripping_disable(struct i40e_vsi *vsi);
+int i40e_vsi_add_vlan(struct i40e_vsi *vsi, s16 vid);
+int i40e_vsi_kill_vlan(struct i40e_vsi *vsi, s16 vid);
+struct i40e_mac_filter *i40e_put_mac_in_vlan(struct i40e_vsi *vsi, u8 *macaddr,
+                                            bool is_vf, bool is_netdev);
+bool i40e_is_vsi_in_vlan(struct i40e_vsi *vsi);
+struct i40e_mac_filter *i40e_find_mac(struct i40e_vsi *vsi, u8 *macaddr,
+                                     bool is_vf, bool is_netdev);
+void i40e_vlan_stripping_enable(struct i40e_vsi *vsi);
+
+#endif /* _I40E_H_ */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.c b/drivers/net/ethernet/intel/i40e/i40e_adminq.c
new file mode 100644 (file)
index 0000000..0c524fa
--- /dev/null
@@ -0,0 +1,983 @@
+/*******************************************************************************
+ *
+ * Intel Ethernet Controller XL710 Family Linux Driver
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Contact Information:
+ * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ *
+ ******************************************************************************/
+
+#include "i40e_status.h"
+#include "i40e_type.h"
+#include "i40e_register.h"
+#include "i40e_adminq.h"
+#include "i40e_prototype.h"
+
+/**
+ *  i40e_adminq_init_regs - Initialize AdminQ registers
+ *  @hw: pointer to the hardware structure
+ *
+ *  This assumes the alloc_asq and alloc_arq functions have already been called
+ **/
+static void i40e_adminq_init_regs(struct i40e_hw *hw)
+{
+       /* set head and tail registers in our local struct */
+       if (hw->mac.type == I40E_MAC_VF) {
+               hw->aq.asq.tail = I40E_VF_ATQT1;
+               hw->aq.asq.head = I40E_VF_ATQH1;
+               hw->aq.arq.tail = I40E_VF_ARQT1;
+               hw->aq.arq.head = I40E_VF_ARQH1;
+       } else {
+               hw->aq.asq.tail = I40E_PF_ATQT;
+               hw->aq.asq.head = I40E_PF_ATQH;
+               hw->aq.arq.tail = I40E_PF_ARQT;
+               hw->aq.arq.head = I40E_PF_ARQH;
+       }
+}
+
+/**
+ *  i40e_alloc_adminq_asq_ring - Allocate Admin Queue send rings
+ *  @hw: pointer to the hardware structure
+ **/
+static i40e_status i40e_alloc_adminq_asq_ring(struct i40e_hw *hw)
+{
+       i40e_status ret_code;
+       struct i40e_virt_mem mem;
+
+       ret_code = i40e_allocate_dma_mem(hw, &hw->aq.asq_mem,
+                                        i40e_mem_atq_ring,
+                                        (hw->aq.num_asq_entries *
+                                        sizeof(struct i40e_aq_desc)),
+                                        I40E_ADMINQ_DESC_ALIGNMENT);
+       if (ret_code)
+               return ret_code;
+
+       hw->aq.asq.desc = hw->aq.asq_mem.va;
+       hw->aq.asq.dma_addr = hw->aq.asq_mem.pa;
+
+       ret_code = i40e_allocate_virt_mem(hw, &mem,
+                                         (hw->aq.num_asq_entries *
+                                         sizeof(struct i40e_asq_cmd_details)));
+       if (ret_code) {
+               i40e_free_dma_mem(hw, &hw->aq.asq_mem);
+               hw->aq.asq_mem.va = NULL;
+               hw->aq.asq_mem.pa = 0;
+               return ret_code;
+       }
+
+       hw->aq.asq.details = mem.va;
+
+       return ret_code;
+}
+
+/**
+ *  i40e_alloc_adminq_arq_ring - Allocate Admin Queue receive rings
+ *  @hw: pointer to the hardware structure
+ **/
+static i40e_status i40e_alloc_adminq_arq_ring(struct i40e_hw *hw)
+{
+       i40e_status ret_code;
+
+       ret_code = i40e_allocate_dma_mem(hw, &hw->aq.arq_mem,
+                                        i40e_mem_arq_ring,
+                                        (hw->aq.num_arq_entries *
+                                        sizeof(struct i40e_aq_desc)),
+                                        I40E_ADMINQ_DESC_ALIGNMENT);
+       if (ret_code)
+               return ret_code;
+
+       hw->aq.arq.desc = hw->aq.arq_mem.va;
+       hw->aq.arq.dma_addr = hw->aq.arq_mem.pa;
+
+       return ret_code;
+}
+
+/**
+ *  i40e_free_adminq_asq - Free Admin Queue send rings
+ *  @hw: pointer to the hardware structure
+ *
+ *  This assumes the posted send buffers have already been cleaned
+ *  and de-allocated
+ **/
+static void i40e_free_adminq_asq(struct i40e_hw *hw)
+{
+       struct i40e_virt_mem mem;
+
+       i40e_free_dma_mem(hw, &hw->aq.asq_mem);
+       hw->aq.asq_mem.va = NULL;
+       hw->aq.asq_mem.pa = 0;
+       mem.va = hw->aq.asq.details;
+       i40e_free_virt_mem(hw, &mem);
+       hw->aq.asq.details = NULL;
+}
+
+/**
+ *  i40e_free_adminq_arq - Free Admin Queue receive rings
+ *  @hw: pointer to the hardware structure
+ *
+ *  This assumes the posted receive buffers have already been cleaned
+ *  and de-allocated
+ **/
+static void i40e_free_adminq_arq(struct i40e_hw *hw)
+{
+       i40e_free_dma_mem(hw, &hw->aq.arq_mem);
+       hw->aq.arq_mem.va = NULL;
+       hw->aq.arq_mem.pa = 0;
+}
+
+/**
+ *  i40e_alloc_arq_bufs - Allocate pre-posted buffers for the receive queue
+ *  @hw:     pointer to the hardware structure
+ *
+ *  Allocates the arq_bi tracking array plus one DMA buffer per ARQ entry
+ *  and pre-programs each descriptor with its buffer address so firmware
+ *  can post events as soon as the queue is enabled.
+ **/
+static i40e_status i40e_alloc_arq_bufs(struct i40e_hw *hw)
+{
+       i40e_status ret_code;
+       struct i40e_aq_desc *desc;
+       struct i40e_virt_mem mem;
+       struct i40e_dma_mem *bi;
+       int i;
+
+       /* We'll be allocating the buffer info memory first, then we can
+        * allocate the mapped buffers for the event processing
+        */
+
+       /* buffer_info structures do not need alignment */
+       ret_code = i40e_allocate_virt_mem(hw, &mem, (hw->aq.num_arq_entries *
+                                         sizeof(struct i40e_dma_mem)));
+       if (ret_code)
+               goto alloc_arq_bufs;
+       hw->aq.arq.r.arq_bi = (struct i40e_dma_mem *)mem.va;
+
+       /* allocate the mapped buffers */
+       for (i = 0; i < hw->aq.num_arq_entries; i++) {
+               bi = &hw->aq.arq.r.arq_bi[i];
+               ret_code = i40e_allocate_dma_mem(hw, bi,
+                                                i40e_mem_arq_buf,
+                                                hw->aq.arq_buf_size,
+                                                I40E_ADMINQ_DESC_ALIGNMENT);
+               if (ret_code)
+                       goto unwind_alloc_arq_bufs;
+
+               /* now configure the descriptors for use */
+               desc = I40E_ADMINQ_DESC(hw->aq.arq, i);
+
+               /* LB flag marks buffers larger than I40E_AQ_LARGE_BUF */
+               desc->flags = cpu_to_le16(I40E_AQ_FLAG_BUF);
+               if (hw->aq.arq_buf_size > I40E_AQ_LARGE_BUF)
+                       desc->flags |= cpu_to_le16(I40E_AQ_FLAG_LB);
+               desc->opcode = 0;
+               /* This is in accordance with Admin queue design, there is no
+                * register for buffer size configuration
+                */
+               desc->datalen = cpu_to_le16((u16)bi->size);
+               desc->retval = 0;
+               desc->cookie_high = 0;
+               desc->cookie_low = 0;
+               desc->params.external.addr_high =
+                       cpu_to_le32(upper_32_bits(bi->pa));
+               desc->params.external.addr_low =
+                       cpu_to_le32(lower_32_bits(bi->pa));
+               desc->params.external.param0 = 0;
+               desc->params.external.param1 = 0;
+       }
+
+alloc_arq_bufs:
+       return ret_code;
+
+unwind_alloc_arq_bufs:
+       /* don't try to free the one that failed... */
+       i--;
+       for (; i >= 0; i--)
+               i40e_free_dma_mem(hw, &hw->aq.arq.r.arq_bi[i]);
+       mem.va = hw->aq.arq.r.arq_bi;
+       i40e_free_virt_mem(hw, &mem);
+
+       return ret_code;
+}
+
+/**
+ *  i40e_alloc_asq_bufs - Allocate empty buffer structs for the send queue
+ *  @hw:     pointer to the hardware structure
+ *
+ *  Unlike the ARQ, descriptors are not pre-programmed here; a buffer is
+ *  attached to a descriptor per command in i40e_asq_send_command().
+ **/
+static i40e_status i40e_alloc_asq_bufs(struct i40e_hw *hw)
+{
+       i40e_status ret_code;
+       struct i40e_virt_mem mem;
+       struct i40e_dma_mem *bi;
+       int i;
+
+       /* No mapped memory needed yet, just the buffer info structures */
+       ret_code = i40e_allocate_virt_mem(hw, &mem, (hw->aq.num_asq_entries *
+                                         sizeof(struct i40e_dma_mem)));
+       if (ret_code)
+               goto alloc_asq_bufs;
+       hw->aq.asq.r.asq_bi = (struct i40e_dma_mem *)mem.va;
+
+       /* allocate the mapped buffers */
+       for (i = 0; i < hw->aq.num_asq_entries; i++) {
+               bi = &hw->aq.asq.r.asq_bi[i];
+               ret_code = i40e_allocate_dma_mem(hw, bi,
+                                                i40e_mem_asq_buf,
+                                                hw->aq.asq_buf_size,
+                                                I40E_ADMINQ_DESC_ALIGNMENT);
+               if (ret_code)
+                       goto unwind_alloc_asq_bufs;
+       }
+alloc_asq_bufs:
+       return ret_code;
+
+unwind_alloc_asq_bufs:
+       /* don't try to free the one that failed... */
+       i--;
+       for (; i >= 0; i--)
+               i40e_free_dma_mem(hw, &hw->aq.asq.r.asq_bi[i]);
+       mem.va = hw->aq.asq.r.asq_bi;
+       i40e_free_virt_mem(hw, &mem);
+
+       return ret_code;
+}
+
+/**
+ *  i40e_free_arq_bufs - Free receive queue buffer info elements
+ *  @hw:     pointer to the hardware structure
+ **/
+static void i40e_free_arq_bufs(struct i40e_hw *hw)
+{
+       struct i40e_virt_mem mem;
+       int i;
+
+       /* every ARQ slot gets a DMA buffer in i40e_alloc_arq_bufs(), so
+        * no pa != 0 guard is needed here (contrast i40e_free_asq_bufs)
+        */
+       for (i = 0; i < hw->aq.num_arq_entries; i++)
+               i40e_free_dma_mem(hw, &hw->aq.arq.r.arq_bi[i]);
+
+       /* then release the buffer info array itself */
+       mem.va = hw->aq.arq.r.arq_bi;
+       i40e_free_virt_mem(hw, &mem);
+}
+
+/**
+ *  i40e_free_asq_bufs - Free send queue buffer info elements
+ *  @hw:     pointer to the hardware structure
+ **/
+static void i40e_free_asq_bufs(struct i40e_hw *hw)
+{
+       struct i40e_virt_mem mem;
+       int i;
+
+       /* only unmap if the address is non-NULL */
+       for (i = 0; i < hw->aq.num_asq_entries; i++)
+               if (hw->aq.asq.r.asq_bi[i].pa)
+                       i40e_free_dma_mem(hw, &hw->aq.asq.r.asq_bi[i]);
+
+       /* now free the buffer info list */
+       mem.va = hw->aq.asq.r.asq_bi;
+       i40e_free_virt_mem(hw, &mem);
+}
+
+/**
+ *  i40e_config_asq_regs - configure ASQ registers
+ *  @hw:     pointer to the hardware structure
+ *
+ *  Configure base address and length registers for the transmit queue.
+ *  VFs and the PF expose the same queue through different register sets.
+ **/
+static void i40e_config_asq_regs(struct i40e_hw *hw)
+{
+       if (hw->mac.type == I40E_MAC_VF) {
+               /* configure the transmit queue */
+               wr32(hw, I40E_VF_ATQBAH1, upper_32_bits(hw->aq.asq.dma_addr));
+               wr32(hw, I40E_VF_ATQBAL1, lower_32_bits(hw->aq.asq.dma_addr));
+               wr32(hw, I40E_VF_ATQLEN1, (hw->aq.num_asq_entries |
+                                         I40E_VF_ATQLEN1_ATQENABLE_MASK));
+       } else {
+               /* configure the transmit queue */
+               wr32(hw, I40E_PF_ATQBAH, upper_32_bits(hw->aq.asq.dma_addr));
+               wr32(hw, I40E_PF_ATQBAL, lower_32_bits(hw->aq.asq.dma_addr));
+               wr32(hw, I40E_PF_ATQLEN, (hw->aq.num_asq_entries |
+                                         I40E_PF_ATQLEN_ATQENABLE_MASK));
+       }
+}
+
+/**
+ *  i40e_config_arq_regs - ARQ register configuration
+ *  @hw:     pointer to the hardware structure
+ *
+ * Configure base address and length registers for the receive (event queue)
+ **/
+static void i40e_config_arq_regs(struct i40e_hw *hw)
+{
+       if (hw->mac.type == I40E_MAC_VF) {
+               /* configure the receive queue */
+               wr32(hw, I40E_VF_ARQBAH1, upper_32_bits(hw->aq.arq.dma_addr));
+               wr32(hw, I40E_VF_ARQBAL1, lower_32_bits(hw->aq.arq.dma_addr));
+               wr32(hw, I40E_VF_ARQLEN1, (hw->aq.num_arq_entries |
+                                         I40E_VF_ARQLEN1_ARQENABLE_MASK));
+       } else {
+               /* configure the receive queue */
+               wr32(hw, I40E_PF_ARQBAH, upper_32_bits(hw->aq.arq.dma_addr));
+               wr32(hw, I40E_PF_ARQBAL, lower_32_bits(hw->aq.arq.dma_addr));
+               wr32(hw, I40E_PF_ARQLEN, (hw->aq.num_arq_entries |
+                                         I40E_PF_ARQLEN_ARQENABLE_MASK));
+       }
+
+       /* Update tail in the HW to post pre-allocated buffers */
+       wr32(hw, hw->aq.arq.tail, hw->aq.num_arq_entries - 1);
+}
+
+/**
+ *  i40e_init_asq - main initialization routine for ASQ
+ *  @hw:     pointer to the hardware structure
+ *
+ *  This is the main initialization routine for the Admin Send Queue
+ *  Prior to calling this function, drivers *MUST* set the following fields
+ *  in the hw->aq structure:
+ *     - hw->aq.num_asq_entries
+ *     - hw->aq.asq_buf_size
+ *
+ *  Do *NOT* hold the lock when calling this as the memory allocation routines
+ *  called are not going to be atomic context safe
+ **/
+static i40e_status i40e_init_asq(struct i40e_hw *hw)
+{
+       i40e_status ret_code = 0;
+
+       /* a non-zero count means a previous init was not shut down */
+       if (hw->aq.asq.count > 0) {
+               /* queue already initialized */
+               ret_code = I40E_ERR_NOT_READY;
+               goto init_adminq_exit;
+       }
+
+       /* verify input for valid configuration */
+       if ((hw->aq.num_asq_entries == 0) ||
+           (hw->aq.asq_buf_size == 0)) {
+               ret_code = I40E_ERR_CONFIG;
+               goto init_adminq_exit;
+       }
+
+       hw->aq.asq.next_to_use = 0;
+       hw->aq.asq.next_to_clean = 0;
+       hw->aq.asq.count = hw->aq.num_asq_entries;
+
+       /* allocate the ring memory */
+       ret_code = i40e_alloc_adminq_asq_ring(hw);
+       if (ret_code)
+               goto init_adminq_exit;
+
+       /* allocate buffers in the rings */
+       ret_code = i40e_alloc_asq_bufs(hw);
+       if (ret_code)
+               goto init_adminq_free_rings;
+
+       /* initialize base registers */
+       i40e_config_asq_regs(hw);
+
+       /* success! */
+       goto init_adminq_exit;
+
+init_adminq_free_rings:
+       i40e_free_adminq_asq(hw);
+
+init_adminq_exit:
+       return ret_code;
+}
+
+/**
+ *  i40e_init_arq - initialize ARQ
+ *  @hw:     pointer to the hardware structure
+ *
+ *  The main initialization routine for the Admin Receive (Event) Queue.
+ *  Prior to calling this function, drivers *MUST* set the following fields
+ *  in the hw->aq structure:
+ *     - hw->aq.num_arq_entries
+ *     - hw->aq.arq_buf_size
+ *
+ *  Do *NOT* hold the lock when calling this as the memory allocation routines
+ *  called are not going to be atomic context safe
+ **/
+static i40e_status i40e_init_arq(struct i40e_hw *hw)
+{
+       i40e_status ret_code = 0;
+
+       /* a non-zero count means a previous init was not shut down */
+       if (hw->aq.arq.count > 0) {
+               /* queue already initialized */
+               ret_code = I40E_ERR_NOT_READY;
+               goto init_adminq_exit;
+       }
+
+       /* verify input for valid configuration */
+       if ((hw->aq.num_arq_entries == 0) ||
+           (hw->aq.arq_buf_size == 0)) {
+               ret_code = I40E_ERR_CONFIG;
+               goto init_adminq_exit;
+       }
+
+       hw->aq.arq.next_to_use = 0;
+       hw->aq.arq.next_to_clean = 0;
+       hw->aq.arq.count = hw->aq.num_arq_entries;
+
+       /* allocate the ring memory */
+       ret_code = i40e_alloc_adminq_arq_ring(hw);
+       if (ret_code)
+               goto init_adminq_exit;
+
+       /* allocate buffers in the rings */
+       ret_code = i40e_alloc_arq_bufs(hw);
+       if (ret_code)
+               goto init_adminq_free_rings;
+
+       /* initialize base registers */
+       i40e_config_arq_regs(hw);
+
+       /* success! */
+       goto init_adminq_exit;
+
+init_adminq_free_rings:
+       i40e_free_adminq_arq(hw);
+
+init_adminq_exit:
+       return ret_code;
+}
+
+/**
+ *  i40e_shutdown_asq - shutdown the ASQ
+ *  @hw:     pointer to the hardware structure
+ *
+ *  The main shutdown routine for the Admin Send Queue.  The count check
+ *  and the queue teardown run under asq_mutex so a concurrent
+ *  i40e_asq_send_command() cannot observe a half-torn-down queue.
+ **/
+static i40e_status i40e_shutdown_asq(struct i40e_hw *hw)
+{
+       i40e_status ret_code = 0;
+
+       mutex_lock(&hw->aq.asq_mutex);
+
+       if (hw->aq.asq.count == 0) {
+               /* queue was never initialized, or already shut down */
+               ret_code = I40E_ERR_NOT_READY;
+               goto shutdown_asq_out;
+       }
+
+       /* Stop firmware AdminQ processing */
+       if (hw->mac.type == I40E_MAC_VF)
+               wr32(hw, I40E_VF_ATQLEN1, 0);
+       else
+               wr32(hw, I40E_PF_ATQLEN, 0);
+
+       hw->aq.asq.count = 0; /* to indicate uninitialized queue */
+
+       /* free ring buffers */
+       i40e_free_asq_bufs(hw);
+       /* free the ring descriptors */
+       i40e_free_adminq_asq(hw);
+
+shutdown_asq_out:
+       mutex_unlock(&hw->aq.asq_mutex);
+
+       return ret_code;
+}
+
+/**
+ *  i40e_shutdown_arq - shutdown ARQ
+ *  @hw:     pointer to the hardware structure
+ *
+ *  The main shutdown routine for the Admin Receive Queue.  The count
+ *  check and the queue teardown run under arq_mutex so a concurrent
+ *  i40e_clean_arq_element() cannot observe a half-torn-down queue.
+ **/
+static i40e_status i40e_shutdown_arq(struct i40e_hw *hw)
+{
+       i40e_status ret_code = 0;
+
+       mutex_lock(&hw->aq.arq_mutex);
+
+       if (hw->aq.arq.count == 0) {
+               /* queue was never initialized, or already shut down */
+               ret_code = I40E_ERR_NOT_READY;
+               goto shutdown_arq_out;
+       }
+
+       /* Stop firmware AdminQ processing */
+       if (hw->mac.type == I40E_MAC_VF)
+               wr32(hw, I40E_VF_ARQLEN1, 0);
+       else
+               wr32(hw, I40E_PF_ARQLEN, 0);
+
+       hw->aq.arq.count = 0; /* to indicate uninitialized queue */
+
+       /* free ring buffers */
+       i40e_free_arq_bufs(hw);
+       /* free the ring descriptors */
+       i40e_free_adminq_arq(hw);
+
+shutdown_arq_out:
+       mutex_unlock(&hw->aq.arq_mutex);
+
+       return ret_code;
+}
+
+/**
+ *  i40e_init_adminq - main initialization routine for Admin Queue
+ *  @hw:     pointer to the hardware structure
+ *
+ *  Prior to calling this function, drivers *MUST* set the following fields
+ *  in the hw->aq structure:
+ *     - hw->aq.num_asq_entries
+ *     - hw->aq.num_arq_entries
+ *     - hw->aq.arq_buf_size
+ *     - hw->aq.asq_buf_size
+ **/
+i40e_status i40e_init_adminq(struct i40e_hw *hw)
+{
+       u16 eetrack_lo, eetrack_hi;
+       i40e_status ret_code;
+
+       /* verify input for valid configuration */
+       if ((hw->aq.num_arq_entries == 0) ||
+           (hw->aq.num_asq_entries == 0) ||
+           (hw->aq.arq_buf_size == 0) ||
+           (hw->aq.asq_buf_size == 0)) {
+               ret_code = I40E_ERR_CONFIG;
+               goto init_adminq_exit;
+       }
+
+       /* initialize locks */
+       mutex_init(&hw->aq.asq_mutex);
+       mutex_init(&hw->aq.arq_mutex);
+
+       /* Set up register offsets */
+       i40e_adminq_init_regs(hw);
+
+       /* allocate the ASQ */
+       ret_code = i40e_init_asq(hw);
+       if (ret_code)
+               goto init_adminq_destroy_locks;
+
+       /* allocate the ARQ */
+       ret_code = i40e_init_arq(hw);
+       if (ret_code)
+               goto init_adminq_free_asq;
+
+       ret_code = i40e_aq_get_firmware_version(hw,
+                                    &hw->aq.fw_maj_ver, &hw->aq.fw_min_ver,
+                                    &hw->aq.api_maj_ver, &hw->aq.api_min_ver,
+                                    NULL);
+       if (ret_code)
+               goto init_adminq_free_arq;
+
+       /* an API version mismatch means the driver cannot talk to this FW */
+       if (hw->aq.api_maj_ver != I40E_FW_API_VERSION_MAJOR ||
+           hw->aq.api_min_ver != I40E_FW_API_VERSION_MINOR) {
+               ret_code = I40E_ERR_FIRMWARE_API_VERSION;
+               goto init_adminq_free_arq;
+       }
+       i40e_read_nvm_word(hw, I40E_SR_NVM_IMAGE_VERSION, &hw->nvm.version);
+       i40e_read_nvm_word(hw, I40E_SR_NVM_EETRACK_LO, &eetrack_lo);
+       i40e_read_nvm_word(hw, I40E_SR_NVM_EETRACK_HI, &eetrack_hi);
+       hw->nvm.eetrack = (eetrack_hi << 16) | eetrack_lo;
+
+       /* setting the HMC profile is best-effort: failure is ignored */
+       ret_code = i40e_aq_set_hmc_resource_profile(hw,
+                                                   I40E_HMC_PROFILE_DEFAULT,
+                                                   0,
+                                                   NULL);
+       ret_code = 0;
+
+       /* success! */
+       goto init_adminq_exit;
+
+init_adminq_free_arq:
+       i40e_shutdown_arq(hw);
+init_adminq_free_asq:
+       i40e_shutdown_asq(hw);
+init_adminq_destroy_locks:
+       /* don't leak the mutexes initialized above on the error paths */
+       mutex_destroy(&hw->aq.asq_mutex);
+       mutex_destroy(&hw->aq.arq_mutex);
+
+init_adminq_exit:
+       return ret_code;
+}
+
+/**
+ *  i40e_shutdown_adminq - shutdown routine for the Admin Queue
+ *  @hw:     pointer to the hardware structure
+ *
+ *  Tears down both queues and releases the mutexes created by
+ *  i40e_init_adminq().
+ **/
+i40e_status i40e_shutdown_adminq(struct i40e_hw *hw)
+{
+       i40e_status ret_code = 0;
+
+       i40e_shutdown_asq(hw);
+       i40e_shutdown_arq(hw);
+
+       /* destroy the locks */
+       mutex_destroy(&hw->aq.asq_mutex);
+       mutex_destroy(&hw->aq.arq_mutex);
+
+       return ret_code;
+}
+
+/**
+ *  i40e_clean_asq - cleans Admin send queue
+ *  @hw: pointer to the hw struct
+ *
+ *  returns the number of free desc
+ **/
+static u16 i40e_clean_asq(struct i40e_hw *hw)
+{
+       struct i40e_adminq_ring *asq = &(hw->aq.asq);
+       struct i40e_asq_cmd_details *details;
+       u16 ntc = asq->next_to_clean;
+       struct i40e_aq_desc desc_cb;
+       struct i40e_aq_desc *desc;
+
+       /* walk from next_to_clean up to the HW head, firing any completion
+        * callbacks and zeroing the consumed slots; callbacks run with
+        * asq_mutex held when called from i40e_asq_send_command()
+        */
+       desc = I40E_ADMINQ_DESC(*asq, ntc);
+       details = I40E_ADMINQ_DETAILS(*asq, ntc);
+       while (rd32(hw, hw->aq.asq.head) != ntc) {
+               if (details->callback) {
+                       I40E_ADMINQ_CALLBACK cb_func =
+                                       (I40E_ADMINQ_CALLBACK)details->callback;
+                       /* copy the desc so the callback cannot corrupt the
+                        * ring slot before it is zeroed below
+                        */
+                       desc_cb = *desc;
+                       cb_func(hw, &desc_cb);
+               }
+               memset((void *)desc, 0, sizeof(struct i40e_aq_desc));
+               memset((void *)details, 0,
+                      sizeof(struct i40e_asq_cmd_details));
+               ntc++;
+               if (ntc == asq->count)
+                       ntc = 0;
+               desc = I40E_ADMINQ_DESC(*asq, ntc);
+               details = I40E_ADMINQ_DETAILS(*asq, ntc);
+       }
+
+       asq->next_to_clean = ntc;
+
+       return I40E_DESC_UNUSED(asq);
+}
+
+/**
+ *  i40e_asq_done - check if FW has processed the Admin Send Queue
+ *  @hw: pointer to the hw struct
+ *
+ *  Returns true if the firmware has processed all descriptors on the
+ *  admin send queue. Returns false if there are still requests pending.
+ **/
+bool i40e_asq_done(struct i40e_hw *hw)
+{
+       /* AQ designers suggest use of head for better
+        * timing reliability than DD bit
+        */
+       return (rd32(hw, hw->aq.asq.head) == hw->aq.asq.next_to_use);
+
+}
+
+/**
+ *  i40e_asq_send_command - send command to Admin Queue
+ *  @hw: pointer to the hw struct
+ *  @desc: prefilled descriptor describing the command (non DMA mem)
+ *  @buff: buffer to use for indirect commands
+ *  @buff_size: size of buffer for indirect commands
+ *  @cmd_details: pointer to command details structure, may be NULL
+ *
+ *  This is the main send command driver routine for the Admin Queue send
+ *  queue.  It runs the queue, cleans the queue, etc
+ **/
+i40e_status i40e_asq_send_command(struct i40e_hw *hw,
+                               struct i40e_aq_desc *desc,
+                               void *buff, /* can be NULL */
+                               u16  buff_size,
+                               struct i40e_asq_cmd_details *cmd_details)
+{
+       i40e_status status = 0;
+       struct i40e_dma_mem *dma_buff = NULL;
+       struct i40e_asq_cmd_details *details;
+       struct i40e_aq_desc *desc_on_ring;
+       bool cmd_completed = false;
+       u16  retval = 0;
+
+       if (hw->aq.asq.count == 0) {
+               i40e_debug(hw, I40E_DEBUG_AQ_MESSAGE,
+                          "AQTX: Admin queue not initialized.\n");
+               status = I40E_ERR_QUEUE_EMPTY;
+               goto asq_send_command_exit;
+       }
+
+       /* NOTE(review): the ring's details slot at next_to_use is read and
+        * written here, before asq_mutex is taken below -- confirm callers
+        * cannot run this path concurrently
+        */
+       details = I40E_ADMINQ_DETAILS(hw->aq.asq, hw->aq.asq.next_to_use);
+       if (cmd_details) {
+               memcpy(details, cmd_details,
+                      sizeof(struct i40e_asq_cmd_details));
+
+               /* If the cmd_details are defined copy the cookie.  The
+                * cpu_to_le32 is not needed here because the data is ignored
+                * by the FW, only used by the driver
+                */
+               if (details->cookie) {
+                       desc->cookie_high =
+                               cpu_to_le32(upper_32_bits(details->cookie));
+                       desc->cookie_low =
+                               cpu_to_le32(lower_32_bits(details->cookie));
+               }
+       } else {
+               memset(details, 0, sizeof(struct i40e_asq_cmd_details));
+       }
+
+       /* clear requested flags and then set additional flags if defined */
+       desc->flags &= ~cpu_to_le16(details->flags_dis);
+       desc->flags |= cpu_to_le16(details->flags_ena);
+
+       mutex_lock(&hw->aq.asq_mutex);
+
+       /* indirect buffers must fit in the pre-allocated DMA buffers */
+       if (buff_size > hw->aq.asq_buf_size) {
+               i40e_debug(hw,
+                          I40E_DEBUG_AQ_MESSAGE,
+                          "AQTX: Invalid buffer size: %d.\n",
+                          buff_size);
+               status = I40E_ERR_INVALID_SIZE;
+               goto asq_send_command_error;
+       }
+
+       if (details->postpone && !details->async) {
+               i40e_debug(hw,
+                          I40E_DEBUG_AQ_MESSAGE,
+                          "AQTX: Async flag not set along with postpone flag");
+               status = I40E_ERR_PARAM;
+               goto asq_send_command_error;
+       }
+
+       /* call clean and check queue available function to reclaim the
+        * descriptors that were processed by FW, the function returns the
+        * number of desc available
+        */
+       /* the clean function called here could be called in a separate thread
+        * in case of asynchronous completions
+        */
+       if (i40e_clean_asq(hw) == 0) {
+               i40e_debug(hw,
+                          I40E_DEBUG_AQ_MESSAGE,
+                          "AQTX: Error queue is full.\n");
+               status = I40E_ERR_ADMIN_QUEUE_FULL;
+               goto asq_send_command_error;
+       }
+
+       /* initialize the temp desc pointer with the right desc */
+       desc_on_ring = I40E_ADMINQ_DESC(hw->aq.asq, hw->aq.asq.next_to_use);
+
+       /* if the desc is available copy the temp desc to the right place */
+       memcpy(desc_on_ring, desc, sizeof(struct i40e_aq_desc));
+
+       /* if buff is not NULL assume indirect command */
+       if (buff != NULL) {
+               dma_buff = &(hw->aq.asq.r.asq_bi[hw->aq.asq.next_to_use]);
+               /* copy the user buff into the respective DMA buff */
+               memcpy(dma_buff->va, buff, buff_size);
+               desc_on_ring->datalen = cpu_to_le16(buff_size);
+
+               /* Update the address values in the desc with the pa value
+                * for respective buffer
+                */
+               desc_on_ring->params.external.addr_high =
+                               cpu_to_le32(upper_32_bits(dma_buff->pa));
+               desc_on_ring->params.external.addr_low =
+                               cpu_to_le32(lower_32_bits(dma_buff->pa));
+       }
+
+       /* bump the tail */
+       i40e_debug_aq(hw, I40E_DEBUG_AQ_COMMAND, (void *)desc_on_ring, buff);
+       (hw->aq.asq.next_to_use)++;
+       if (hw->aq.asq.next_to_use == hw->aq.asq.count)
+               hw->aq.asq.next_to_use = 0;
+       if (!details->postpone)
+               wr32(hw, hw->aq.asq.tail, hw->aq.asq.next_to_use);
+
+       /* if cmd_details are not defined or async flag is not set,
+        * we need to wait for desc write back
+        */
+       if (!details->async && !details->postpone) {
+               u32 total_delay = 0;
+               u32 delay_len = 10;
+
+               /* busy-wait up to I40E_ASQ_CMD_TIMEOUT usecs for completion */
+               do {
+                       /* AQ designers suggest use of head for better
+                        * timing reliability than DD bit
+                        */
+                       if (i40e_asq_done(hw))
+                               break;
+                       /* ugh! delay while spin_lock */
+                       udelay(delay_len);
+                       total_delay += delay_len;
+               } while (total_delay <  I40E_ASQ_CMD_TIMEOUT);
+       }
+
+       /* if ready, copy the desc back to temp */
+       if (i40e_asq_done(hw)) {
+               memcpy(desc, desc_on_ring, sizeof(struct i40e_aq_desc));
+               if (buff != NULL)
+                       memcpy(buff, dma_buff->va, buff_size);
+               retval = le16_to_cpu(desc->retval);
+               if (retval != 0) {
+                       i40e_debug(hw,
+                                  I40E_DEBUG_AQ_MESSAGE,
+                                  "AQTX: Command completed with error 0x%X.\n",
+                                  retval);
+                       /* strip off FW internal code */
+                       retval &= 0xff;
+               }
+               cmd_completed = true;
+               if ((enum i40e_admin_queue_err)retval == I40E_AQ_RC_OK)
+                       status = 0;
+               else
+                       status = I40E_ERR_ADMIN_QUEUE_ERROR;
+               hw->aq.asq_last_status = (enum i40e_admin_queue_err)retval;
+       }
+
+       /* update the error if time out occurred */
+       if ((!cmd_completed) &&
+           (!details->async && !details->postpone)) {
+               i40e_debug(hw,
+                          I40E_DEBUG_AQ_MESSAGE,
+                          "AQTX: Writeback timeout.\n");
+               status = I40E_ERR_ADMIN_QUEUE_TIMEOUT;
+       }
+
+asq_send_command_error:
+       mutex_unlock(&hw->aq.asq_mutex);
+asq_send_command_exit:
+       return status;
+}
+
+/**
+ *  i40e_fill_default_direct_cmd_desc - AQ descriptor helper function
+ *  @desc:     pointer to the temp descriptor (non DMA mem)
+ *  @opcode:   the opcode can be used to decide which flags to turn off or on
+ *
+ *  Fill the desc with default values: everything zeroed except the opcode
+ *  and the default EI/SI flags.
+ **/
+void i40e_fill_default_direct_cmd_desc(struct i40e_aq_desc *desc,
+                                      u16 opcode)
+{
+       /* zero out the desc */
+       memset((void *)desc, 0, sizeof(struct i40e_aq_desc));
+       desc->opcode = cpu_to_le16(opcode);
+       desc->flags = cpu_to_le16(I40E_AQ_FLAG_EI | I40E_AQ_FLAG_SI);
+}
+
+/**
+ *  i40e_clean_arq_element
+ *  @hw: pointer to the hw struct
+ *  @e: event info from the receive descriptor, includes any buffers
+ *  @pending: number of events that could be left to process
+ *
+ *  This function cleans one Admin Receive Queue element and returns
+ *  the contents through e.  It can also return how many events are
+ *  left to process through 'pending'
+ **/
+i40e_status i40e_clean_arq_element(struct i40e_hw *hw,
+                                            struct i40e_arq_event_info *e,
+                                            u16 *pending)
+{
+       i40e_status ret_code = 0;
+       u16 ntc = hw->aq.arq.next_to_clean;
+       struct i40e_aq_desc *desc;
+       struct i40e_dma_mem *bi;
+       u16 desc_idx;
+       u16 datalen;
+       u16 flags;
+       u16 ntu;
+
+       /* take the lock before we start messing with the ring */
+       mutex_lock(&hw->aq.arq_mutex);
+
+       /* set next_to_use to head */
+       ntu = (rd32(hw, hw->aq.arq.head) & I40E_PF_ARQH_ARQH_MASK);
+       if (ntu == ntc) {
+               /* nothing to do - shouldn't need to update ring's values */
+               i40e_debug(hw,
+                          I40E_DEBUG_AQ_MESSAGE,
+                          "AQRX: Queue is empty.\n");
+               ret_code = I40E_ERR_ADMIN_QUEUE_NO_WORK;
+               goto clean_arq_element_out;
+       }
+
+       /* now clean the next descriptor */
+       desc = I40E_ADMINQ_DESC(hw->aq.arq, ntc);
+       desc_idx = ntc;
+       i40e_debug_aq(hw,
+                     I40E_DEBUG_AQ_COMMAND,
+                     (void *)desc,
+                     hw->aq.arq.r.arq_bi[desc_idx].va);
+
+       flags = le16_to_cpu(desc->flags);
+       if (flags & I40E_AQ_FLAG_ERR) {
+               ret_code = I40E_ERR_ADMIN_QUEUE_ERROR;
+               hw->aq.arq_last_status =
+                       (enum i40e_admin_queue_err)le16_to_cpu(desc->retval);
+               i40e_debug(hw,
+                          I40E_DEBUG_AQ_MESSAGE,
+                          "AQRX: Event received with error 0x%X.\n",
+                          hw->aq.arq_last_status);
+       } else {
+               /* copy the event out; msg_size is clamped to the caller's
+                * buffer size so a large FW event cannot overrun msg_buf
+                */
+               memcpy(&e->desc, desc, sizeof(struct i40e_aq_desc));
+               datalen = le16_to_cpu(desc->datalen);
+               e->msg_size = min(datalen, e->msg_size);
+               if (e->msg_buf != NULL && (e->msg_size != 0))
+                       memcpy(e->msg_buf, hw->aq.arq.r.arq_bi[desc_idx].va,
+                              e->msg_size);
+       }
+
+       /* Restore the original datalen and buffer address in the desc,
+        * FW updates datalen to indicate the event message
+        * size
+        */
+       bi = &hw->aq.arq.r.arq_bi[ntc];
+       desc->datalen = cpu_to_le16((u16)bi->size);
+       desc->params.external.addr_high = cpu_to_le32(upper_32_bits(bi->pa));
+       desc->params.external.addr_low = cpu_to_le32(lower_32_bits(bi->pa));
+
+       /* set tail = the last cleaned desc index. */
+       wr32(hw, hw->aq.arq.tail, ntc);
+       /* ntc is updated to tail + 1 */
+       ntc++;
+       if (ntc == hw->aq.num_arq_entries)
+               ntc = 0;
+       hw->aq.arq.next_to_clean = ntc;
+       hw->aq.arq.next_to_use = ntu;
+
+clean_arq_element_out:
+       /* Set pending if needed, unlock and return */
+       if (pending != NULL)
+               *pending = (ntc > ntu ? hw->aq.arq.count : 0) + (ntu - ntc);
+       mutex_unlock(&hw->aq.arq_mutex);
+
+       return ret_code;
+}
+
+/**
+ *  i40e_resume_aq - re-enable the admin queues after a PF reset
+ *  @hw: pointer to the hardware structure
+ *
+ *  Registers are reset after PF reset, so the base/length registers are
+ *  reprogrammed and both queues re-enabled.  The receive queue must be
+ *  enabled with the ARQ masks (the original code reused the ATQ masks),
+ *  matching the mask names used in i40e_config_arq_regs().
+ **/
+void i40e_resume_aq(struct i40e_hw *hw)
+{
+       u32 reg = 0;
+
+       /* Registers are reset after PF reset */
+       hw->aq.asq.next_to_use = 0;
+       hw->aq.asq.next_to_clean = 0;
+
+       i40e_config_asq_regs(hw);
+       reg = hw->aq.num_asq_entries;
+
+       if (hw->mac.type == I40E_MAC_VF) {
+               reg |= I40E_VF_ATQLEN1_ATQENABLE_MASK;
+               wr32(hw, I40E_VF_ATQLEN1, reg);
+       } else {
+               reg |= I40E_PF_ATQLEN_ATQENABLE_MASK;
+               wr32(hw, I40E_PF_ATQLEN, reg);
+       }
+
+       hw->aq.arq.next_to_use = 0;
+       hw->aq.arq.next_to_clean = 0;
+
+       i40e_config_arq_regs(hw);
+       reg = hw->aq.num_arq_entries;
+
+       if (hw->mac.type == I40E_MAC_VF) {
+               reg |= I40E_VF_ARQLEN1_ARQENABLE_MASK;
+               wr32(hw, I40E_VF_ARQLEN1, reg);
+       } else {
+               reg |= I40E_PF_ARQLEN_ARQENABLE_MASK;
+               wr32(hw, I40E_PF_ARQLEN, reg);
+       }
+}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.h b/drivers/net/ethernet/intel/i40e/i40e_adminq.h
new file mode 100644 (file)
index 0000000..22e5ed6
--- /dev/null
@@ -0,0 +1,112 @@
+/*******************************************************************************
+ *
+ * Intel Ethernet Controller XL710 Family Linux Driver
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Contact Information:
+ * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ *
+ ******************************************************************************/
+
+#ifndef _I40E_ADMINQ_H_
+#define _I40E_ADMINQ_H_
+
+#include "i40e_osdep.h"
+#include "i40e_adminq_cmd.h"
+
+/* index into a descriptor ring: R.desc is the ring base, i the slot */
+#define I40E_ADMINQ_DESC(R, i)   \
+       (&(((struct i40e_aq_desc *)((R).desc))[i]))
+
+/* admin queue DMA allocations are 4K aligned */
+#define I40E_ADMINQ_DESC_ALIGNMENT 4096
+
+/* Per-queue ring state shared by the send (ASQ) and receive (ARQ)
+ * admin queues; 'r' tracks the per-descriptor DMA buffer info array.
+ */
+struct i40e_adminq_ring {
+       void *desc;             /* Descriptor ring memory */
+       void *details;          /* ASQ details */
+
+       union {
+               struct i40e_dma_mem *asq_bi;
+               struct i40e_dma_mem *arq_bi;
+       } r;
+
+       u64 dma_addr;           /* Physical address of the ring */
+       u16 count;              /* Number of descriptors */
+       u16 rx_buf_len;         /* Admin Receive Queue buffer length */
+
+       /* used for interrupt processing */
+       u16 next_to_use;
+       u16 next_to_clean;
+
+       /* used for queue tracking; head/tail hold register offsets */
+       u32 head;
+       u32 tail;
+};
+
+/* ASQ transaction details; one copy lives in each ring slot and is
+ * consumed by i40e_clean_asq() when the command completes
+ */
+struct i40e_asq_cmd_details {
+       void *callback; /* cast from type I40E_ADMINQ_CALLBACK */
+       u64 cookie;
+       u16 flags_ena;  /* descriptor flags to force on */
+       u16 flags_dis;  /* descriptor flags to force off */
+       bool async;
+       bool postpone;
+};
+
+/* index into the per-slot command details array, parallel to the ring */
+#define I40E_ADMINQ_DETAILS(R, i)   \
+       (&(((struct i40e_asq_cmd_details *)((R).details))[i]))
+
+/* ARQ event information returned by i40e_clean_arq_element(); msg_size
+ * is in/out: caller sets the buffer capacity, it is clamped to the
+ * actual event length on return
+ */
+struct i40e_arq_event_info {
+       struct i40e_aq_desc desc;
+       u16 msg_size;
+       u8 *msg_buf;
+};
+
+/* Admin Queue information; accessed throughout the driver as hw->aq */
+struct i40e_adminq_info {
+       struct i40e_adminq_ring arq;    /* receive queue */
+       struct i40e_adminq_ring asq;    /* send queue */
+       u16 num_arq_entries;            /* receive queue depth */
+       u16 num_asq_entries;            /* send queue depth */
+       u16 arq_buf_size;               /* receive queue buffer size */
+       u16 asq_buf_size;               /* send queue buffer size */
+       u16 fw_maj_ver;                 /* firmware major version */
+       u16 fw_min_ver;                 /* firmware minor version */
+       u16 api_maj_ver;                /* api major version */
+       u16 api_min_ver;                /* api minor version */
+
+       struct mutex asq_mutex; /* Send queue lock */
+       struct mutex arq_mutex; /* Receive queue lock */
+
+       struct i40e_dma_mem asq_mem;    /* send queue dynamic memory */
+       struct i40e_dma_mem arq_mem;    /* receive queue dynamic memory */
+
+       /* last status values on send and receive queues */
+       enum i40e_admin_queue_err asq_last_status;
+       enum i40e_admin_queue_err arq_last_status;
+};
+
+/* general information */
+#define I40E_AQ_LARGE_BUF      512
+#define I40E_ASQ_CMD_TIMEOUT   100000  /* usecs */
+
+void i40e_fill_default_direct_cmd_desc(struct i40e_aq_desc *desc,
+                                      u16 opcode);
+
+#endif /* _I40E_ADMINQ_H_ */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h b/drivers/net/ethernet/intel/i40e/i40e_adminq_cmd.h
new file mode 100644 (file)
index 0000000..e61ebdd
--- /dev/null
@@ -0,0 +1,2076 @@
+/*******************************************************************************
+ *
+ * Intel Ethernet Controller XL710 Family Linux Driver
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Contact Information:
+ * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ *
+ ******************************************************************************/
+
+#ifndef _I40E_ADMINQ_CMD_H_
+#define _I40E_ADMINQ_CMD_H_
+
+/* This header file defines the i40e Admin Queue commands and is shared between
+ * i40e Firmware and Software.
+ *
+ * This file needs to comply with the Linux Kernel coding style.
+ */
+
+#define I40E_FW_API_VERSION_MAJOR  0x0001
+#define I40E_FW_API_VERSION_MINOR  0x0000
+
+struct i40e_aq_desc {
+       __le16 flags;
+       __le16 opcode;
+       __le16 datalen;
+       __le16 retval;
+       __le32 cookie_high;
+       __le32 cookie_low;
+       union {
+               struct {
+                       __le32 param0;
+                       __le32 param1;
+                       __le32 param2;
+                       __le32 param3;
+               } internal;
+               struct {
+                       __le32 param0;
+                       __le32 param1;
+                       __le32 addr_high;
+                       __le32 addr_low;
+               } external;
+               u8 raw[16];
+       } params;
+};
+
+/* Flags sub-structure
+ * |0  |1  |2  |3  |4  |5  |6  |7  |8  |9  |10 |11 |12 |13 |14 |15 |
+ * |DD |CMP|ERR|VFE| * *  RESERVED * * |LB |RD |VFC|BUF|SI |EI |FE |
+ */
+
+/* command flags and offsets */
+#define I40E_AQ_FLAG_DD_SHIFT  0
+#define I40E_AQ_FLAG_CMP_SHIFT 1
+#define I40E_AQ_FLAG_ERR_SHIFT 2
+#define I40E_AQ_FLAG_VFE_SHIFT 3
+#define I40E_AQ_FLAG_LB_SHIFT  9
+#define I40E_AQ_FLAG_RD_SHIFT  10
+#define I40E_AQ_FLAG_VFC_SHIFT 11
+#define I40E_AQ_FLAG_BUF_SHIFT 12
+#define I40E_AQ_FLAG_SI_SHIFT  13
+#define I40E_AQ_FLAG_EI_SHIFT  14
+#define I40E_AQ_FLAG_FE_SHIFT  15
+
+#define I40E_AQ_FLAG_DD  (1 << I40E_AQ_FLAG_DD_SHIFT)  /* 0x1    */
+#define I40E_AQ_FLAG_CMP (1 << I40E_AQ_FLAG_CMP_SHIFT) /* 0x2    */
+#define I40E_AQ_FLAG_ERR (1 << I40E_AQ_FLAG_ERR_SHIFT) /* 0x4    */
+#define I40E_AQ_FLAG_VFE (1 << I40E_AQ_FLAG_VFE_SHIFT) /* 0x8    */
+#define I40E_AQ_FLAG_LB  (1 << I40E_AQ_FLAG_LB_SHIFT)  /* 0x200  */
+#define I40E_AQ_FLAG_RD  (1 << I40E_AQ_FLAG_RD_SHIFT)  /* 0x400  */
+#define I40E_AQ_FLAG_VFC (1 << I40E_AQ_FLAG_VFC_SHIFT) /* 0x800  */
+#define I40E_AQ_FLAG_BUF (1 << I40E_AQ_FLAG_BUF_SHIFT) /* 0x1000 */
+#define I40E_AQ_FLAG_SI  (1 << I40E_AQ_FLAG_SI_SHIFT)  /* 0x2000 */
+#define I40E_AQ_FLAG_EI  (1 << I40E_AQ_FLAG_EI_SHIFT)  /* 0x4000 */
+#define I40E_AQ_FLAG_FE  (1 << I40E_AQ_FLAG_FE_SHIFT)  /* 0x8000 */
+
+/* error codes */
+enum i40e_admin_queue_err {
+       I40E_AQ_RC_OK       = 0,    /* success */
+       I40E_AQ_RC_EPERM    = 1,    /* Operation not permitted */
+       I40E_AQ_RC_ENOENT   = 2,    /* No such element */
+       I40E_AQ_RC_ESRCH    = 3,    /* Bad opcode */
+       I40E_AQ_RC_EINTR    = 4,    /* operation interrupted */
+       I40E_AQ_RC_EIO      = 5,    /* I/O error */
+       I40E_AQ_RC_ENXIO    = 6,    /* No such resource */
+       I40E_AQ_RC_E2BIG    = 7,    /* Arg too long */
+       I40E_AQ_RC_EAGAIN   = 8,    /* Try again */
+       I40E_AQ_RC_ENOMEM   = 9,    /* Out of memory */
+       I40E_AQ_RC_EACCES   = 10,   /* Permission denied */
+       I40E_AQ_RC_EFAULT   = 11,   /* Bad address */
+       I40E_AQ_RC_EBUSY    = 12,   /* Device or resource busy */
+       I40E_AQ_RC_EEXIST   = 13,   /* object already exists */
+       I40E_AQ_RC_EINVAL   = 14,   /* Invalid argument */
+       I40E_AQ_RC_ENOTTY   = 15,   /* Not a typewriter */
+       I40E_AQ_RC_ENOSPC   = 16,   /* No space left or alloc failure */
+       I40E_AQ_RC_ENOSYS   = 17,   /* Function not implemented */
+       I40E_AQ_RC_ERANGE   = 18,   /* Parameter out of range */
+       I40E_AQ_RC_EFLUSHED = 19,   /* Cmd flushed because of prev cmd error */
+       I40E_AQ_RC_BAD_ADDR = 20,   /* Descriptor contains a bad pointer */
+       I40E_AQ_RC_EMODE    = 21,   /* Op not allowed in current dev mode */
+       I40E_AQ_RC_EFBIG    = 22,   /* File too large */
+};
+
+/* Admin Queue command opcodes */
+enum i40e_admin_queue_opc {
+       /* aq commands */
+       i40e_aqc_opc_get_version      = 0x0001,
+       i40e_aqc_opc_driver_version   = 0x0002,
+       i40e_aqc_opc_queue_shutdown   = 0x0003,
+
+       /* resource ownership */
+       i40e_aqc_opc_request_resource = 0x0008,
+       i40e_aqc_opc_release_resource = 0x0009,
+
+       i40e_aqc_opc_list_func_capabilities = 0x000A,
+       i40e_aqc_opc_list_dev_capabilities  = 0x000B,
+
+       i40e_aqc_opc_set_cppm_configuration = 0x0103,
+       i40e_aqc_opc_set_arp_proxy_entry    = 0x0104,
+       i40e_aqc_opc_set_ns_proxy_entry     = 0x0105,
+
+       /* LAA */
+       i40e_aqc_opc_mng_laa                = 0x0106,
+       i40e_aqc_opc_mac_address_read       = 0x0107,
+       i40e_aqc_opc_mac_address_write      = 0x0108,
+
+       /* internal switch commands */
+       i40e_aqc_opc_get_switch_config         = 0x0200,
+       i40e_aqc_opc_add_statistics            = 0x0201,
+       i40e_aqc_opc_remove_statistics         = 0x0202,
+       i40e_aqc_opc_set_port_parameters       = 0x0203,
+       i40e_aqc_opc_get_switch_resource_alloc = 0x0204,
+
+       i40e_aqc_opc_add_vsi                = 0x0210,
+       i40e_aqc_opc_update_vsi_parameters  = 0x0211,
+       i40e_aqc_opc_get_vsi_parameters     = 0x0212,
+
+       i40e_aqc_opc_add_pv                = 0x0220,
+       i40e_aqc_opc_update_pv_parameters  = 0x0221,
+       i40e_aqc_opc_get_pv_parameters     = 0x0222,
+
+       i40e_aqc_opc_add_veb               = 0x0230,
+       i40e_aqc_opc_update_veb_parameters = 0x0231,
+       i40e_aqc_opc_get_veb_parameters    = 0x0232,
+
+       i40e_aqc_opc_delete_element  = 0x0243,
+
+       i40e_aqc_opc_add_macvlan                  = 0x0250,
+       i40e_aqc_opc_remove_macvlan               = 0x0251,
+       i40e_aqc_opc_add_vlan                     = 0x0252,
+       i40e_aqc_opc_remove_vlan                  = 0x0253,
+       i40e_aqc_opc_set_vsi_promiscuous_modes    = 0x0254,
+       i40e_aqc_opc_add_tag                      = 0x0255,
+       i40e_aqc_opc_remove_tag                   = 0x0256,
+       i40e_aqc_opc_add_multicast_etag           = 0x0257,
+       i40e_aqc_opc_remove_multicast_etag        = 0x0258,
+       i40e_aqc_opc_update_tag                   = 0x0259,
+       i40e_aqc_opc_add_control_packet_filter    = 0x025A,
+       i40e_aqc_opc_remove_control_packet_filter = 0x025B,
+       i40e_aqc_opc_add_cloud_filters            = 0x025C,
+       i40e_aqc_opc_remove_cloud_filters         = 0x025D,
+
+       i40e_aqc_opc_add_mirror_rule    = 0x0260,
+       i40e_aqc_opc_delete_mirror_rule = 0x0261,
+
+       i40e_aqc_opc_set_storm_control_config = 0x0280,
+       i40e_aqc_opc_get_storm_control_config = 0x0281,
+
+       /* DCB commands */
+       i40e_aqc_opc_dcb_ignore_pfc = 0x0301,
+       i40e_aqc_opc_dcb_updated    = 0x0302,
+
+       /* TX scheduler */
+       i40e_aqc_opc_configure_vsi_bw_limit            = 0x0400,
+       i40e_aqc_opc_configure_vsi_ets_sla_bw_limit    = 0x0406,
+       i40e_aqc_opc_configure_vsi_tc_bw               = 0x0407,
+       i40e_aqc_opc_query_vsi_bw_config               = 0x0408,
+       i40e_aqc_opc_query_vsi_ets_sla_config          = 0x040A,
+       i40e_aqc_opc_configure_switching_comp_bw_limit = 0x0410,
+
+       i40e_aqc_opc_enable_switching_comp_ets             = 0x0413,
+       i40e_aqc_opc_modify_switching_comp_ets             = 0x0414,
+       i40e_aqc_opc_disable_switching_comp_ets            = 0x0415,
+       i40e_aqc_opc_configure_switching_comp_ets_bw_limit = 0x0416,
+       i40e_aqc_opc_configure_switching_comp_bw_config    = 0x0417,
+       i40e_aqc_opc_query_switching_comp_ets_config       = 0x0418,
+       i40e_aqc_opc_query_port_ets_config                 = 0x0419,
+       i40e_aqc_opc_query_switching_comp_bw_config        = 0x041A,
+       i40e_aqc_opc_suspend_port_tx                       = 0x041B,
+       i40e_aqc_opc_resume_port_tx                        = 0x041C,
+
+       /* hmc */
+       i40e_aqc_opc_query_hmc_resource_profile = 0x0500,
+       i40e_aqc_opc_set_hmc_resource_profile   = 0x0501,
+
+       /* phy commands */
+       i40e_aqc_opc_get_phy_abilities   = 0x0600,
+       i40e_aqc_opc_set_phy_config      = 0x0601,
+       i40e_aqc_opc_set_mac_config      = 0x0603,
+       i40e_aqc_opc_set_link_restart_an = 0x0605,
+       i40e_aqc_opc_get_link_status     = 0x0607,
+       i40e_aqc_opc_set_phy_int_mask    = 0x0613,
+       i40e_aqc_opc_get_local_advt_reg  = 0x0614,
+       i40e_aqc_opc_set_local_advt_reg  = 0x0615,
+       i40e_aqc_opc_get_partner_advt    = 0x0616,
+       i40e_aqc_opc_set_lb_modes        = 0x0618,
+       i40e_aqc_opc_get_phy_wol_caps    = 0x0621,
+       i40e_aqc_opc_set_phy_reset       = 0x0622,
+       i40e_aqc_opc_upload_ext_phy_fm   = 0x0625,
+
+       /* NVM commands */
+       i40e_aqc_opc_nvm_read   = 0x0701,
+       i40e_aqc_opc_nvm_erase  = 0x0702,
+       i40e_aqc_opc_nvm_update = 0x0703,
+
+       /* virtualization commands */
+       i40e_aqc_opc_send_msg_to_pf   = 0x0801,
+       i40e_aqc_opc_send_msg_to_vf   = 0x0802,
+       i40e_aqc_opc_send_msg_to_peer = 0x0803,
+
+       /* alternate structure */
+       i40e_aqc_opc_alternate_write          = 0x0900,
+       i40e_aqc_opc_alternate_write_indirect = 0x0901,
+       i40e_aqc_opc_alternate_read           = 0x0902,
+       i40e_aqc_opc_alternate_read_indirect  = 0x0903,
+       i40e_aqc_opc_alternate_write_done     = 0x0904,
+       i40e_aqc_opc_alternate_set_mode       = 0x0905,
+       i40e_aqc_opc_alternate_clear_port     = 0x0906,
+
+       /* LLDP commands */
+       i40e_aqc_opc_lldp_get_mib    = 0x0A00,
+       i40e_aqc_opc_lldp_update_mib = 0x0A01,
+       i40e_aqc_opc_lldp_add_tlv    = 0x0A02,
+       i40e_aqc_opc_lldp_update_tlv = 0x0A03,
+       i40e_aqc_opc_lldp_delete_tlv = 0x0A04,
+       i40e_aqc_opc_lldp_stop       = 0x0A05,
+       i40e_aqc_opc_lldp_start      = 0x0A06,
+
+       /* Tunnel commands */
+       i40e_aqc_opc_add_udp_tunnel       = 0x0B00,
+       i40e_aqc_opc_del_udp_tunnel       = 0x0B01,
+       i40e_aqc_opc_tunnel_key_structure = 0x0B10,
+
+       /* Async Events */
+       i40e_aqc_opc_event_lan_overflow = 0x1001,
+
+       /* OEM commands */
+       i40e_aqc_opc_oem_parameter_change     = 0xFE00,
+       i40e_aqc_opc_oem_device_status_change = 0xFE01,
+
+       /* debug commands */
+       i40e_aqc_opc_debug_get_deviceid     = 0xFF00,
+       i40e_aqc_opc_debug_set_mode         = 0xFF01,
+       i40e_aqc_opc_debug_read_reg         = 0xFF03,
+       i40e_aqc_opc_debug_write_reg        = 0xFF04,
+       i40e_aqc_opc_debug_read_reg_sg      = 0xFF05,
+       i40e_aqc_opc_debug_write_reg_sg     = 0xFF06,
+       i40e_aqc_opc_debug_modify_reg       = 0xFF07,
+       i40e_aqc_opc_debug_dump_internals   = 0xFF08,
+       i40e_aqc_opc_debug_modify_internals = 0xFF09,
+};
+
+/* command structures and indirect data structures */
+
+/* Structure naming conventions:
+ * - no suffix for direct command descriptor structures
+ * - _data for indirect sent data
+ * - _resp for indirect return data (data which is both sent and returned uses _data)
+ * - _completion for direct return data
+ * - _element_ for repeated elements (may also be _data or _resp)
+ *
+ * Command structures are expected to overlay the params.raw member of the basic
+ * descriptor, and as such cannot exceed 16 bytes in length.
+ */
+
+/* This macro is used to generate a compilation error if a structure
+ * is not exactly the correct length. It gives a divide by zero error if the
+ * structure is not of the correct size, otherwise it creates an enum that is
+ * never used.
+ */
+#define I40E_CHECK_STRUCT_LEN(n, X) enum i40e_static_assert_enum_##X \
+       { i40e_static_assert_##X = (n)/((sizeof(struct X) == (n)) ? 1 : 0) }
+
+/* This macro is used extensively to ensure that command structures are 16
+ * bytes in length as they have to map to the raw array of that size.
+ */
+#define I40E_CHECK_CMD_LENGTH(X) I40E_CHECK_STRUCT_LEN(16, X)
+
+/* internal (0x00XX) commands */
+
+/* Get version (direct 0x0001) */
+struct i40e_aqc_get_version {
+       __le32 rom_ver;
+       __le32 fw_build;
+       __le16 fw_major;
+       __le16 fw_minor;
+       __le16 api_major;
+       __le16 api_minor;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_get_version);
+
+/* Send driver version (direct 0x0002) */
+struct i40e_aqc_driver_version {
+       u8     driver_major_ver;
+       u8     driver_minor_ver;
+       u8     driver_build_ver;
+       u8     driver_subbuild_ver;
+       u8     reserved[12];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_driver_version);
+
+/* Queue Shutdown (direct 0x0003) */
+struct i40e_aqc_queue_shutdown {
+       __le32     driver_unloading;
+#define I40E_AQ_DRIVER_UNLOADING    0x1
+       u8     reserved[12];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_queue_shutdown);
+
+/* Request resource ownership (direct 0x0008)
+ * Release resource ownership (direct 0x0009)
+ */
+#define I40E_AQ_RESOURCE_NVM               1
+#define I40E_AQ_RESOURCE_SDP               2
+#define I40E_AQ_RESOURCE_ACCESS_READ       1
+#define I40E_AQ_RESOURCE_ACCESS_WRITE      2
+#define I40E_AQ_RESOURCE_NVM_READ_TIMEOUT  3000
+#define I40E_AQ_RESOURCE_NVM_WRITE_TIMEOUT 180000
+
+struct i40e_aqc_request_resource {
+       __le16 resource_id;
+       __le16 access_type;
+       __le32 timeout;
+       __le32 resource_number;
+       u8     reserved[4];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_request_resource);
+
+/* Get function capabilities (indirect 0x000A)
+ * Get device capabilities (indirect 0x000B)
+ */
+struct i40e_aqc_list_capabilites {
+       u8 command_flags;
+#define I40E_AQ_LIST_CAP_PF_INDEX_EN     1
+       u8 pf_index;
+       u8 reserved[2];
+       __le32 count;
+       __le32 addr_high;
+       __le32 addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_list_capabilites);
+
+struct i40e_aqc_list_capabilities_element_resp {
+       __le16 id;
+       u8     major_rev;
+       u8     minor_rev;
+       __le32 number;
+       __le32 logical_id;
+       __le32 phys_id;
+       u8     reserved[16];
+};
+
+/* list of caps */
+
+#define I40E_AQ_CAP_ID_SWITCH_MODE      0x0001
+#define I40E_AQ_CAP_ID_MNG_MODE         0x0002
+#define I40E_AQ_CAP_ID_NPAR_ACTIVE      0x0003
+#define I40E_AQ_CAP_ID_OS2BMC_CAP       0x0004
+#define I40E_AQ_CAP_ID_FUNCTIONS_VALID  0x0005
+#define I40E_AQ_CAP_ID_ALTERNATE_RAM    0x0006
+#define I40E_AQ_CAP_ID_SRIOV            0x0012
+#define I40E_AQ_CAP_ID_VF               0x0013
+#define I40E_AQ_CAP_ID_VMDQ             0x0014
+#define I40E_AQ_CAP_ID_8021QBG          0x0015
+#define I40E_AQ_CAP_ID_8021QBR          0x0016
+#define I40E_AQ_CAP_ID_VSI              0x0017
+#define I40E_AQ_CAP_ID_DCB              0x0018
+#define I40E_AQ_CAP_ID_FCOE             0x0021
+#define I40E_AQ_CAP_ID_RSS              0x0040
+#define I40E_AQ_CAP_ID_RXQ              0x0041
+#define I40E_AQ_CAP_ID_TXQ              0x0042
+#define I40E_AQ_CAP_ID_MSIX             0x0043
+#define I40E_AQ_CAP_ID_VF_MSIX          0x0044
+#define I40E_AQ_CAP_ID_FLOW_DIRECTOR    0x0045
+#define I40E_AQ_CAP_ID_1588             0x0046
+#define I40E_AQ_CAP_ID_IWARP            0x0051
+#define I40E_AQ_CAP_ID_LED              0x0061
+#define I40E_AQ_CAP_ID_SDP              0x0062
+#define I40E_AQ_CAP_ID_MDIO             0x0063
+#define I40E_AQ_CAP_ID_FLEX10           0x00F1
+#define I40E_AQ_CAP_ID_CEM              0x00F2
+
+/* Set CPPM Configuration (direct 0x0103) */
+struct i40e_aqc_cppm_configuration {
+       __le16 command_flags;
+#define I40E_AQ_CPPM_EN_LTRC    0x0800
+#define I40E_AQ_CPPM_EN_DMCTH   0x1000
+#define I40E_AQ_CPPM_EN_DMCTLX  0x2000
+#define I40E_AQ_CPPM_EN_HPTC    0x4000
+#define I40E_AQ_CPPM_EN_DMARC   0x8000
+       __le16 ttlx;
+       __le32 dmacr;
+       __le16 dmcth;
+       u8     hptc;
+       u8     reserved;
+       __le32 pfltrc;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_cppm_configuration);
+
+/* Set ARP Proxy command / response (indirect 0x0104) */
+struct i40e_aqc_arp_proxy_data {
+       __le16 command_flags;
+#define I40E_AQ_ARP_INIT_IPV4           0x0008
+#define I40E_AQ_ARP_UNSUP_CTL           0x0010
+#define I40E_AQ_ARP_ENA                 0x0020
+#define I40E_AQ_ARP_ADD_IPV4            0x0040
+#define I40E_AQ_ARP_DEL_IPV4            0x0080
+       __le16 table_id;
+       __le32 pfpm_proxyfc;
+       __le32 ip_addr;
+       u8     mac_addr[6];
+};
+
+/* Set NS Proxy Table Entry Command (indirect 0x0105) */
+struct i40e_aqc_ns_proxy_data {
+       __le16 table_idx_mac_addr_0;
+       __le16 table_idx_mac_addr_1;
+       __le16 table_idx_ipv6_0;
+       __le16 table_idx_ipv6_1;
+       __le16 control;
+#define I40E_AQ_NS_PROXY_ADD_0             0x0100
+#define I40E_AQ_NS_PROXY_DEL_0             0x0200
+#define I40E_AQ_NS_PROXY_ADD_1             0x0400
+#define I40E_AQ_NS_PROXY_DEL_1             0x0800
+#define I40E_AQ_NS_PROXY_ADD_IPV6_0        0x1000
+#define I40E_AQ_NS_PROXY_DEL_IPV6_0        0x2000
+#define I40E_AQ_NS_PROXY_ADD_IPV6_1        0x4000
+#define I40E_AQ_NS_PROXY_DEL_IPV6_1        0x8000
+#define I40E_AQ_NS_PROXY_COMMAND_SEQ       0x0001
+#define I40E_AQ_NS_PROXY_INIT_IPV6_TBL     0x0002
+#define I40E_AQ_NS_PROXY_INIT_MAC_TBL      0x0004
+       u8     mac_addr_0[6];
+       u8     mac_addr_1[6];
+       u8     local_mac_addr[6];
+       u8     ipv6_addr_0[16]; /* Warning! spec specifies BE byte order */
+       u8     ipv6_addr_1[16];
+};
+
+/* Manage LAA Command (0x0106) - obsolete */
+struct i40e_aqc_mng_laa {
+       __le16  command_flags;
+#define I40E_AQ_LAA_FLAG_WR   0x8000
+       u8     reserved[2];
+       __le32 sal;
+       __le16 sah;
+       u8     reserved2[6];
+};
+
+/* Manage MAC Address Read Command (0x0107) */
+struct i40e_aqc_mac_address_read {
+       __le16  command_flags;
+#define I40E_AQC_LAN_ADDR_VALID   0x10
+#define I40E_AQC_SAN_ADDR_VALID   0x20
+#define I40E_AQC_PORT_ADDR_VALID  0x40
+#define I40E_AQC_WOL_ADDR_VALID   0x80
+#define I40E_AQC_ADDR_VALID_MASK  0xf0
+       u8     reserved[6];
+       __le32 addr_high;
+       __le32 addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_mac_address_read);
+
+struct i40e_aqc_mac_address_read_data {
+       u8 pf_lan_mac[6];
+       u8 pf_san_mac[6];
+       u8 port_mac[6];
+       u8 pf_wol_mac[6];
+};
+
+I40E_CHECK_STRUCT_LEN(24, i40e_aqc_mac_address_read_data);
+
+/* Manage MAC Address Write Command (0x0108) */
+struct i40e_aqc_mac_address_write {
+       __le16 command_flags;
+#define I40E_AQC_WRITE_TYPE_LAA_ONLY    0x0000
+#define I40E_AQC_WRITE_TYPE_LAA_WOL     0x4000
+#define I40E_AQC_WRITE_TYPE_PORT        0x8000
+#define I40E_AQC_WRITE_TYPE_MASK        0xc000
+       __le16 mac_sah;
+       __le32 mac_sal;
+       u8     reserved[8];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_mac_address_write);
+
+/* Switch configuration commands (0x02xx) */
+
+/* Used by many indirect commands that only pass an seid and a buffer in the
+ * command
+ */
+struct i40e_aqc_switch_seid {
+       __le16 seid;
+       u8     reserved[6];
+       __le32 addr_high;
+       __le32 addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_switch_seid);
+
+/* Get Switch Configuration command (indirect 0x0200)
+ * uses i40e_aqc_switch_seid for the descriptor
+ */
+struct i40e_aqc_get_switch_config_header_resp {
+       __le16 num_reported;
+       __le16 num_total;
+       u8     reserved[12];
+};
+
+struct i40e_aqc_switch_config_element_resp {
+       u8     element_type;
+#define I40E_AQ_SW_ELEM_TYPE_MAC        1
+#define I40E_AQ_SW_ELEM_TYPE_PF         2
+#define I40E_AQ_SW_ELEM_TYPE_VF         3
+#define I40E_AQ_SW_ELEM_TYPE_EMP        4
+#define I40E_AQ_SW_ELEM_TYPE_BMC        5
+#define I40E_AQ_SW_ELEM_TYPE_PV         16
+#define I40E_AQ_SW_ELEM_TYPE_VEB        17
+#define I40E_AQ_SW_ELEM_TYPE_PA         18
+#define I40E_AQ_SW_ELEM_TYPE_VSI        19
+       u8     revision;
+#define I40E_AQ_SW_ELEM_REV_1           1
+       __le16 seid;
+       __le16 uplink_seid;
+       __le16 downlink_seid;
+       u8     reserved[3];
+       u8     connection_type;
+#define I40E_AQ_CONN_TYPE_REGULAR       0x1
+#define I40E_AQ_CONN_TYPE_DEFAULT       0x2
+#define I40E_AQ_CONN_TYPE_CASCADED      0x3
+       __le16 scheduler_id;
+       __le16 element_info;
+};
+
+/* Get Switch Configuration (indirect 0x0200)
+ *    an array of elements are returned in the response buffer
+ *    the first in the array is the header, remainder are elements
+ */
+struct i40e_aqc_get_switch_config_resp {
+       struct i40e_aqc_get_switch_config_header_resp header;
+       struct i40e_aqc_switch_config_element_resp    element[1];
+};
+
+/* Add Statistics (direct 0x0201)
+ * Remove Statistics (direct 0x0202)
+ */
+struct i40e_aqc_add_remove_statistics {
+       __le16 seid;
+       __le16 vlan;
+       __le16 stat_index;
+       u8     reserved[10];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_add_remove_statistics);
+
+/* Set Port Parameters command (direct 0x0203) */
+struct i40e_aqc_set_port_parameters {
+       __le16 command_flags;
+#define I40E_AQ_SET_P_PARAMS_SAVE_BAD_PACKETS   1
+#define I40E_AQ_SET_P_PARAMS_PAD_SHORT_PACKETS  2 /* must set! */
+#define I40E_AQ_SET_P_PARAMS_DOUBLE_VLAN_ENA    4
+       __le16 bad_frame_vsi;
+       __le16 default_seid;        /* reserved for command */
+       u8     reserved[10];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_set_port_parameters);
+
+/* Get Switch Resource Allocation (indirect 0x0204) */
+struct i40e_aqc_get_switch_resource_alloc {
+       u8     num_entries;         /* reserved for command */
+       u8     reserved[7];
+       __le32 addr_high;
+       __le32 addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_get_switch_resource_alloc);
+
+/* expect an array of these structs in the response buffer */
+struct i40e_aqc_switch_resource_alloc_element_resp {
+       u8     resource_type;
+#define I40E_AQ_RESOURCE_TYPE_VEB                 0x0
+#define I40E_AQ_RESOURCE_TYPE_VSI                 0x1
+#define I40E_AQ_RESOURCE_TYPE_MACADDR             0x2
+#define I40E_AQ_RESOURCE_TYPE_STAG                0x3
+#define I40E_AQ_RESOURCE_TYPE_ETAG                0x4
+#define I40E_AQ_RESOURCE_TYPE_MULTICAST_HASH      0x5
+#define I40E_AQ_RESOURCE_TYPE_UNICAST_HASH        0x6
+#define I40E_AQ_RESOURCE_TYPE_VLAN                0x7
+#define I40E_AQ_RESOURCE_TYPE_VSI_LIST_ENTRY      0x8
+#define I40E_AQ_RESOURCE_TYPE_ETAG_LIST_ENTRY     0x9
+#define I40E_AQ_RESOURCE_TYPE_VLAN_STAT_POOL      0xA
+#define I40E_AQ_RESOURCE_TYPE_MIRROR_RULE         0xB
+#define I40E_AQ_RESOURCE_TYPE_QUEUE_SETS          0xC
+#define I40E_AQ_RESOURCE_TYPE_VLAN_FILTERS        0xD
+#define I40E_AQ_RESOURCE_TYPE_INNER_MAC_FILTERS   0xF
+#define I40E_AQ_RESOURCE_TYPE_IP_FILTERS          0x10
+#define I40E_AQ_RESOURCE_TYPE_GRE_VN_KEYS         0x11
+#define I40E_AQ_RESOURCE_TYPE_VN2_KEYS            0x12
+#define I40E_AQ_RESOURCE_TYPE_TUNNEL_PORTS        0x13
+       u8     reserved1;
+       __le16 guaranteed;
+       __le16 total;
+       __le16 used;
+       __le16 total_unalloced;
+       u8     reserved2[6];
+};
+
+/* Add VSI (indirect 0x0210)
+ *    this indirect command uses struct i40e_aqc_vsi_properties_data
+ *    as the indirect buffer (128 bytes)
+ *
+ * Update VSI (indirect 0x0211) Get VSI (indirect 0x0212)
+ *    use the generic i40e_aqc_switch_seid descriptor format
+ *    use the same completion and data structure as Add VSI
+ */
+struct i40e_aqc_add_get_update_vsi {
+       __le16 uplink_seid;
+       u8     connection_type;
+#define I40E_AQ_VSI_CONN_TYPE_NORMAL            0x1
+#define I40E_AQ_VSI_CONN_TYPE_DEFAULT           0x2
+#define I40E_AQ_VSI_CONN_TYPE_CASCADED          0x3
+       u8     reserved1;
+       u8     vf_id;
+       u8     reserved2;
+       __le16 vsi_flags;
+#define I40E_AQ_VSI_TYPE_SHIFT          0x0
+#define I40E_AQ_VSI_TYPE_MASK           (0x3 << I40E_AQ_VSI_TYPE_SHIFT)
+#define I40E_AQ_VSI_TYPE_VF             0x0
+#define I40E_AQ_VSI_TYPE_VMDQ2          0x1
+#define I40E_AQ_VSI_TYPE_PF             0x2
+#define I40E_AQ_VSI_TYPE_EMP_MNG        0x3
+#define I40E_AQ_VSI_FLAG_CASCADED_PV    0x4
+#define I40E_AQ_VSI_FLAG_CLOUD_VSI      0x8
+       __le32 addr_high;
+       __le32 addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_add_get_update_vsi);
+
+struct i40e_aqc_add_get_update_vsi_completion {
+       __le16 seid;
+       __le16 vsi_number;
+       __le16 vsi_used;
+       __le16 vsi_free;
+       __le32 addr_high;
+       __le32 addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_add_get_update_vsi_completion);
+
+struct i40e_aqc_vsi_properties_data {
+       /* first 96 bytes are written by SW */
+       __le16 valid_sections;
+#define I40E_AQ_VSI_PROP_SWITCH_VALID       0x0001
+#define I40E_AQ_VSI_PROP_SECURITY_VALID     0x0002
+#define I40E_AQ_VSI_PROP_VLAN_VALID         0x0004
+#define I40E_AQ_VSI_PROP_CAS_PV_VALID       0x0008
+#define I40E_AQ_VSI_PROP_INGRESS_UP_VALID   0x0010
+#define I40E_AQ_VSI_PROP_EGRESS_UP_VALID    0x0020
+#define I40E_AQ_VSI_PROP_QUEUE_MAP_VALID    0x0040
+#define I40E_AQ_VSI_PROP_QUEUE_OPT_VALID    0x0080
+#define I40E_AQ_VSI_PROP_OUTER_UP_VALID     0x0100
+#define I40E_AQ_VSI_PROP_SCHED_VALID        0x0200
+       /* switch section */
+       __le16 switch_id; /* 12bit id combined with flags below */
+#define I40E_AQ_VSI_SW_ID_SHIFT             0x0000
+#define I40E_AQ_VSI_SW_ID_MASK              (0xFFF << I40E_AQ_VSI_SW_ID_SHIFT)
+#define I40E_AQ_VSI_SW_ID_FLAG_NOT_STAG     0x1000
+#define I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB     0x2000
+#define I40E_AQ_VSI_SW_ID_FLAG_LOCAL_LB     0x4000
+       u8     sw_reserved[2];
+       /* security section */
+       u8     sec_flags;
+#define I40E_AQ_VSI_SEC_FLAG_ALLOW_DEST_OVRD    0x01
+#define I40E_AQ_VSI_SEC_FLAG_ENABLE_VLAN_CHK    0x02
+#define I40E_AQ_VSI_SEC_FLAG_ENABLE_MAC_CHK     0x04
+       u8     sec_reserved;
+       /* VLAN section */
+       __le16 pvid; /* VLANS include priority bits */
+       __le16 fcoe_pvid;
+       u8     port_vlan_flags;
+#define I40E_AQ_VSI_PVLAN_MODE_SHIFT        0x00
+#define I40E_AQ_VSI_PVLAN_MODE_MASK         (0x03 << \
+                                               I40E_AQ_VSI_PVLAN_MODE_SHIFT)
+#define I40E_AQ_VSI_PVLAN_MODE_TAGGED       0x01
+#define I40E_AQ_VSI_PVLAN_MODE_UNTAGGED     0x02
+#define I40E_AQ_VSI_PVLAN_MODE_ALL          0x03
+#define I40E_AQ_VSI_PVLAN_INSERT_PVID       0x04
+#define I40E_AQ_VSI_PVLAN_EMOD_SHIFT        0x03
+#define I40E_AQ_VSI_PVLAN_EMOD_MASK         (0x3 << \
+                                       I40E_AQ_VSI_PVLAN_EMOD_SHIFT)
+#define I40E_AQ_VSI_PVLAN_EMOD_STR_BOTH     0x0
+#define I40E_AQ_VSI_PVLAN_EMOD_STR_UP       0x08
+#define I40E_AQ_VSI_PVLAN_EMOD_STR          0x10
+#define I40E_AQ_VSI_PVLAN_EMOD_NOTHING      0x18
+       u8     pvlan_reserved[3];
+       /* ingress egress up sections */
+       __le32 ingress_table; /* bitmap, 3 bits per up */
+#define I40E_AQ_VSI_UP_TABLE_UP0_SHIFT      0
+#define I40E_AQ_VSI_UP_TABLE_UP0_MASK       (0x7 << \
+                                       I40E_AQ_VSI_UP_TABLE_UP0_SHIFT)
+#define I40E_AQ_VSI_UP_TABLE_UP1_SHIFT      3
+#define I40E_AQ_VSI_UP_TABLE_UP1_MASK       (0x7 << \
+                                       I40E_AQ_VSI_UP_TABLE_UP1_SHIFT)
+#define I40E_AQ_VSI_UP_TABLE_UP2_SHIFT      6
+#define I40E_AQ_VSI_UP_TABLE_UP2_MASK       (0x7 << \
+                                       I40E_AQ_VSI_UP_TABLE_UP2_SHIFT)
+#define I40E_AQ_VSI_UP_TABLE_UP3_SHIFT      9
+#define I40E_AQ_VSI_UP_TABLE_UP3_MASK       (0x7 << \
+                                       I40E_AQ_VSI_UP_TABLE_UP3_SHIFT)
+#define I40E_AQ_VSI_UP_TABLE_UP4_SHIFT      12
+#define I40E_AQ_VSI_UP_TABLE_UP4_MASK       (0x7 << \
+                                       I40E_AQ_VSI_UP_TABLE_UP4_SHIFT)
+#define I40E_AQ_VSI_UP_TABLE_UP5_SHIFT      15
+#define I40E_AQ_VSI_UP_TABLE_UP5_MASK       (0x7 << \
+                                       I40E_AQ_VSI_UP_TABLE_UP5_SHIFT)
+#define I40E_AQ_VSI_UP_TABLE_UP6_SHIFT      18
+#define I40E_AQ_VSI_UP_TABLE_UP6_MASK       (0x7 << \
+                                       I40E_AQ_VSI_UP_TABLE_UP6_SHIFT)
+#define I40E_AQ_VSI_UP_TABLE_UP7_SHIFT      21
+#define I40E_AQ_VSI_UP_TABLE_UP7_MASK       (0x7 << \
+                                       I40E_AQ_VSI_UP_TABLE_UP7_SHIFT)
+       __le32 egress_table;   /* same defines as for ingress table */
+       /* cascaded PV section */
+       __le16 cas_pv_tag;
+       u8     cas_pv_flags;
+#define I40E_AQ_VSI_CAS_PV_TAGX_SHIFT      0x00
+#define I40E_AQ_VSI_CAS_PV_TAGX_MASK       (0x03 << \
+                                               I40E_AQ_VSI_CAS_PV_TAGX_SHIFT)
+#define I40E_AQ_VSI_CAS_PV_TAGX_LEAVE      0x00
+#define I40E_AQ_VSI_CAS_PV_TAGX_REMOVE     0x01
+#define I40E_AQ_VSI_CAS_PV_TAGX_COPY       0x02
+#define I40E_AQ_VSI_CAS_PV_INSERT_TAG      0x10
+#define I40E_AQ_VSI_CAS_PV_ETAG_PRUNE      0x20
+#define I40E_AQ_VSI_CAS_PV_ACCEPT_HOST_TAG 0x40
+       u8     cas_pv_reserved;
+       /* queue mapping section */
+       __le16 mapping_flags;
+#define I40E_AQ_VSI_QUE_MAP_CONTIG          0x0
+#define I40E_AQ_VSI_QUE_MAP_NONCONTIG       0x1
+       __le16 queue_mapping[16];
+#define I40E_AQ_VSI_QUEUE_SHIFT             0x0
+#define I40E_AQ_VSI_QUEUE_MASK              (0x7FF << I40E_AQ_VSI_QUEUE_SHIFT)
+       __le16 tc_mapping[8];
+#define I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT     0
+#define I40E_AQ_VSI_TC_QUE_OFFSET_MASK      (0x1FF << \
+                                               I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT)
+#define I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT     9
+#define I40E_AQ_VSI_TC_QUE_NUMBER_MASK      (0x7 << \
+                                               I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT)
+       /* queueing option section */
+       u8     queueing_opt_flags;
+#define I40E_AQ_VSI_QUE_OPT_TCP_ENA         0x10
+#define I40E_AQ_VSI_QUE_OPT_FCOE_ENA        0x20
+       u8     queueing_opt_reserved[3];
+       /* scheduler section */
+       u8     up_enable_bits;
+       u8     sched_reserved;
+       /* outer up section */
+       __le32 outer_up_table; /* same structure and defines as ingress table */
+       u8     cmd_reserved[8];
+       /* last 32 bytes are written by FW */
+       __le16 qs_handle[8];
+#define I40E_AQ_VSI_QS_HANDLE_INVALID  0xFFFF
+       __le16 stat_counter_idx;
+       __le16 sched_id;
+       u8     resp_reserved[12];
+};
+
+I40E_CHECK_STRUCT_LEN(128, i40e_aqc_vsi_properties_data);
+
+/* Add Port Virtualizer (direct 0x0220)
+ * also used for update PV (direct 0x0221) but only flags are used
+ * (IS_CTRL_PORT only works on add PV)
+ */
+struct i40e_aqc_add_update_pv {
+       __le16 command_flags;
+#define I40E_AQC_PV_FLAG_PV_TYPE                0x1
+#define I40E_AQC_PV_FLAG_FWD_UNKNOWN_STAG_EN    0x2
+#define I40E_AQC_PV_FLAG_FWD_UNKNOWN_ETAG_EN    0x4
+#define I40E_AQC_PV_FLAG_IS_CTRL_PORT           0x8
+       __le16 uplink_seid;
+       __le16 connected_seid;
+       u8     reserved[10];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_add_update_pv);
+
+struct i40e_aqc_add_update_pv_completion {
+       /* reserved for update; for add also encodes error if rc == ENOSPC */
+       __le16 pv_seid;
+#define I40E_AQC_PV_ERR_FLAG_NO_PV               0x1
+#define I40E_AQC_PV_ERR_FLAG_NO_SCHED            0x2
+#define I40E_AQC_PV_ERR_FLAG_NO_COUNTER          0x4
+#define I40E_AQC_PV_ERR_FLAG_NO_ENTRY            0x8
+       u8     reserved[14];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_add_update_pv_completion);
+
+/* Get PV Params (direct 0x0222)
+ * uses i40e_aqc_switch_seid for the descriptor
+ */
+
+struct i40e_aqc_get_pv_params_completion {
+       __le16 seid;
+       __le16 default_stag;
+       __le16 pv_flags; /* same flags as add_pv */
+#define I40E_AQC_GET_PV_PV_TYPE            0x1
+#define I40E_AQC_GET_PV_FRWD_UNKNOWN_STAG  0x2
+#define I40E_AQC_GET_PV_FRWD_UNKNOWN_ETAG  0x4
+       u8     reserved[8];
+       __le16 default_port_seid;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_get_pv_params_completion);
+
+/* Add VEB (direct 0x0230) */
+struct i40e_aqc_add_veb {
+       __le16 uplink_seid;
+       __le16 downlink_seid;
+       __le16 veb_flags;
+#define I40E_AQC_ADD_VEB_FLOATING           0x1
+#define I40E_AQC_ADD_VEB_PORT_TYPE_SHIFT    1
+#define I40E_AQC_ADD_VEB_PORT_TYPE_MASK     (0x3 << \
+                                       I40E_AQC_ADD_VEB_PORT_TYPE_SHIFT)
+#define I40E_AQC_ADD_VEB_PORT_TYPE_DEFAULT  0x2
+#define I40E_AQC_ADD_VEB_PORT_TYPE_DATA     0x4
+#define I40E_AQC_ADD_VEB_ENABLE_L2_FILTER   0x8
+       u8     enable_tcs;
+       u8     reserved[9];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_add_veb);
+
+struct i40e_aqc_add_veb_completion {
+       u8     reserved[6];
+       __le16 switch_seid;
+       /* also encodes error if rc == ENOSPC; codes are the same as add_pv */
+       __le16 veb_seid;
+#define I40E_AQC_VEB_ERR_FLAG_NO_VEB              0x1
+#define I40E_AQC_VEB_ERR_FLAG_NO_SCHED            0x2
+#define I40E_AQC_VEB_ERR_FLAG_NO_COUNTER          0x4
+#define I40E_AQC_VEB_ERR_FLAG_NO_ENTRY            0x8
+       __le16 statistic_index;
+       __le16 vebs_used;
+       __le16 vebs_free;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_add_veb_completion);
+
+/* Get VEB Parameters (direct 0x0232)
+ * uses i40e_aqc_switch_seid for the descriptor
+ */
+struct i40e_aqc_get_veb_parameters_completion {
+       __le16 seid;
+       __le16 switch_id;
+       __le16 veb_flags; /* only the first/last flags from 0x0230 is valid */
+       __le16 statistic_index;
+       __le16 vebs_used;
+       __le16 vebs_free;
+       u8     reserved[4];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_get_veb_parameters_completion);
+
+/* Delete Element (direct 0x0243)
+ * uses the generic i40e_aqc_switch_seid
+ */
+
+/* Add MAC-VLAN (indirect 0x0250) */
+
+/* used for the command for most vlan commands */
+struct i40e_aqc_macvlan {
+       __le16 num_addresses;
+       /* seid[0] carries the SEID number in bits 0-9 plus a valid bit;
+        * the meaning of seid[1]/seid[2] is not visible here -- TODO confirm
+        * against the AQ spec.
+        */
+       __le16 seid[3];
+#define I40E_AQC_MACVLAN_CMD_SEID_NUM_SHIFT  0
+#define I40E_AQC_MACVLAN_CMD_SEID_NUM_MASK   (0x3FF << \
+                                       I40E_AQC_MACVLAN_CMD_SEID_NUM_SHIFT)
+#define I40E_AQC_MACVLAN_CMD_SEID_VALID      0x8000
+       /* high/low halves of the host buffer address holding the
+        * num_addresses element array (see the 0x0251 comment below)
+        */
+       __le32 addr_high;
+       __le32 addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_macvlan);
+
+/* indirect data for command and response */
+struct i40e_aqc_add_macvlan_element_data {
+       u8     mac_addr[6];
+       __le16 vlan_tag;
+       __le16 flags;
+#define I40E_AQC_MACVLAN_ADD_PERFECT_MATCH     0x0001
+#define I40E_AQC_MACVLAN_ADD_HASH_MATCH        0x0002
+#define I40E_AQC_MACVLAN_ADD_IGNORE_VLAN       0x0004
+#define I40E_AQC_MACVLAN_ADD_TO_QUEUE          0x0008
+       __le16 queue_number;
+#define I40E_AQC_MACVLAN_CMD_QUEUE_SHIFT  0
+/* mask must be built from the QUEUE shift above, not the SEID_NUM shift
+ * of struct i40e_aqc_macvlan (previous copy/paste error; both shifts are
+ * currently 0, so the numeric value is unchanged)
+ */
+#define I40E_AQC_MACVLAN_CMD_QUEUE_MASK   (0x7FF << \
+                                       I40E_AQC_MACVLAN_CMD_QUEUE_SHIFT)
+       /* response section */
+       u8     match_method;
+#define I40E_AQC_MM_PERFECT_MATCH             0x01
+#define I40E_AQC_MM_HASH_MATCH                0x02
+#define I40E_AQC_MM_ERR_NO_RES                0xFF
+       u8     reserved1[3];
+};
+
+struct i40e_aqc_add_remove_macvlan_completion {
+       __le16 perfect_mac_used;
+       __le16 perfect_mac_free;
+       __le16 unicast_hash_free;
+       __le16 multicast_hash_free;
+       __le32 addr_high;
+       __le32 addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_add_remove_macvlan_completion);
+
+/* Remove MAC-VLAN (indirect 0x0251)
+ * uses i40e_aqc_macvlan for the descriptor
+ * data points to an array of num_addresses of elements
+ */
+
+struct i40e_aqc_remove_macvlan_element_data {
+       u8     mac_addr[6];
+       __le16 vlan_tag;
+       u8     flags;
+#define I40E_AQC_MACVLAN_DEL_PERFECT_MATCH      0x01
+#define I40E_AQC_MACVLAN_DEL_HASH_MATCH         0x02
+#define I40E_AQC_MACVLAN_DEL_IGNORE_VLAN        0x08
+#define I40E_AQC_MACVLAN_DEL_ALL_VSIS           0x10
+       u8     reserved[3];
+       /* reply section */
+       u8     error_code;
+#define I40E_AQC_REMOVE_MACVLAN_SUCCESS         0x0
+#define I40E_AQC_REMOVE_MACVLAN_FAIL            0xFF
+       u8     reply_reserved[3];
+};
+
+/* Add VLAN (indirect 0x0252)
+ * Remove VLAN (indirect 0x0253)
+ * use the generic i40e_aqc_macvlan for the command
+ */
+struct i40e_aqc_add_remove_vlan_element_data {
+       __le16 vlan_tag;
+       u8     vlan_flags;
+/* flags for add VLAN */
+#define I40E_AQC_ADD_VLAN_LOCAL             0x1
+#define I40E_AQC_ADD_PVLAN_TYPE_SHIFT       1
+#define I40E_AQC_ADD_PVLAN_TYPE_MASK        (0x3 << \
+                                               I40E_AQC_ADD_PVLAN_TYPE_SHIFT)
+#define I40E_AQC_ADD_PVLAN_TYPE_REGULAR     0x0
+#define I40E_AQC_ADD_PVLAN_TYPE_PRIMARY     0x2
+#define I40E_AQC_ADD_PVLAN_TYPE_SECONDARY   0x4
+#define I40E_AQC_VLAN_PTYPE_SHIFT           3
+#define I40E_AQC_VLAN_PTYPE_MASK            (0x3 << I40E_AQC_VLAN_PTYPE_SHIFT)
+#define I40E_AQC_VLAN_PTYPE_REGULAR_VSI     0x0
+#define I40E_AQC_VLAN_PTYPE_PROMISC_VSI     0x8
+#define I40E_AQC_VLAN_PTYPE_COMMUNITY_VSI   0x10
+#define I40E_AQC_VLAN_PTYPE_ISOLATED_VSI    0x18
+/* flags for remove VLAN */
+#define I40E_AQC_REMOVE_VLAN_ALL            0x1
+       u8     reserved;
+       /* reply section: result of the add/remove, using the codes below */
+       u8     result;
+/* flags for add VLAN */
+#define I40E_AQC_ADD_VLAN_SUCCESS       0x0
+#define I40E_AQC_ADD_VLAN_FAIL_REQUEST  0xFE
+#define I40E_AQC_ADD_VLAN_FAIL_RESOURCE 0xFF
+/* flags for remove VLAN */
+#define I40E_AQC_REMOVE_VLAN_SUCCESS    0x0
+#define I40E_AQC_REMOVE_VLAN_FAIL       0xFF
+       u8     reserved1[3];
+};
+
+struct i40e_aqc_add_remove_vlan_completion {
+       u8     reserved[4];
+       __le16 vlans_used;
+       __le16 vlans_free;
+       __le32 addr_high;
+       __le32 addr_low;
+};
+
+/* Set VSI Promiscuous Modes (direct 0x0254) */
+struct i40e_aqc_set_vsi_promiscuous_modes {
+       __le16 promiscuous_flags;
+       __le16 valid_flags;
+/* flags used for both fields above */
+#define I40E_AQC_SET_VSI_PROMISC_UNICAST     0x01
+#define I40E_AQC_SET_VSI_PROMISC_MULTICAST   0x02
+#define I40E_AQC_SET_VSI_PROMISC_BROADCAST   0x04
+#define I40E_AQC_SET_VSI_DEFAULT             0x08
+#define I40E_AQC_SET_VSI_PROMISC_VLAN        0x10
+       __le16 seid;
+#define I40E_AQC_VSI_PROM_CMD_SEID_MASK      0x3FF
+       u8     reserved[10];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_set_vsi_promiscuous_modes);
+
+/* Add S/E-tag command (direct 0x0255)
+ * Uses generic i40e_aqc_add_remove_tag_completion for completion
+ */
+struct i40e_aqc_add_tag {
+       __le16 flags;
+#define I40E_AQC_ADD_TAG_FLAG_TO_QUEUE     0x0001
+       __le16 seid;
+#define I40E_AQC_ADD_TAG_CMD_SEID_NUM_SHIFT  0
+#define I40E_AQC_ADD_TAG_CMD_SEID_NUM_MASK   (0x3FF << \
+                                       I40E_AQC_ADD_TAG_CMD_SEID_NUM_SHIFT)
+       __le16 tag;
+       __le16 queue_number;
+       u8     reserved[8];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_add_tag);
+
+struct i40e_aqc_add_remove_tag_completion {
+       u8     reserved[12];
+       __le16 tags_used;
+       __le16 tags_free;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_add_remove_tag_completion);
+
+/* Remove S/E-tag command (direct 0x0256)
+ * Uses generic i40e_aqc_add_remove_tag_completion for completion
+ */
+struct i40e_aqc_remove_tag {
+       __le16 seid;
+#define I40E_AQC_REMOVE_TAG_CMD_SEID_NUM_SHIFT  0
+#define I40E_AQC_REMOVE_TAG_CMD_SEID_NUM_MASK   (0x3FF << \
+                                       I40E_AQC_REMOVE_TAG_CMD_SEID_NUM_SHIFT)
+       __le16 tag;
+       u8     reserved[12];
+};
+
+/* Add multicast E-Tag (direct 0x0257)
+ * del multicast E-Tag (direct 0x0258) only uses pv_seid and etag fields
+ * and no external data
+ */
+struct i40e_aqc_add_remove_mcast_etag {
+       __le16 pv_seid;
+       __le16 etag;
+       u8     num_unicast_etags;
+       u8     reserved[3];
+       __le32 addr_high;          /* address of array of 2-byte s-tags */
+       __le32 addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_add_remove_mcast_etag);
+
+/* completion for 0x0257/0x0258; spurious blank line before the closing
+ * brace removed (kernel style), layout unchanged
+ */
+struct i40e_aqc_add_remove_mcast_etag_completion {
+       u8     reserved[4];
+       __le16 mcast_etags_used;
+       __le16 mcast_etags_free;
+       __le32 addr_high;
+       __le32 addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_add_remove_mcast_etag_completion);
+
+/* Update S/E-Tag (direct 0x0259) */
+struct i40e_aqc_update_tag {
+       __le16 seid;
+#define I40E_AQC_UPDATE_TAG_CMD_SEID_NUM_SHIFT  0
+#define I40E_AQC_UPDATE_TAG_CMD_SEID_NUM_MASK   (0x3FF << \
+                                       I40E_AQC_UPDATE_TAG_CMD_SEID_NUM_SHIFT)
+       __le16 old_tag;
+       __le16 new_tag;
+       u8     reserved[10];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_update_tag);
+
+struct i40e_aqc_update_tag_completion {
+       u8     reserved[12];
+       __le16 tags_used;
+       __le16 tags_free;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_update_tag_completion);
+
+/* Add Control Packet filter (direct 0x025A)
+ * Remove Control Packet filter (direct 0x025B)
+ * uses the i40e_aqc_add_oveb_cloud,
+ * and the generic direct completion structure
+ */
+struct i40e_aqc_add_remove_control_packet_filter {
+       u8     mac[6];
+       __le16 etype;
+       __le16 flags;
+#define I40E_AQC_ADD_CONTROL_PACKET_FLAGS_IGNORE_MAC    0x0001
+#define I40E_AQC_ADD_CONTROL_PACKET_FLAGS_DROP          0x0002
+#define I40E_AQC_ADD_CONTROL_PACKET_FLAGS_TO_QUEUE      0x0004
+#define I40E_AQC_ADD_CONTROL_PACKET_FLAGS_TX            0x0008
+/* RX is encoded as 0, i.e. the absence of the TX bit */
+#define I40E_AQC_ADD_CONTROL_PACKET_FLAGS_RX            0x0000
+       __le16 seid;
+#define I40E_AQC_ADD_CONTROL_PACKET_CMD_SEID_NUM_SHIFT  0
+#define I40E_AQC_ADD_CONTROL_PACKET_CMD_SEID_NUM_MASK   (0x3FF << \
+                               I40E_AQC_ADD_CONTROL_PACKET_CMD_SEID_NUM_SHIFT)
+       __le16 queue;
+       u8     reserved[2];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_add_remove_control_packet_filter);
+
+struct i40e_aqc_add_remove_control_packet_filter_completion {
+       __le16 mac_etype_used;
+       __le16 etype_used;
+       __le16 mac_etype_free;
+       __le16 etype_free;
+       u8     reserved[8];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_add_remove_control_packet_filter_completion);
+
+/* Add Cloud filters (indirect 0x025C)
+ * Remove Cloud filters (indirect 0x025D)
+ * uses the i40e_aqc_add_remove_cloud_filters,
+ * and the generic indirect completion structure
+ */
+struct i40e_aqc_add_remove_cloud_filters {
+       u8     num_filters;
+       u8     reserved;
+       __le16 seid;
+#define I40E_AQC_ADD_CLOUD_CMD_SEID_NUM_SHIFT  0
+#define I40E_AQC_ADD_CLOUD_CMD_SEID_NUM_MASK   (0x3FF << \
+                                       I40E_AQC_ADD_CLOUD_CMD_SEID_NUM_SHIFT)
+       u8     reserved2[4];
+       __le32 addr_high;
+       __le32 addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_add_remove_cloud_filters);
+
+struct i40e_aqc_add_remove_cloud_filters_element_data {
+       u8     outer_mac[6];
+       u8     inner_mac[6];
+       __le16 inner_vlan;
+       union {
+               struct {
+                       u8 reserved[12];
+                       u8 data[4];
+               } v4;
+               struct {
+                       u8 data[16];
+                       } v6;
+               } ipaddr;
+       __le16 flags;
+#define I40E_AQC_ADD_CLOUD_FILTER_SHIFT                 0
+#define I40E_AQC_ADD_CLOUD_FILTER_MASK                  (0x3F << \
+                                       I40E_AQC_ADD_CLOUD_FILTER_SHIFT)
+#define I40E_AQC_ADD_CLOUD_FILTER_OIP                   0x0001
+#define I40E_AQC_ADD_CLOUD_FILTER_OIP_GRE               0x0002
+#define I40E_AQC_ADD_CLOUD_FILTER_IMAC_IVLAN            0x0003
+#define I40E_AQC_ADD_CLOUD_FILTER_IMAC_IVLAN_GRE        0x0004
+#define I40E_AQC_ADD_CLOUD_FILTER_IMAC_TEN_ID           0x0006
+#define I40E_AQC_ADD_CLOUD_FILTER_IMAC_IVLAN_VNL        0x0007
+/* 0x0008 reserved */
+#define I40E_AQC_ADD_CLOUD_FILTER_OMAC                  0x0009
+#define I40E_AQC_ADD_CLOUD_FILTER_IMAC                  0x000A
+#define I40E_AQC_ADD_CLOUD_FLAGS_TO_QUEUE               0x0080
+#define I40E_AQC_ADD_CLOUD_VNK_SHIFT                    6
+#define I40E_AQC_ADD_CLOUD_VNK_MASK                     0x00C0
+#define I40E_AQC_ADD_CLOUD_FLAGS_IPV4                   0
+#define I40E_AQC_ADD_CLOUD_FLAGS_IPV6                   0x0100
+       __le32 key_low;
+       __le32 key_high;
+       __le16 queue_number;
+#define I40E_AQC_ADD_CLOUD_QUEUE_SHIFT                  0
+#define I40E_AQC_ADD_CLOUD_QUEUE_MASK                   (0x3F << \
+                                       I40E_AQC_ADD_CLOUD_QUEUE_SHIFT)
+       u8     reserved[14];
+       /* response section */
+       u8     allocation_result;
+#define I40E_AQC_ADD_CLOUD_FILTER_SUCCESS         0x0
+#define I40E_AQC_ADD_CLOUD_FILTER_FAIL            0xFF
+       u8     response_reserved[7];
+};
+
+struct i40e_aqc_remove_cloud_filters_completion {
+       __le16 perfect_ovlan_used;
+       __le16 perfect_ovlan_free;
+       __le16 vlan_used;
+       __le16 vlan_free;
+       __le32 addr_high;
+       __le32 addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_remove_cloud_filters_completion);
+
+/* Add Mirror Rule (indirect or direct 0x0260)
+ * Delete Mirror Rule (indirect or direct 0x0261)
+ * note: some rule types (4,5) do not use an external buffer.
+ *       take care to set the flags correctly.
+ */
+struct i40e_aqc_add_delete_mirror_rule {
+       __le16 seid;
+       __le16 rule_type;
+#define I40E_AQC_MIRROR_RULE_TYPE_SHIFT            0
+#define I40E_AQC_MIRROR_RULE_TYPE_MASK             (0x7 << \
+                                               I40E_AQC_MIRROR_RULE_TYPE_SHIFT)
+#define I40E_AQC_MIRROR_RULE_TYPE_VPORT_INGRESS    1
+#define I40E_AQC_MIRROR_RULE_TYPE_VPORT_EGRESS     2
+#define I40E_AQC_MIRROR_RULE_TYPE_VLAN             3
+#define I40E_AQC_MIRROR_RULE_TYPE_ALL_INGRESS      4
+#define I40E_AQC_MIRROR_RULE_TYPE_ALL_EGRESS       5
+       __le16 num_entries;
+       __le16 destination;  /* VSI for add, rule id for delete */
+       __le32 addr_high;    /* address of array of 2-byte VSI or VLAN ids */
+       __le32 addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_add_delete_mirror_rule);
+
+struct i40e_aqc_add_delete_mirror_rule_completion {
+       u8     reserved[2];
+       __le16 rule_id;  /* only used on add */
+       __le16 mirror_rules_used;
+       __le16 mirror_rules_free;
+       __le32 addr_high;
+       __le32 addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_add_delete_mirror_rule_completion);
+
+/* Set Storm Control Configuration (direct 0x0280)
+ * Get Storm Control Configuration (direct 0x0281)
+ *    the command and response use the same descriptor structure
+ */
+struct i40e_aqc_set_get_storm_control_config {
+       __le32 broadcast_threshold;
+       __le32 multicast_threshold;
+       /* the interval field occupies bits 8-17 of control_flags */
+       __le32 control_flags;
+#define I40E_AQC_STORM_CONTROL_MDIPW            0x01
+#define I40E_AQC_STORM_CONTROL_MDICW            0x02
+#define I40E_AQC_STORM_CONTROL_BDIPW            0x04
+#define I40E_AQC_STORM_CONTROL_BDICW            0x08
+#define I40E_AQC_STORM_CONTROL_BIDU             0x10
+#define I40E_AQC_STORM_CONTROL_INTERVAL_SHIFT   8
+#define I40E_AQC_STORM_CONTROL_INTERVAL_MASK    (0x3FF << \
+                                       I40E_AQC_STORM_CONTROL_INTERVAL_SHIFT)
+       u8     reserved[4];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_set_get_storm_control_config);
+
+/* DCB 0x03xx*/
+
+/* PFC Ignore (direct 0x0301)
+ *    the command and response use the same descriptor structure
+ */
+struct i40e_aqc_pfc_ignore {
+       u8     tc_bitmap;
+       u8     command_flags; /* unused on response */
+#define I40E_AQC_PFC_IGNORE_SET    0x80
+#define I40E_AQC_PFC_IGNORE_CLEAR  0x0
+       u8     reserved[14];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_pfc_ignore);
+
+/* DCB Update (direct 0x0302) uses the i40e_aq_desc structure
+ * with no parameters
+ */
+
+/* TX scheduler 0x04xx */
+
+/* Almost all the indirect commands use
+ * this generic struct to pass the SEID in param0
+ */
+struct i40e_aqc_tx_sched_ind {
+       __le16 vsi_seid;
+       u8     reserved[6];
+       __le32 addr_high;
+       __le32 addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_tx_sched_ind);
+
+/* Several commands respond with a set of queue set handles */
+struct i40e_aqc_qs_handles_resp {
+       __le16 qs_handles[8];
+};
+
+/* Configure VSI BW limits (direct 0x0400) */
+struct i40e_aqc_configure_vsi_bw_limit {
+       __le16 vsi_seid;
+       u8     reserved[2];
+       __le16 credit;
+       u8     reserved1[2];
+       u8     max_credit; /* 0-3, limit = 2^max */
+       u8     reserved2[7];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_configure_vsi_bw_limit);
+
+/* Indirect data buffers for the TX scheduler (0x04xx) bandwidth
+ * commands below; these have no I40E_CHECK_CMD_LENGTH because they are
+ * larger than a 16-byte direct command.
+ */
+
+/* Configure VSI Bandwidth Limit per Traffic Type (indirect 0x0406)
+ *    responds with i40e_aqc_qs_handles_resp
+ */
+struct i40e_aqc_configure_vsi_ets_sla_bw_data {
+       u8     tc_valid_bits;
+       u8     reserved[15];
+       __le16 tc_bw_credits[8]; /* FW writesback QS handles here */
+
+       /* 4 bits per tc 0-7, 4th bit is reserved, limit = 2^max */
+       __le16 tc_bw_max[2];
+       u8     reserved1[28];
+};
+
+/* Configure VSI Bandwidth Allocation per Traffic Type (indirect 0x0407)
+ *    responds with i40e_aqc_qs_handles_resp
+ */
+struct i40e_aqc_configure_vsi_tc_bw_data {
+       u8     tc_valid_bits;
+       u8     reserved[3];
+       u8     tc_bw_credits[8];
+       u8     reserved1[4];
+       __le16 qs_handles[8];
+};
+
+/* Query vsi bw configuration (indirect 0x0408) */
+struct i40e_aqc_query_vsi_bw_config_resp {
+       u8     tc_valid_bits;
+       u8     tc_suspended_bits;
+       u8     reserved[14];
+       __le16 qs_handles[8];
+       u8     reserved1[4];
+       __le16 port_bw_limit;
+       u8     reserved2[2];
+       u8     max_bw; /* 0-3, limit = 2^max */
+       u8     reserved3[23];
+};
+
+/* Query VSI Bandwidth Allocation per Traffic Type (indirect 0x040A) */
+struct i40e_aqc_query_vsi_ets_sla_config_resp {
+       u8     tc_valid_bits;
+       u8     reserved[3];
+       u8     share_credits[8];
+       __le16 credits[8];
+
+       /* 4 bits per tc 0-7, 4th bit is reserved, limit = 2^max */
+       __le16 tc_bw_max[2];
+};
+
+/* Configure Switching Component Bandwidth Limit (direct 0x0410) */
+struct i40e_aqc_configure_switching_comp_bw_limit {
+       __le16 seid;
+       u8     reserved[2];
+       __le16 credit;
+       u8     reserved1[2];
+       u8     max_bw; /* 0-3, limit = 2^max */
+       u8     reserved2[7];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_configure_switching_comp_bw_limit);
+
+/* Enable  Physical Port ETS (indirect 0x0413)
+ * Modify  Physical Port ETS (indirect 0x0414)
+ * Disable Physical Port ETS (indirect 0x0415)
+ */
+struct i40e_aqc_configure_switching_comp_ets_data {
+       u8     reserved[4];
+       u8     tc_valid_bits;
+       u8     reserved1;
+       u8     tc_strict_priority_flags;
+       u8     reserved2[17];
+       u8     tc_bw_share_credits[8];
+       u8     reserved3[96];
+};
+
+/* Configure Switching Component Bandwidth Limits per Tc (indirect 0x0416) */
+struct i40e_aqc_configure_switching_comp_ets_bw_limit_data {
+       u8     tc_valid_bits;
+       u8     reserved[15];
+       __le16 tc_bw_credit[8];
+
+       /* 4 bits per tc 0-7, 4th bit is reserved, limit = 2^max */
+       __le16 tc_bw_max[2];
+       u8     reserved1[28];
+};
+
+/* Configure Switching Component Bandwidth Allocation per Tc
+ * (indirect 0x0417)
+ */
+struct i40e_aqc_configure_switching_comp_bw_config_data {
+       u8     tc_valid_bits;
+       u8     reserved[2];
+       u8     absolute_credits; /* bool */
+       u8     tc_bw_share_credits[8];
+       u8     reserved1[20];
+};
+
+/* Query Switching Component Configuration (indirect 0x0418) */
+struct i40e_aqc_query_switching_comp_ets_config_resp {
+       u8     tc_valid_bits;
+       u8     reserved[35];
+       __le16 port_bw_limit;
+       u8     reserved1[2];
+       u8     tc_bw_max; /* 0-3, limit = 2^max */
+       u8     reserved2[23];
+};
+
+/* Query PhysicalPort ETS Configuration (indirect 0x0419) */
+struct i40e_aqc_query_port_ets_config_resp {
+       u8     reserved[4];
+       u8     tc_valid_bits;
+       u8     reserved1;
+       u8     tc_strict_priority_bits;
+       u8     reserved2;
+       u8     tc_bw_share_credits[8];
+       __le16 tc_bw_limits[8];
+
+       /* 4 bits per tc 0-7, 4th bit reserved, limit = 2^max */
+       __le16 tc_bw_max[2];
+       u8     reserved3[32];
+};
+
+/* Query Switching Component Bandwidth Allocation per Traffic Type
+ * (indirect 0x041A)
+ */
+struct i40e_aqc_query_switching_comp_bw_config_resp {
+       u8     tc_valid_bits;
+       u8     reserved[2];
+       u8     absolute_credits_enable; /* bool */
+       u8     tc_bw_share_credits[8];
+       __le16 tc_bw_limits[8];
+
+       /* 4 bits per tc 0-7, 4th bit is reserved, limit = 2^max */
+       __le16 tc_bw_max[2];
+};
+
+/* Suspend/resume port TX traffic
+ * (direct 0x041B and 0x041C) uses the generic SEID struct
+ */
+
+/* Get and set the active HMC resource profile and status.
+ * (direct 0x0500) and (direct 0x0501)
+ */
+struct i40e_aq_get_set_hmc_resource_profile {
+       u8     pm_profile;
+       u8     pe_vf_enabled;
+       u8     reserved[14];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aq_get_set_hmc_resource_profile);
+
+enum i40e_aq_hmc_profile {
+       /* I40E_HMC_PROFILE_NO_CHANGE    = 0, reserved */
+       I40E_HMC_PROFILE_DEFAULT     = 1,
+       I40E_HMC_PROFILE_FAVOR_VF    = 2,
+       I40E_HMC_PROFILE_EQUAL       = 3,
+};
+
+#define I40E_AQ_GET_HMC_RESOURCE_PROFILE_PM_MASK       0xF
+#define I40E_AQ_GET_HMC_RESOURCE_PROFILE_COUNT_MASK    0x3F
+
+/* Get PHY Abilities (indirect 0x0600) uses the generic indirect struct */
+
+/* set in param0 for get phy abilities to report qualified modules */
+#define I40E_AQ_PHY_REPORT_QUALIFIED_MODULES  0x0001
+#define I40E_AQ_PHY_REPORT_INITIAL_VALUES     0x0002
+
+/* values double as bit offsets into the phy_type bitmap below */
+enum i40e_aq_phy_type {
+       I40E_PHY_TYPE_SGMII                     = 0x0,
+       I40E_PHY_TYPE_1000BASE_KX               = 0x1,
+       I40E_PHY_TYPE_10GBASE_KX4               = 0x2,
+       I40E_PHY_TYPE_10GBASE_KR                = 0x3,
+       I40E_PHY_TYPE_40GBASE_KR4               = 0x4,
+       I40E_PHY_TYPE_XAUI                      = 0x5,
+       I40E_PHY_TYPE_XFI                       = 0x6,
+       I40E_PHY_TYPE_SFI                       = 0x7,
+       I40E_PHY_TYPE_XLAUI                     = 0x8,
+       I40E_PHY_TYPE_XLPPI                     = 0x9,
+       I40E_PHY_TYPE_40GBASE_CR4_CU            = 0xA,
+       I40E_PHY_TYPE_10GBASE_CR1_CU            = 0xB,
+       I40E_PHY_TYPE_100BASE_TX                = 0x11,
+       I40E_PHY_TYPE_1000BASE_T                = 0x12,
+       I40E_PHY_TYPE_10GBASE_T                 = 0x13,
+       I40E_PHY_TYPE_10GBASE_SR                = 0x14,
+       I40E_PHY_TYPE_10GBASE_LR                = 0x15,
+       I40E_PHY_TYPE_10GBASE_SFPP_CU           = 0x16,
+       I40E_PHY_TYPE_10GBASE_CR1               = 0x17,
+       I40E_PHY_TYPE_40GBASE_CR4               = 0x18,
+       I40E_PHY_TYPE_40GBASE_SR4               = 0x19,
+       I40E_PHY_TYPE_40GBASE_LR4               = 0x1A,
+       I40E_PHY_TYPE_20GBASE_KR2               = 0x1B,
+       I40E_PHY_TYPE_MAX
+};
+
+#define I40E_LINK_SPEED_100MB_SHIFT    0x1
+#define I40E_LINK_SPEED_1000MB_SHIFT   0x2
+#define I40E_LINK_SPEED_10GB_SHIFT     0x3
+#define I40E_LINK_SPEED_40GB_SHIFT     0x4
+#define I40E_LINK_SPEED_20GB_SHIFT     0x5
+
+/* one-hot bitmask values (1 << shift), not sequential codes */
+enum i40e_aq_link_speed {
+       I40E_LINK_SPEED_UNKNOWN = 0,
+       I40E_LINK_SPEED_100MB   = (1 << I40E_LINK_SPEED_100MB_SHIFT),
+       I40E_LINK_SPEED_1GB     = (1 << I40E_LINK_SPEED_1000MB_SHIFT),
+       I40E_LINK_SPEED_10GB    = (1 << I40E_LINK_SPEED_10GB_SHIFT),
+       I40E_LINK_SPEED_40GB    = (1 << I40E_LINK_SPEED_40GB_SHIFT),
+       I40E_LINK_SPEED_20GB    = (1 << I40E_LINK_SPEED_20GB_SHIFT)
+};
+
+struct i40e_aqc_module_desc {
+       u8 oui[3];
+       u8 reserved1;
+       u8 part_number[16];
+       u8 revision[4];
+       u8 reserved2[8];
+};
+
+struct i40e_aq_get_phy_abilities_resp {
+       __le32 phy_type;       /* bitmap using the above enum for offsets */
+       u8     link_speed;     /* bitmap using the above enum */
+       u8     abilities;
+#define I40E_AQ_PHY_FLAG_PAUSE_TX         0x01
+#define I40E_AQ_PHY_FLAG_PAUSE_RX         0x02
+#define I40E_AQ_PHY_FLAG_LOW_POWER        0x04
+#define I40E_AQ_PHY_FLAG_AN_SHIFT         3
+#define I40E_AQ_PHY_FLAG_AN_MASK          (0x3 << I40E_AQ_PHY_FLAG_AN_SHIFT)
+#define I40E_AQ_PHY_FLAG_AN_OFF           0x00 /* link forced on */
+#define I40E_AQ_PHY_FLAG_AN_OFF_LINK_DOWN 0x01
+#define I40E_AQ_PHY_FLAG_AN_ON            0x02
+#define I40E_AQ_PHY_FLAG_MODULE_QUAL      0x20
+       __le16 eee_capability;
+#define I40E_AQ_EEE_100BASE_TX       0x0002
+#define I40E_AQ_EEE_1000BASE_T       0x0004
+#define I40E_AQ_EEE_10GBASE_T        0x0008
+#define I40E_AQ_EEE_1000BASE_KX      0x0010
+#define I40E_AQ_EEE_10GBASE_KX4      0x0020
+#define I40E_AQ_EEE_10GBASE_KR       0x0040
+       __le32 eeer_val;
+       u8     d3_lpan;
+#define I40E_AQ_SET_PHY_D3_LPAN_ENA  0x01
+       u8     reserved[3];
+       u8     phy_id[4];
+       u8     module_type[3];
+       u8     qualified_module_count;
+#define I40E_AQ_PHY_MAX_QMS          16
+       struct i40e_aqc_module_desc  qualified_module[I40E_AQ_PHY_MAX_QMS];
+};
+
+/* Set PHY Config (direct 0x0601) */
+struct i40e_aq_set_phy_config { /* same bits as above in all */
+       __le32 phy_type;
+       u8     link_speed;
+       u8     abilities;
+       __le16 eee_capability;
+       __le32 eeer;
+       u8     low_power_ctrl;
+       u8     reserved[3];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aq_set_phy_config);
+
+/* Set MAC Config command data structure (direct 0x0603) */
+struct i40e_aq_set_mac_config {
+       __le16 max_frame_size;
+       u8     params;
+#define I40E_AQ_SET_MAC_CONFIG_CRC_EN           0x04
+#define I40E_AQ_SET_MAC_CONFIG_PACING_MASK      0x78
+#define I40E_AQ_SET_MAC_CONFIG_PACING_SHIFT     3
+#define I40E_AQ_SET_MAC_CONFIG_PACING_NONE      0x0
+#define I40E_AQ_SET_MAC_CONFIG_PACING_1B_13TX   0xF
+#define I40E_AQ_SET_MAC_CONFIG_PACING_1DW_9TX   0x9
+#define I40E_AQ_SET_MAC_CONFIG_PACING_1DW_4TX   0x8
+#define I40E_AQ_SET_MAC_CONFIG_PACING_3DW_7TX   0x7
+#define I40E_AQ_SET_MAC_CONFIG_PACING_2DW_3TX   0x6
+#define I40E_AQ_SET_MAC_CONFIG_PACING_1DW_1TX   0x5
+#define I40E_AQ_SET_MAC_CONFIG_PACING_3DW_2TX   0x4
+#define I40E_AQ_SET_MAC_CONFIG_PACING_7DW_3TX   0x3
+#define I40E_AQ_SET_MAC_CONFIG_PACING_4DW_1TX   0x2
+#define I40E_AQ_SET_MAC_CONFIG_PACING_9DW_1TX   0x1
+       u8     tx_timer_priority; /* bitmap */
+       __le16 tx_timer_value;
+       __le16 fc_refresh_threshold;
+       u8     reserved[8];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aq_set_mac_config);
+
+/* Restart Auto-Negotiation (direct 0x605) */
+struct i40e_aqc_set_link_restart_an {
+       u8     command;
+#define I40E_AQ_PHY_RESTART_AN  0x02
+#define I40E_AQ_PHY_LINK_ENABLE 0x04
+       u8     reserved[15];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_set_link_restart_an);
+
+/* Get Link Status cmd & response data structure (direct 0x0607) */
+struct i40e_aqc_get_link_status {
+       __le16 command_flags; /* only field set on command */
+#define I40E_AQ_LSE_MASK             0x3
+#define I40E_AQ_LSE_NOP              0x0
+#define I40E_AQ_LSE_DISABLE          0x2
+#define I40E_AQ_LSE_ENABLE           0x3
+/* only response uses this flag */
+#define I40E_AQ_LSE_IS_ENABLED       0x1
+       u8     phy_type;    /* i40e_aq_phy_type   */
+       u8     link_speed;  /* i40e_aq_link_speed */
+       u8     link_info;
+#define I40E_AQ_LINK_UP              0x01
+#define I40E_AQ_LINK_FAULT           0x02
+#define I40E_AQ_LINK_FAULT_TX        0x04
+#define I40E_AQ_LINK_FAULT_RX        0x08
+#define I40E_AQ_LINK_FAULT_REMOTE    0x10
+#define I40E_AQ_MEDIA_AVAILABLE      0x40
+#define I40E_AQ_SIGNAL_DETECT        0x80
+       u8     an_info;
+#define I40E_AQ_AN_COMPLETED         0x01
+#define I40E_AQ_LP_AN_ABILITY        0x02
+#define I40E_AQ_PD_FAULT             0x04
+#define I40E_AQ_FEC_EN               0x08
+#define I40E_AQ_PHY_LOW_POWER        0x10
+#define I40E_AQ_LINK_PAUSE_TX        0x20
+#define I40E_AQ_LINK_PAUSE_RX        0x40
+#define I40E_AQ_QUALIFIED_MODULE     0x80
+       u8     ext_info;
+#define I40E_AQ_LINK_PHY_TEMP_ALARM  0x01
+#define I40E_AQ_LINK_XCESSIVE_ERRORS 0x02
+#define I40E_AQ_LINK_TX_SHIFT        0x02
+#define I40E_AQ_LINK_TX_MASK         (0x03 << I40E_AQ_LINK_TX_SHIFT)
+#define I40E_AQ_LINK_TX_ACTIVE       0x00
+#define I40E_AQ_LINK_TX_DRAINED      0x01
+#define I40E_AQ_LINK_TX_FLUSHED      0x03
+       u8     loopback;         /* use defines from i40e_aqc_set_lb_mode */
+       __le16 max_frame_size;
+       u8     config;
+#define I40E_AQ_CONFIG_CRC_ENA       0x04
+#define I40E_AQ_CONFIG_PACING_MASK   0x78
+       u8     reserved[5];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_get_link_status);
+
+/* Set event mask command (direct 0x613) */
+struct i40e_aqc_set_phy_int_mask {
+       u8     reserved[8];
+       __le16 event_mask;
+#define I40E_AQ_EVENT_LINK_UPDOWN       0x0002
+#define I40E_AQ_EVENT_MEDIA_NA          0x0004
+#define I40E_AQ_EVENT_LINK_FAULT        0x0008
+#define I40E_AQ_EVENT_PHY_TEMP_ALARM    0x0010
+#define I40E_AQ_EVENT_EXCESSIVE_ERRORS  0x0020
+#define I40E_AQ_EVENT_SIGNAL_DETECT     0x0040
+#define I40E_AQ_EVENT_AN_COMPLETED      0x0080
+#define I40E_AQ_EVENT_MODULE_QUAL_FAIL  0x0100
+#define I40E_AQ_EVENT_PORT_TX_SUSPENDED 0x0200
+       u8     reserved1[6];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_set_phy_int_mask);
+
+/* Get Local AN advt register (direct 0x0614)
+ * Set Local AN advt register (direct 0x0615)
+ * Get Link Partner AN advt register (direct 0x0616)
+ */
+struct i40e_aqc_an_advt_reg {
+       __le32 local_an_reg0;
+       __le16 local_an_reg1;
+       u8     reserved[10];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_an_advt_reg);
+
+/* Set Loopback mode (0x0618) */
+struct i40e_aqc_set_lb_mode {
+       __le16 lb_mode;
+#define I40E_AQ_LB_PHY_LOCAL   0x01
+#define I40E_AQ_LB_PHY_REMOTE  0x02
+#define I40E_AQ_LB_MAC_LOCAL   0x04
+       u8     reserved[14];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_set_lb_mode);
+
+/* Set PHY Reset command (0x0622) */
+struct i40e_aqc_set_phy_reset {
+       u8     reset_flags;
+#define I40E_AQ_PHY_RESET_REQUEST  0x02
+       u8     reserved[15];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_set_phy_reset);
+
+/* (sic) "EXERNAL" misspelling kept -- these identifiers are part of the
+ * driver API and renaming them would break callers
+ */
+enum i40e_aq_phy_reg_type {
+       I40E_AQC_PHY_REG_INTERNAL         = 0x1,
+       I40E_AQC_PHY_REG_EXERNAL_BASET    = 0x2,
+       I40E_AQC_PHY_REG_EXERNAL_MODULE   = 0x3
+};
+
+/* NVM Read command (indirect 0x0701)
+ * NVM Erase commands (direct 0x0702)
+ * NVM Update commands (indirect 0x0703)
+ */
+struct i40e_aqc_nvm_update {
+       u8     command_flags;
+#define I40E_AQ_NVM_LAST_CMD    0x01
+#define I40E_AQ_NVM_FLASH_ONLY  0x80
+       u8     module_pointer;
+       __le16 length;
+       __le32 offset;
+       __le32 addr_high;
+       __le32 addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_nvm_update);
+
+/* Send to PF command (indirect 0x0801) id is only used by PF
+ * Send to VF command (indirect 0x0802) id is only used by PF
+ * Send to Peer PF command (indirect 0x0803)
+ */
+struct i40e_aqc_pf_vf_message {
+       __le32 id;
+       u8     reserved[4];
+       __le32 addr_high;
+       __le32 addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_pf_vf_message);
+
+/* Alternate structure */
+
+/* Direct write (direct 0x0900)
+ * Direct read (direct 0x0902)
+ */
+struct i40e_aqc_alternate_write {
+       /* two independent address/data pairs per command */
+       __le32 address0;
+       __le32 data0;
+       __le32 address1;
+       __le32 data1;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_alternate_write);
+
+/* Indirect write (indirect 0x0901)
+ * Indirect read (indirect 0x0903)
+ */
+
+struct i40e_aqc_alternate_ind_write {
+       __le32 address;
+       __le32 length;
+       __le32 addr_high;
+       __le32 addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_alternate_ind_write);
+
+/* Done alternate write (direct 0x0904)
+ * uses i40e_aq_desc
+ */
+struct i40e_aqc_alternate_write_done {
+       __le16 cmd_flags;
+#define I40E_AQ_ALTERNATE_MODE_BIOS_MASK       1
+#define I40E_AQ_ALTERNATE_MODE_BIOS_LEGACY     0
+#define I40E_AQ_ALTERNATE_MODE_BIOS_UEFI       1
+#define I40E_AQ_ALTERNATE_RESET_NEEDED         2
+       u8     reserved[14];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_alternate_write_done);
+
+/* Set OEM mode (direct 0x0905) */
+struct i40e_aqc_alternate_set_mode {
+       __le32 mode;
+#define I40E_AQ_ALTERNATE_MODE_NONE    0
+#define I40E_AQ_ALTERNATE_MODE_OEM     1
+       u8     reserved[12];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_alternate_set_mode);
+
+/* Clear port Alternate RAM (direct 0x0906) uses i40e_aq_desc */
+
+/* async events 0x10xx */
+
+/* Lan Queue Overflow Event (direct, 0x1001) */
+struct i40e_aqc_lan_overflow {
+       __le32 prtdcb_rupto;
+       __le32 otx_ctl;
+       u8     reserved[8];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_lan_overflow);
+
+/* Get LLDP MIB (indirect 0x0A00) */
+struct i40e_aqc_lldp_get_mib {
+       u8     type;
+       u8     reserved1;
+#define I40E_AQ_LLDP_MIB_TYPE_MASK                      0x3
+#define I40E_AQ_LLDP_MIB_LOCAL                          0x0
+#define I40E_AQ_LLDP_MIB_REMOTE                         0x1
+#define I40E_AQ_LLDP_MIB_LOCAL_AND_REMOTE               0x2
+#define I40E_AQ_LLDP_BRIDGE_TYPE_MASK                   0xC
+#define I40E_AQ_LLDP_BRIDGE_TYPE_SHIFT                  0x2
+#define I40E_AQ_LLDP_BRIDGE_TYPE_NEAREST_BRIDGE         0x0
+#define I40E_AQ_LLDP_BRIDGE_TYPE_NON_TPMR               0x1
+#define I40E_AQ_LLDP_TX_SHIFT              0x4
+#define I40E_AQ_LLDP_TX_MASK               (0x03 << I40E_AQ_LLDP_TX_SHIFT)
+/* TX pause flags use I40E_AQ_LINK_TX_* above */
+       __le16 local_len;
+       __le16 remote_len;
+       u8     reserved2[2];
+       __le32 addr_high;
+       __le32 addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_lldp_get_mib);
+
+/* Configure LLDP MIB Change Event (direct 0x0A01)
+ * also used for the event (with type in the command field)
+ */
+struct i40e_aqc_lldp_update_mib {
+       u8     command;
+#define I40E_AQ_LLDP_MIB_UPDATE_ENABLE          0x0
+#define I40E_AQ_LLDP_MIB_UPDATE_DISABLE         0x1
+       u8     reserved[7];
+       __le32 addr_high;
+       __le32 addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_lldp_update_mib);
+
+/* Add LLDP TLV (indirect 0x0A02)
+ * Delete LLDP TLV (indirect 0x0A04)
+ */
+struct i40e_aqc_lldp_add_tlv {
+       u8     type; /* only nearest bridge and non-TPMR from 0x0A00 */
+       u8     reserved1[1];
+       __le16 len;
+       u8     reserved2[4];
+       __le32 addr_high;
+       __le32 addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_lldp_add_tlv);
+
+/* Update LLDP TLV (indirect 0x0A03) */
+struct i40e_aqc_lldp_update_tlv {
+       u8     type; /* only nearest bridge and non-TPMR from 0x0A00 */
+       u8     reserved;
+       __le16 old_len;
+       __le16 new_offset;
+       __le16 new_len;
+       __le32 addr_high;
+       __le32 addr_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_lldp_update_tlv);
+
+/* Stop LLDP (direct 0x0A05) */
+struct i40e_aqc_lldp_stop {
+       u8     command;
+#define I40E_AQ_LLDP_AGENT_STOP                 0x0
+#define I40E_AQ_LLDP_AGENT_SHUTDOWN             0x1
+       u8     reserved[15];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_lldp_stop);
+
+/* Start LLDP (direct 0x0A06) */
+
+struct i40e_aqc_lldp_start {
+       u8     command;
+#define I40E_AQ_LLDP_AGENT_START                0x1
+       u8     reserved[15];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_lldp_start);
+
+/* Apply MIB changes (0x0A07)
+ * uses the generic struc as it contains no data
+ */
+
+/* Add Udp Tunnel command and completion (direct 0x0B00) */
+struct i40e_aqc_add_udp_tunnel {
+       __le16 udp_port;
+       u8     header_len; /* in DWords, 1 to 15 */
+       u8     protocol_index;
+#define I40E_AQC_TUNNEL_TYPE_MAC    0x0
+#define I40E_AQC_TUNNEL_TYPE_UDP    0x1
+       u8     reserved[12];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_add_udp_tunnel);
+
+/* remove UDP Tunnel command (0x0B01) */
+struct i40e_aqc_remove_udp_tunnel {
+       u8     reserved[2];
+       u8     index; /* 0 to 15 */
+       u8     pf_filters;
+       u8     total_filters;
+       u8     reserved2[11];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_remove_udp_tunnel);
+
+struct i40e_aqc_del_udp_tunnel_completion {
+       __le16 udp_port;
+       u8     index; /* 0 to 15 */
+       u8     multiple_entries;
+       u8     tunnels_used;
+       u8     reserved;
+       u8     tunnels_free;
+       u8     reserved1[9];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_del_udp_tunnel_completion);
+
+/* tunnel key structure 0x0B10 */
+struct i40e_aqc_tunnel_key_structure {
+       __le16     key1_off;
+       __le16     key1_len;
+       __le16     key2_off;
+       __le16     key2_len;
+       __le16     flags;
+#define I40E_AQC_TUNNEL_KEY_STRUCT_OVERRIDE 0x01
+/* response flags */
+#define I40E_AQC_TUNNEL_KEY_STRUCT_SUCCESS    0x01
+#define I40E_AQC_TUNNEL_KEY_STRUCT_MODIFIED   0x02
+#define I40E_AQC_TUNNEL_KEY_STRUCT_OVERRIDDEN 0x03
+       u8         resreved[6];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_tunnel_key_structure);
+
+/* OEM mode commands (direct 0xFE0x) */
+struct i40e_aqc_oem_param_change {
+       __le32 param_type;
+#define I40E_AQ_OEM_PARAM_TYPE_PF_CTL   0
+#define I40E_AQ_OEM_PARAM_TYPE_BW_CTL   1
+#define I40E_AQ_OEM_PARAM_MAC           2
+       __le32 param_value1;
+       u8     param_value2[8];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_oem_param_change);
+
+struct i40e_aqc_oem_state_change {
+       __le32 state;
+#define I40E_AQ_OEM_STATE_LINK_DOWN  0x0
+#define I40E_AQ_OEM_STATE_LINK_UP    0x1
+       u8     reserved[12];
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_oem_state_change);
+
+/* debug commands */
+
+/* get device id (0xFF00) uses the generic structure */
+
+/* set test more (0xFF01, internal) */
+
+struct i40e_acq_set_test_mode {
+       u8     mode;
+#define I40E_AQ_TEST_PARTIAL    0
+#define I40E_AQ_TEST_FULL       1
+#define I40E_AQ_TEST_NVM        2
+       u8     reserved[3];
+       u8     command;
+#define I40E_AQ_TEST_OPEN        0
+#define I40E_AQ_TEST_CLOSE       1
+#define I40E_AQ_TEST_INC         2
+       u8     reserved2[3];
+       __le32 address_high;
+       __le32 address_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_acq_set_test_mode);
+
+/* Debug Read Register command (0xFF03)
+ * Debug Write Register command (0xFF04)
+ */
+struct i40e_aqc_debug_reg_read_write {
+       __le32 reserved;
+       __le32 address;
+       __le32 value_high;
+       __le32 value_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_debug_reg_read_write);
+
+/* Scatter/gather Reg Read  (indirect 0xFF05)
+ * Scatter/gather Reg Write (indirect 0xFF06)
+ */
+
+/* i40e_aq_desc is used for the command */
+struct i40e_aqc_debug_reg_sg_element_data {
+       __le32 address;
+       __le32 value;
+};
+
+/* Debug Modify register (direct 0xFF07) */
+struct i40e_aqc_debug_modify_reg {
+       __le32 address;
+       __le32 value;
+       __le32 clear_mask;
+       __le32 set_mask;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_debug_modify_reg);
+
+/* dump internal data (0xFF08, indirect) */
+
+#define I40E_AQ_CLUSTER_ID_AUX         0
+#define I40E_AQ_CLUSTER_ID_SWITCH_FLU  1
+#define I40E_AQ_CLUSTER_ID_TXSCHED     2
+#define I40E_AQ_CLUSTER_ID_HMC         3
+#define I40E_AQ_CLUSTER_ID_MAC0                4
+#define I40E_AQ_CLUSTER_ID_MAC1                5
+#define I40E_AQ_CLUSTER_ID_MAC2                6
+#define I40E_AQ_CLUSTER_ID_MAC3                7
+#define I40E_AQ_CLUSTER_ID_DCB         8
+#define I40E_AQ_CLUSTER_ID_EMP_MEM     9
+#define I40E_AQ_CLUSTER_ID_PKT_BUF     10
+
+struct i40e_aqc_debug_dump_internals {
+       u8     cluster_id;
+       u8     table_id;
+       __le16 data_size;
+       __le32 idx;
+       __le32 address_high;
+       __le32 address_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_debug_dump_internals);
+
+struct i40e_aqc_debug_modify_internals {
+       u8     cluster_id;
+       u8     cluster_specific_params[7];
+       __le32 address_high;
+       __le32 address_low;
+};
+
+I40E_CHECK_CMD_LENGTH(i40e_aqc_debug_modify_internals);
+
+#endif
diff --git a/drivers/net/ethernet/intel/i40e/i40e_alloc.h b/drivers/net/ethernet/intel/i40e/i40e_alloc.h
new file mode 100644 (file)
index 0000000..3b1cc21
--- /dev/null
@@ -0,0 +1,59 @@
+/*******************************************************************************
+ *
+ * Intel Ethernet Controller XL710 Family Linux Driver
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Contact Information:
+ * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ *
+ ******************************************************************************/
+
+#ifndef _I40E_ALLOC_H_
+#define _I40E_ALLOC_H_
+
+struct i40e_hw;
+
+/* Memory allocation types */
+enum i40e_memory_type {
+       i40e_mem_arq_buf = 0,           /* ARQ indirect command buffer */
+       i40e_mem_asq_buf = 1,
+       i40e_mem_atq_buf = 2,           /* ATQ indirect command buffer */
+       i40e_mem_arq_ring = 3,          /* ARQ descriptor ring */
+       i40e_mem_atq_ring = 4,          /* ATQ descriptor ring */
+       i40e_mem_pd = 5,                /* Page Descriptor */
+       i40e_mem_bp = 6,                /* Backing Page - 4KB */
+       i40e_mem_bp_jumbo = 7,          /* Backing Page - > 4KB */
+       i40e_mem_reserved
+};
+
+/* prototype for functions used for dynamic memory allocation */
+i40e_status i40e_allocate_dma_mem(struct i40e_hw *hw,
+                                           struct i40e_dma_mem *mem,
+                                           enum i40e_memory_type type,
+                                           u64 size, u32 alignment);
+i40e_status i40e_free_dma_mem(struct i40e_hw *hw,
+                                       struct i40e_dma_mem *mem);
+i40e_status i40e_allocate_virt_mem(struct i40e_hw *hw,
+                                            struct i40e_virt_mem *mem,
+                                            u32 size);
+i40e_status i40e_free_virt_mem(struct i40e_hw *hw,
+                                        struct i40e_virt_mem *mem);
+
+#endif /* _I40E_ALLOC_H_ */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c
new file mode 100644 (file)
index 0000000..c21df7b
--- /dev/null
@@ -0,0 +1,2041 @@
+/*******************************************************************************
+ *
+ * Intel Ethernet Controller XL710 Family Linux Driver
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Contact Information:
+ * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ *
+ ******************************************************************************/
+
+#include "i40e_type.h"
+#include "i40e_adminq.h"
+#include "i40e_prototype.h"
+#include "i40e_virtchnl.h"
+
+/**
+ * i40e_set_mac_type - Sets MAC type
+ * @hw: pointer to the HW structure
+ *
+ * This function sets the mac type of the adapter based on the
+ * vendor ID and device ID stored in the hw structure.
+ **/
+static i40e_status i40e_set_mac_type(struct i40e_hw *hw)
+{
+       i40e_status status = 0;
+
+       if (hw->vendor_id == PCI_VENDOR_ID_INTEL) {
+               switch (hw->device_id) {
+               case I40E_SFP_XL710_DEVICE_ID:
+               case I40E_SFP_X710_DEVICE_ID:
+               case I40E_QEMU_DEVICE_ID:
+               case I40E_KX_A_DEVICE_ID:
+               case I40E_KX_B_DEVICE_ID:
+               case I40E_KX_C_DEVICE_ID:
+               case I40E_KX_D_DEVICE_ID:
+               case I40E_QSFP_A_DEVICE_ID:
+               case I40E_QSFP_B_DEVICE_ID:
+               case I40E_QSFP_C_DEVICE_ID:
+                       hw->mac.type = I40E_MAC_XL710;
+                       break;
+               case I40E_VF_DEVICE_ID:
+               case I40E_VF_HV_DEVICE_ID:
+                       hw->mac.type = I40E_MAC_VF;
+                       break;
+               default:
+                       hw->mac.type = I40E_MAC_GENERIC;
+                       break;
+               }
+       } else {
+               status = I40E_ERR_DEVICE_NOT_SUPPORTED;
+       }
+
+       hw_dbg(hw, "i40e_set_mac_type found mac: %d, returns: %d\n",
+                 hw->mac.type, status);
+       return status;
+}
+
+/**
+ * i40e_debug_aq
+ * @hw: debug mask related to admin queue
+ * @cap: pointer to adminq command descriptor
+ * @buffer: pointer to command buffer
+ *
+ * Dumps debug log about adminq command with descriptor contents.
+ **/
+void i40e_debug_aq(struct i40e_hw *hw, enum i40e_debug_mask mask, void *desc,
+                  void *buffer)
+{
+       struct i40e_aq_desc *aq_desc = (struct i40e_aq_desc *)desc;
+       u8 *aq_buffer = (u8 *)buffer;
+       u32 data[4];
+       u32 i = 0;
+
+       if ((!(mask & hw->debug_mask)) || (desc == NULL))
+               return;
+
+       i40e_debug(hw, mask,
+                  "AQ CMD: opcode 0x%04X, flags 0x%04X, datalen 0x%04X, retval 0x%04X\n",
+                  aq_desc->opcode, aq_desc->flags, aq_desc->datalen,
+                  aq_desc->retval);
+       i40e_debug(hw, mask, "\tcookie (h,l) 0x%08X 0x%08X\n",
+                  aq_desc->cookie_high, aq_desc->cookie_low);
+       i40e_debug(hw, mask, "\tparam (0,1)  0x%08X 0x%08X\n",
+                  aq_desc->params.internal.param0,
+                  aq_desc->params.internal.param1);
+       i40e_debug(hw, mask, "\taddr (h,l)   0x%08X 0x%08X\n",
+                  aq_desc->params.external.addr_high,
+                  aq_desc->params.external.addr_low);
+
+       if ((buffer != NULL) && (aq_desc->datalen != 0)) {
+               memset(data, 0, sizeof(data));
+               i40e_debug(hw, mask, "AQ CMD Buffer:\n");
+               for (i = 0; i < le16_to_cpu(aq_desc->datalen); i++) {
+                       data[((i % 16) / 4)] |=
+                               ((u32)aq_buffer[i]) << (8 * (i % 4));
+                       if ((i % 16) == 15) {
+                               i40e_debug(hw, mask,
+                                          "\t0x%04X  %08X %08X %08X %08X\n",
+                                          i - 15, data[0], data[1], data[2],
+                                          data[3]);
+                               memset(data, 0, sizeof(data));
+                       }
+               }
+               if ((i % 16) != 0)
+                       i40e_debug(hw, mask, "\t0x%04X  %08X %08X %08X %08X\n",
+                                  i - (i % 16), data[0], data[1], data[2],
+                                  data[3]);
+       }
+}
+
+/**
+ * i40e_init_shared_code - Initialize the shared code
+ * @hw: pointer to hardware structure
+ *
+ * This assigns the MAC type and PHY code and inits the NVM.
+ * Does not touch the hardware. This function must be called prior to any
+ * other function in the shared code. The i40e_hw structure should be
+ * memset to 0 prior to calling this function.  The following fields in
+ * hw structure should be filled in prior to calling this function:
+ * hw_addr, back, device_id, vendor_id, subsystem_device_id,
+ * subsystem_vendor_id, and revision_id
+ **/
+i40e_status i40e_init_shared_code(struct i40e_hw *hw)
+{
+       i40e_status status = 0;
+       u32 reg;
+
+       hw->phy.get_link_info = true;
+
+       /* Determine port number */
+       reg = rd32(hw, I40E_PFGEN_PORTNUM);
+       reg = ((reg & I40E_PFGEN_PORTNUM_PORT_NUM_MASK) >>
+              I40E_PFGEN_PORTNUM_PORT_NUM_SHIFT);
+       hw->port = (u8)reg;
+
+       i40e_set_mac_type(hw);
+
+       switch (hw->mac.type) {
+       case I40E_MAC_XL710:
+               break;
+       default:
+               return I40E_ERR_DEVICE_NOT_SUPPORTED;
+               break;
+       }
+
+       status = i40e_init_nvm(hw);
+       return status;
+}
+
+/**
+ * i40e_aq_mac_address_read - Retrieve the MAC addresses
+ * @hw: pointer to the hw struct
+ * @flags: a return indicator of what addresses were added to the addr store
+ * @addrs: the requestor's mac addr store
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+static i40e_status i40e_aq_mac_address_read(struct i40e_hw *hw,
+                                  u16 *flags,
+                                  struct i40e_aqc_mac_address_read_data *addrs,
+                                  struct i40e_asq_cmd_details *cmd_details)
+{
+       struct i40e_aq_desc desc;
+       struct i40e_aqc_mac_address_read *cmd_data =
+               (struct i40e_aqc_mac_address_read *)&desc.params.raw;
+       i40e_status status;
+
+       i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_mac_address_read);
+       desc.flags |= cpu_to_le16(I40E_AQ_FLAG_BUF);
+
+       status = i40e_asq_send_command(hw, &desc, addrs,
+                                      sizeof(*addrs), cmd_details);
+       *flags = le16_to_cpu(cmd_data->command_flags);
+
+       return status;
+}
+
+/**
+ * i40e_aq_mac_address_write - Change the MAC addresses
+ * @hw: pointer to the hw struct
+ * @flags: indicates which MAC to be written
+ * @mac_addr: address to write
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+i40e_status i40e_aq_mac_address_write(struct i40e_hw *hw,
+                                   u16 flags, u8 *mac_addr,
+                                   struct i40e_asq_cmd_details *cmd_details)
+{
+       struct i40e_aq_desc desc;
+       struct i40e_aqc_mac_address_write *cmd_data =
+               (struct i40e_aqc_mac_address_write *)&desc.params.raw;
+       i40e_status status;
+
+       i40e_fill_default_direct_cmd_desc(&desc,
+                                         i40e_aqc_opc_mac_address_write);
+       cmd_data->command_flags = cpu_to_le16(flags);
+       memcpy(&cmd_data->mac_sal, &mac_addr[0], 4);
+       memcpy(&cmd_data->mac_sah, &mac_addr[4], 2);
+
+       status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+       return status;
+}
+
+/**
+ * i40e_get_mac_addr - get MAC address
+ * @hw: pointer to the HW structure
+ * @mac_addr: pointer to MAC address
+ *
+ * Reads the adapter's MAC address from register
+ **/
+i40e_status i40e_get_mac_addr(struct i40e_hw *hw, u8 *mac_addr)
+{
+       struct i40e_aqc_mac_address_read_data addrs;
+       i40e_status status;
+       u16 flags = 0;
+
+       status = i40e_aq_mac_address_read(hw, &flags, &addrs, NULL);
+
+       if (flags & I40E_AQC_LAN_ADDR_VALID)
+               memcpy(mac_addr, &addrs.pf_lan_mac, sizeof(addrs.pf_lan_mac));
+
+       return status;
+}
+
+/**
+ * i40e_validate_mac_addr - Validate MAC address
+ * @mac_addr: pointer to MAC address
+ *
+ * Tests a MAC address to ensure it is a valid Individual Address
+ **/
+i40e_status i40e_validate_mac_addr(u8 *mac_addr)
+{
+       i40e_status status = 0;
+
+       /* Make sure it is not a multicast address */
+       if (I40E_IS_MULTICAST(mac_addr)) {
+               hw_dbg(hw, "MAC address is multicast\n");
+               status = I40E_ERR_INVALID_MAC_ADDR;
+       /* Not a broadcast address */
+       } else if (I40E_IS_BROADCAST(mac_addr)) {
+               hw_dbg(hw, "MAC address is broadcast\n");
+               status = I40E_ERR_INVALID_MAC_ADDR;
+       /* Reject the zero address */
+       } else if (mac_addr[0] == 0 && mac_addr[1] == 0 && mac_addr[2] == 0 &&
+                  mac_addr[3] == 0 && mac_addr[4] == 0 && mac_addr[5] == 0) {
+               hw_dbg(hw, "MAC address is all zeros\n");
+               status = I40E_ERR_INVALID_MAC_ADDR;
+       }
+       return status;
+}
+
+/**
+ * i40e_pf_reset - Reset the PF
+ * @hw: pointer to the hardware structure
+ *
+ * Assuming someone else has triggered a global reset,
+ * assure the global reset is complete and then reset the PF
+ **/
+i40e_status i40e_pf_reset(struct i40e_hw *hw)
+{
+       u32 wait_cnt = 0;
+       u32 reg = 0;
+       u32 grst_del;
+
+       /* Poll for Global Reset steady state in case of recent GRST.
+        * The grst delay value is in 100ms units, and we'll wait a
+        * couple counts longer to be sure we don't just miss the end.
+        */
+       grst_del = rd32(hw, I40E_GLGEN_RSTCTL) & I40E_GLGEN_RSTCTL_GRSTDEL_MASK
+                       >> I40E_GLGEN_RSTCTL_GRSTDEL_SHIFT;
+       for (wait_cnt = 0; wait_cnt < grst_del + 2; wait_cnt++) {
+               reg = rd32(hw, I40E_GLGEN_RSTAT);
+               if (!(reg & I40E_GLGEN_RSTAT_DEVSTATE_MASK))
+                       break;
+               msleep(100);
+       }
+       if (reg & I40E_GLGEN_RSTAT_DEVSTATE_MASK) {
+               hw_dbg(hw, "Global reset polling failed to complete.\n");
+               return I40E_ERR_RESET_FAILED;
+       }
+
+       /* Determine the PF number based on the PCI fn */
+       hw->pf_id = (u8)hw->bus.func;
+
+       /* If there was a Global Reset in progress when we got here,
+        * we don't need to do the PF Reset
+        */
+       if (!wait_cnt) {
+               reg = rd32(hw, I40E_PFGEN_CTRL);
+               wr32(hw, I40E_PFGEN_CTRL,
+                    (reg | I40E_PFGEN_CTRL_PFSWR_MASK));
+               for (wait_cnt = 0; wait_cnt < 10; wait_cnt++) {
+                       reg = rd32(hw, I40E_PFGEN_CTRL);
+                       if (!(reg & I40E_PFGEN_CTRL_PFSWR_MASK))
+                               break;
+                       usleep_range(1000, 2000);
+               }
+               if (reg & I40E_PFGEN_CTRL_PFSWR_MASK) {
+                       hw_dbg(hw, "PF reset polling failed to complete.\n");
+                       return I40E_ERR_RESET_FAILED;
+               }
+       }
+
+       i40e_clear_pxe_mode(hw);
+       return 0;
+}
+
+/**
+ * i40e_clear_pxe_mode - clear pxe operations mode
+ * @hw: pointer to the hw struct
+ *
+ * Make sure all PXE mode settings are cleared, including things
+ * like descriptor fetch/write-back mode.
+ **/
+void i40e_clear_pxe_mode(struct i40e_hw *hw)
+{
+       u32 reg;
+
+       /* Clear single descriptor fetch/write-back mode */
+       reg = rd32(hw, I40E_GLLAN_RCTL_0);
+       wr32(hw, I40E_GLLAN_RCTL_0, (reg | I40E_GLLAN_RCTL_0_PXE_MODE_MASK));
+}
+
+/**
+ * i40e_led_get - return current on/off mode
+ * @hw: pointer to the hw struct
+ *
+ * The value returned is the 'mode' field as defined in the
+ * GPIO register definitions: 0x0 = off, 0xf = on, and other
+ * values are variations of possible behaviors relating to
+ * blink, link, and wire.
+ **/
+u32 i40e_led_get(struct i40e_hw *hw)
+{
+       u32 gpio_val = 0;
+       u32 mode = 0;
+       u32 port;
+       int i;
+
+       for (i = 0; i < I40E_HW_CAP_MAX_GPIO; i++) {
+               if (!hw->func_caps.led[i])
+                       continue;
+
+               gpio_val = rd32(hw, I40E_GLGEN_GPIO_CTL(i));
+               port = (gpio_val & I40E_GLGEN_GPIO_CTL_PRT_NUM_MASK)
+                       >> I40E_GLGEN_GPIO_CTL_PRT_NUM_SHIFT;
+
+               if (port != hw->port)
+                       continue;
+
+               mode = (gpio_val & I40E_GLGEN_GPIO_CTL_LED_MODE_MASK)
+                               >> I40E_GLGEN_GPIO_CTL_INT_MODE_SHIFT;
+               break;
+       }
+
+       return mode;
+}
+
+/**
+ * i40e_led_set - set new on/off mode
+ * @hw: pointer to the hw struct
+ * @mode: 0=off, else on (see EAS for mode details)
+ **/
+void i40e_led_set(struct i40e_hw *hw, u32 mode)
+{
+       u32 gpio_val = 0;
+       u32 led_mode = 0;
+       u32 port;
+       int i;
+
+       for (i = 0; i < I40E_HW_CAP_MAX_GPIO; i++) {
+               if (!hw->func_caps.led[i])
+                       continue;
+
+               gpio_val = rd32(hw, I40E_GLGEN_GPIO_CTL(i));
+               port = (gpio_val & I40E_GLGEN_GPIO_CTL_PRT_NUM_MASK)
+                       >> I40E_GLGEN_GPIO_CTL_PRT_NUM_SHIFT;
+
+               if (port != hw->port)
+                       continue;
+
+               led_mode = (mode << I40E_GLGEN_GPIO_CTL_LED_MODE_SHIFT) &
+                           I40E_GLGEN_GPIO_CTL_LED_MODE_MASK;
+               gpio_val &= ~I40E_GLGEN_GPIO_CTL_LED_MODE_MASK;
+               gpio_val |= led_mode;
+               wr32(hw, I40E_GLGEN_GPIO_CTL(i), gpio_val);
+       }
+}
+
+/* Admin command wrappers */
+/**
+ * i40e_aq_queue_shutdown
+ * @hw: pointer to the hw struct
+ * @unloading: is the driver unloading itself
+ *
+ * Tell the Firmware that we're shutting down the AdminQ and whether
+ * or not the driver is unloading as well.
+ **/
+i40e_status i40e_aq_queue_shutdown(struct i40e_hw *hw,
+                                            bool unloading)
+{
+       struct i40e_aq_desc desc;
+       struct i40e_aqc_queue_shutdown *cmd =
+               (struct i40e_aqc_queue_shutdown *)&desc.params.raw;
+       i40e_status status;
+
+       i40e_fill_default_direct_cmd_desc(&desc,
+                                         i40e_aqc_opc_queue_shutdown);
+
+       if (unloading)
+               cmd->driver_unloading = cpu_to_le32(I40E_AQ_DRIVER_UNLOADING);
+       status = i40e_asq_send_command(hw, &desc, NULL, 0, NULL);
+
+       return status;
+}
+
+/**
+ * i40e_aq_set_link_restart_an
+ * @hw: pointer to the hw struct
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Sets up the link and restarts the Auto-Negotiation over the link.
+ **/
+i40e_status i40e_aq_set_link_restart_an(struct i40e_hw *hw,
+                               struct i40e_asq_cmd_details *cmd_details)
+{
+       struct i40e_aq_desc desc;
+       struct i40e_aqc_set_link_restart_an *cmd =
+               (struct i40e_aqc_set_link_restart_an *)&desc.params.raw;
+       i40e_status status;
+
+       i40e_fill_default_direct_cmd_desc(&desc,
+                                         i40e_aqc_opc_set_link_restart_an);
+
+       cmd->command = I40E_AQ_PHY_RESTART_AN;
+
+       status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+       return status;
+}
+
+/**
+ * i40e_aq_get_link_info
+ * @hw: pointer to the hw struct
+ * @enable_lse: enable/disable LinkStatusEvent reporting
+ * @link: pointer to link status structure - optional
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Returns the link status of the adapter.
+ **/
+i40e_status i40e_aq_get_link_info(struct i40e_hw *hw,
+                               bool enable_lse, struct i40e_link_status *link,
+                               struct i40e_asq_cmd_details *cmd_details)
+{
+       struct i40e_aq_desc desc;
+       struct i40e_aqc_get_link_status *resp =
+               (struct i40e_aqc_get_link_status *)&desc.params.raw;
+       struct i40e_link_status *hw_link_info = &hw->phy.link_info;
+       i40e_status status;
+       u16 command_flags;
+
+       i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_get_link_status);
+
+       if (enable_lse)
+               command_flags = I40E_AQ_LSE_ENABLE;
+       else
+               command_flags = I40E_AQ_LSE_DISABLE;
+       resp->command_flags = cpu_to_le16(command_flags);
+
+       status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+       if (status)
+               goto aq_get_link_info_exit;
+
+       /* save off old link status information */
+       memcpy(&hw->phy.link_info_old, hw_link_info,
+              sizeof(struct i40e_link_status));
+
+       /* update link status */
+       hw_link_info->phy_type = (enum i40e_aq_phy_type)resp->phy_type;
+       hw_link_info->link_speed = (enum i40e_aq_link_speed)resp->link_speed;
+       hw_link_info->link_info = resp->link_info;
+       hw_link_info->an_info = resp->an_info;
+       hw_link_info->ext_info = resp->ext_info;
+
+       if (resp->command_flags & cpu_to_le16(I40E_AQ_LSE_ENABLE))
+               hw_link_info->lse_enable = true;
+       else
+               hw_link_info->lse_enable = false;
+
+       /* save link status information */
+       if (link)
+               memcpy(link, hw_link_info, sizeof(struct i40e_link_status));
+
+       /* flag cleared so helper functions don't call AQ again */
+       hw->phy.get_link_info = false;
+
+aq_get_link_info_exit:
+       return status;
+}
+
+/**
+ * i40e_aq_add_vsi
+ * @hw: pointer to the hw struct
+ * @vsi_ctx: pointer to a vsi context struct
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Add a VSI context to the hardware.  On success the SEID, VSI number
+ * and the allocated/unallocated VSI counters reported by firmware are
+ * written back into @vsi_ctx.
+ **/
+i40e_status i40e_aq_add_vsi(struct i40e_hw *hw,
+                               struct i40e_vsi_context *vsi_ctx,
+                               struct i40e_asq_cmd_details *cmd_details)
+{
+       struct i40e_aq_desc desc;
+       struct i40e_aqc_add_get_update_vsi *cmd =
+               (struct i40e_aqc_add_get_update_vsi *)&desc.params.raw;
+       /* command and completion views alias the same descriptor bytes;
+        * resp is only valid after the command completes
+        */
+       struct i40e_aqc_add_get_update_vsi_completion *resp =
+               (struct i40e_aqc_add_get_update_vsi_completion *)
+               &desc.params.raw;
+       i40e_status status;
+
+       i40e_fill_default_direct_cmd_desc(&desc,
+                                         i40e_aqc_opc_add_vsi);
+
+       cmd->uplink_seid = cpu_to_le16(vsi_ctx->uplink_seid);
+       cmd->connection_type = vsi_ctx->connection_type;
+       cmd->vf_id = vsi_ctx->vf_num;
+       cmd->vsi_flags = cpu_to_le16(vsi_ctx->flags);
+
+       /* VSI info travels in the indirect buffer (BUF); RD presumably
+        * marks it as driver-supplied data -- confirm against AQ spec
+        */
+       desc.flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD));
+       if (sizeof(vsi_ctx->info) > I40E_AQ_LARGE_BUF)
+               desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB);
+
+       status = i40e_asq_send_command(hw, &desc, &vsi_ctx->info,
+                                   sizeof(vsi_ctx->info), cmd_details);
+
+       if (status)
+               goto aq_add_vsi_exit;
+
+       /* firmware rewrote the descriptor; copy the results back out */
+       vsi_ctx->seid = le16_to_cpu(resp->seid);
+       vsi_ctx->vsi_number = le16_to_cpu(resp->vsi_number);
+       vsi_ctx->vsis_allocated = le16_to_cpu(resp->vsi_used);
+       vsi_ctx->vsis_unallocated = le16_to_cpu(resp->vsi_free);
+
+aq_add_vsi_exit:
+       return status;
+}
+
+/**
+ * i40e_aq_set_vsi_unicast_promiscuous
+ * @hw: pointer to the hw struct
+ * @seid: vsi number
+ * @set: set unicast promiscuous enable/disable
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Enable or disable unicast promiscuous mode on the VSI identified by
+ * @seid via the set-VSI-promiscuous-modes admin queue command.
+ **/
+i40e_status i40e_aq_set_vsi_unicast_promiscuous(struct i40e_hw *hw,
+                               u16 seid, bool set, struct i40e_asq_cmd_details *cmd_details)
+{
+       struct i40e_aq_desc desc;
+       struct i40e_aqc_set_vsi_promiscuous_modes *cmd =
+               (struct i40e_aqc_set_vsi_promiscuous_modes *)&desc.params.raw;
+
+       i40e_fill_default_direct_cmd_desc(&desc,
+                                       i40e_aqc_opc_set_vsi_promiscuous_modes);
+
+       /* only the unicast bit is declared valid, so firmware leaves the
+        * other promiscuous modes of this VSI untouched
+        */
+       cmd->promiscuous_flags =
+               cpu_to_le16(set ? I40E_AQC_SET_VSI_PROMISC_UNICAST : 0);
+       cmd->valid_flags = cpu_to_le16(I40E_AQC_SET_VSI_PROMISC_UNICAST);
+       cmd->seid = cpu_to_le16(seid);
+
+       return i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+}
+
+/**
+ * i40e_aq_set_vsi_multicast_promiscuous
+ * @hw: pointer to the hw struct
+ * @seid: vsi number
+ * @set: set multicast promiscuous enable/disable
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Enable or disable multicast promiscuous mode on the VSI identified by
+ * @seid via the set-VSI-promiscuous-modes admin queue command.
+ **/
+i40e_status i40e_aq_set_vsi_multicast_promiscuous(struct i40e_hw *hw,
+                               u16 seid, bool set, struct i40e_asq_cmd_details *cmd_details)
+{
+       struct i40e_aq_desc desc;
+       struct i40e_aqc_set_vsi_promiscuous_modes *cmd =
+               (struct i40e_aqc_set_vsi_promiscuous_modes *)&desc.params.raw;
+
+       i40e_fill_default_direct_cmd_desc(&desc,
+                                       i40e_aqc_opc_set_vsi_promiscuous_modes);
+
+       /* only the multicast bit is declared valid, so firmware leaves the
+        * other promiscuous modes of this VSI untouched
+        */
+       cmd->promiscuous_flags =
+               cpu_to_le16(set ? I40E_AQC_SET_VSI_PROMISC_MULTICAST : 0);
+       cmd->valid_flags = cpu_to_le16(I40E_AQC_SET_VSI_PROMISC_MULTICAST);
+       cmd->seid = cpu_to_le16(seid);
+
+       return i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+}
+
+/**
+ * i40e_aq_set_vsi_broadcast
+ * @hw: pointer to the hw struct
+ * @seid: vsi number
+ * @set_filter: true to set filter, false to clear filter
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Set or clear the broadcast promiscuous flag (filter) for a given VSI.
+ **/
+i40e_status i40e_aq_set_vsi_broadcast(struct i40e_hw *hw,
+                               u16 seid, bool set_filter,
+                               struct i40e_asq_cmd_details *cmd_details)
+{
+       struct i40e_aq_desc desc;
+       struct i40e_aqc_set_vsi_promiscuous_modes *cmd =
+               (struct i40e_aqc_set_vsi_promiscuous_modes *)&desc.params.raw;
+       i40e_status status;
+
+       i40e_fill_default_direct_cmd_desc(&desc,
+                                       i40e_aqc_opc_set_vsi_promiscuous_modes);
+
+       /* NOTE(review): promiscuous_flags is presumably zeroed by
+        * i40e_fill_default_direct_cmd_desc, which would make the &= in
+        * the else branch a no-op -- confirm; the |=/&= style differs
+        * from the plain assignment used by the sibling helpers
+        */
+       if (set_filter)
+               cmd->promiscuous_flags
+                           |= cpu_to_le16(I40E_AQC_SET_VSI_PROMISC_BROADCAST);
+       else
+               cmd->promiscuous_flags
+                           &= cpu_to_le16(~I40E_AQC_SET_VSI_PROMISC_BROADCAST);
+
+       /* only the broadcast bit is validated for this command */
+       cmd->valid_flags = cpu_to_le16(I40E_AQC_SET_VSI_PROMISC_BROADCAST);
+       cmd->seid = cpu_to_le16(seid);
+       status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+       return status;
+}
+
+/**
+ * i40e_aq_get_vsi_params - get VSI configuration info
+ * @hw: pointer to the hw struct
+ * @vsi_ctx: pointer to a vsi context struct
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Query the VSI identified by vsi_ctx->seid.  On success the VSI info
+ * buffer, SEID, VSI number and allocation counters in @vsi_ctx are
+ * refreshed from firmware.
+ **/
+i40e_status i40e_aq_get_vsi_params(struct i40e_hw *hw,
+                               struct i40e_vsi_context *vsi_ctx,
+                               struct i40e_asq_cmd_details *cmd_details)
+{
+       struct i40e_aq_desc desc;
+       struct i40e_aqc_switch_seid *cmd =
+               (struct i40e_aqc_switch_seid *)&desc.params.raw;
+       /* completion view aliases the same descriptor bytes as cmd */
+       struct i40e_aqc_add_get_update_vsi_completion *resp =
+               (struct i40e_aqc_add_get_update_vsi_completion *)
+               &desc.params.raw;
+       i40e_status status;
+
+       i40e_fill_default_direct_cmd_desc(&desc,
+                                         i40e_aqc_opc_get_vsi_parameters);
+
+       cmd->seid = cpu_to_le16(vsi_ctx->seid);
+
+       desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_BUF);
+       if (sizeof(vsi_ctx->info) > I40E_AQ_LARGE_BUF)
+               desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB);
+
+       /* fix: forward cmd_details instead of hard-coding NULL, so callers'
+        * completion details are honored as in every sibling AQ wrapper
+        */
+       status = i40e_asq_send_command(hw, &desc, &vsi_ctx->info,
+                                   sizeof(vsi_ctx->info), cmd_details);
+
+       if (status)
+               goto aq_get_vsi_params_exit;
+
+       vsi_ctx->seid = le16_to_cpu(resp->seid);
+       vsi_ctx->vsi_number = le16_to_cpu(resp->vsi_number);
+       vsi_ctx->vsis_allocated = le16_to_cpu(resp->vsi_used);
+       vsi_ctx->vsis_unallocated = le16_to_cpu(resp->vsi_free);
+
+aq_get_vsi_params_exit:
+       return status;
+}
+
+/**
+ * i40e_aq_update_vsi_params
+ * @hw: pointer to the hw struct
+ * @vsi_ctx: pointer to a vsi context struct
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Update a VSI context.  The new configuration is taken from
+ * vsi_ctx->info and pushed to the VSI identified by vsi_ctx->seid.
+ **/
+i40e_status i40e_aq_update_vsi_params(struct i40e_hw *hw,
+                               struct i40e_vsi_context *vsi_ctx,
+                               struct i40e_asq_cmd_details *cmd_details)
+{
+       struct i40e_aq_desc desc;
+       struct i40e_aqc_switch_seid *scmd =
+               (struct i40e_aqc_switch_seid *)&desc.params.raw;
+
+       i40e_fill_default_direct_cmd_desc(&desc,
+                                         i40e_aqc_opc_update_vsi_parameters);
+       scmd->seid = cpu_to_le16(vsi_ctx->seid);
+
+       /* VSI info rides in the indirect buffer as driver-supplied data */
+       desc.flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD));
+       if (sizeof(vsi_ctx->info) > I40E_AQ_LARGE_BUF)
+               desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB);
+
+       return i40e_asq_send_command(hw, &desc, &vsi_ctx->info,
+                                    sizeof(vsi_ctx->info), cmd_details);
+}
+
+/**
+ * i40e_aq_get_switch_config
+ * @hw: pointer to the hardware structure
+ * @buf: pointer to the result buffer
+ * @buf_size: length of input buffer
+ * @start_seid: seid to start for the report, 0 == beginning; in/out --
+ *              updated with the SEID to continue from on the next call
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Fill the buf with switch configuration returned from AdminQ command
+ **/
+i40e_status i40e_aq_get_switch_config(struct i40e_hw *hw,
+                               struct i40e_aqc_get_switch_config_resp *buf,
+                               u16 buf_size, u16 *start_seid,
+                               struct i40e_asq_cmd_details *cmd_details)
+{
+       struct i40e_aq_desc desc;
+       struct i40e_aqc_switch_seid *scfg =
+               (struct i40e_aqc_switch_seid *)&desc.params.raw;
+       i40e_status status;
+
+       i40e_fill_default_direct_cmd_desc(&desc,
+                                         i40e_aqc_opc_get_switch_config);
+       desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_BUF);
+       if (buf_size > I40E_AQ_LARGE_BUF)
+               desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB);
+       scfg->seid = cpu_to_le16(*start_seid);
+
+       status = i40e_asq_send_command(hw, &desc, buf, buf_size, cmd_details);
+       /* written back unconditionally, even when the command failed */
+       *start_seid = le16_to_cpu(scfg->seid);
+
+       return status;
+}
+
+/**
+ * i40e_aq_get_firmware_version
+ * @hw: pointer to the hw struct
+ * @fw_major_version: firmware major version
+ * @fw_minor_version: firmware minor version
+ * @api_major_version: major queue version
+ * @api_minor_version: minor queue version
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Get the firmware version from the admin queue commands.  Each output
+ * pointer is optional and skipped when NULL.
+ **/
+i40e_status i40e_aq_get_firmware_version(struct i40e_hw *hw,
+                               u16 *fw_major_version, u16 *fw_minor_version,
+                               u16 *api_major_version, u16 *api_minor_version,
+                               struct i40e_asq_cmd_details *cmd_details)
+{
+       struct i40e_aq_desc desc;
+       struct i40e_aqc_get_version *resp =
+               (struct i40e_aqc_get_version *)&desc.params.raw;
+       i40e_status status;
+
+       i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_get_version);
+
+       status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+       if (status)
+               return status;
+
+       if (fw_major_version)
+               *fw_major_version = le16_to_cpu(resp->fw_major);
+       if (fw_minor_version)
+               *fw_minor_version = le16_to_cpu(resp->fw_minor);
+       if (api_major_version)
+               *api_major_version = le16_to_cpu(resp->api_major);
+       if (api_minor_version)
+               *api_minor_version = le16_to_cpu(resp->api_minor);
+
+       return status;
+}
+
+/**
+ * i40e_aq_send_driver_version
+ * @hw: pointer to the hw struct
+ * @dv: driver's major, minor version
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Send the driver version to the firmware.  Returns I40E_ERR_PARAM if
+ * @dv is NULL.
+ **/
+i40e_status i40e_aq_send_driver_version(struct i40e_hw *hw,
+                               struct i40e_driver_version *dv,
+                               struct i40e_asq_cmd_details *cmd_details)
+{
+       struct i40e_aq_desc desc;
+       struct i40e_aqc_driver_version *cmd =
+               (struct i40e_aqc_driver_version *)&desc.params.raw;
+       i40e_status status;
+
+       if (dv == NULL)
+               return I40E_ERR_PARAM;
+
+       i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_driver_version);
+
+       /* NOTE(review): FLAG_SI lacks the (u16) cast used elsewhere in
+        * this file; harmless but inconsistent
+        */
+       desc.flags |= cpu_to_le16(I40E_AQ_FLAG_SI);
+       cmd->driver_major_ver = dv->major_version;
+       cmd->driver_minor_ver = dv->minor_version;
+       cmd->driver_build_ver = dv->build_version;
+       cmd->driver_subbuild_ver = dv->subbuild_version;
+       status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+       return status;
+}
+
+/**
+ * i40e_get_link_status - get status of the HW network link
+ * @hw: pointer to the hw struct
+ *
+ * Returns true if link is up, false if link is down (or if refreshing
+ * the cached link info from firmware fails).
+ *
+ * Side effect: LinkStatusEvent reporting becomes enabled
+ **/
+bool i40e_get_link_status(struct i40e_hw *hw)
+{
+       /* refresh the cached link info from the AQ when it is stale */
+       if (hw->phy.get_link_info &&
+           i40e_aq_get_link_info(hw, true, NULL, NULL))
+               return false;
+
+       return !!(hw->phy.link_info.link_info & I40E_AQ_LINK_UP);
+}
+
+/**
+ * i40e_aq_add_veb - Insert a VEB between the VSI and the MAC
+ * @hw: pointer to the hw struct
+ * @uplink_seid: the MAC or other gizmo SEID
+ * @downlink_seid: the VSI SEID
+ * @enabled_tc: bitmap of TCs to be enabled
+ * @default_port: true for default port VSI, false for control port
+ * @veb_seid: pointer to where to put the resulting VEB SEID; may be
+ *            NULL, and is only written on success
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * This asks the FW to add a VEB between the uplink and downlink
+ * elements.  If the uplink SEID is 0, this will be a floating VEB.
+ **/
+i40e_status i40e_aq_add_veb(struct i40e_hw *hw, u16 uplink_seid,
+                               u16 downlink_seid, u8 enabled_tc,
+                               bool default_port, u16 *veb_seid,
+                               struct i40e_asq_cmd_details *cmd_details)
+{
+       struct i40e_aq_desc desc;
+       struct i40e_aqc_add_veb *cmd =
+               (struct i40e_aqc_add_veb *)&desc.params.raw;
+       /* completion view aliases the same descriptor bytes as cmd */
+       struct i40e_aqc_add_veb_completion *resp =
+               (struct i40e_aqc_add_veb_completion *)&desc.params.raw;
+       i40e_status status;
+       u16 veb_flags = 0;
+
+       /* SEIDs need to either both be set or both be 0 for floating VEB */
+       if (!!uplink_seid != !!downlink_seid)
+               return I40E_ERR_PARAM;
+
+       i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_add_veb);
+
+       cmd->uplink_seid = cpu_to_le16(uplink_seid);
+       cmd->downlink_seid = cpu_to_le16(downlink_seid);
+       cmd->enable_tcs = enabled_tc;
+       if (!uplink_seid)
+               veb_flags |= I40E_AQC_ADD_VEB_FLOATING;
+       if (default_port)
+               veb_flags |= I40E_AQC_ADD_VEB_PORT_TYPE_DEFAULT;
+       else
+               veb_flags |= I40E_AQC_ADD_VEB_PORT_TYPE_DATA;
+       cmd->veb_flags = cpu_to_le16(veb_flags);
+
+       status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+       if (!status && veb_seid)
+               *veb_seid = le16_to_cpu(resp->veb_seid);
+
+       return status;
+}
+
+/**
+ * i40e_aq_get_veb_parameters - Retrieve VEB parameters
+ * @hw: pointer to the hw struct
+ * @veb_seid: the SEID of the VEB to query; must be non-zero
+ * @switch_id: the uplink switch id
+ * @floating: set to true if the VEB is floating
+ * @statistic_index: index of the stats counter block for this VEB
+ * @vebs_used: number of VEB's used by function
+ * @vebs_free: total VEB's not reserved by any function
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * This retrieves the parameters for a particular VEB, specified by
+ * veb_seid, and returns them to the caller.  All output pointers are
+ * optional and only written on success.
+ **/
+i40e_status i40e_aq_get_veb_parameters(struct i40e_hw *hw,
+                               u16 veb_seid, u16 *switch_id,
+                               bool *floating, u16 *statistic_index,
+                               u16 *vebs_used, u16 *vebs_free,
+                               struct i40e_asq_cmd_details *cmd_details)
+{
+       struct i40e_aq_desc desc;
+       /* same descriptor bytes serve as command (seid in) and response */
+       struct i40e_aqc_get_veb_parameters_completion *cmd_resp =
+               (struct i40e_aqc_get_veb_parameters_completion *)
+               &desc.params.raw;
+       i40e_status status;
+
+       if (veb_seid == 0)
+               return I40E_ERR_PARAM;
+
+       i40e_fill_default_direct_cmd_desc(&desc,
+                                         i40e_aqc_opc_get_veb_parameters);
+       cmd_resp->seid = cpu_to_le16(veb_seid);
+
+       status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+       if (status)
+               goto get_veb_exit;
+
+       if (switch_id)
+               *switch_id = le16_to_cpu(cmd_resp->switch_id);
+       if (statistic_index)
+               *statistic_index = le16_to_cpu(cmd_resp->statistic_index);
+       if (vebs_used)
+               *vebs_used = le16_to_cpu(cmd_resp->vebs_used);
+       if (vebs_free)
+               *vebs_free = le16_to_cpu(cmd_resp->vebs_free);
+       if (floating) {
+               u16 flags = le16_to_cpu(cmd_resp->veb_flags);
+               if (flags & I40E_AQC_ADD_VEB_FLOATING)
+                       *floating = true;
+               else
+                       *floating = false;
+       }
+
+get_veb_exit:
+       return status;
+}
+
+/**
+ * i40e_aq_add_macvlan
+ * @hw: pointer to the hw struct
+ * @seid: VSI for the mac address
+ * @mv_list: list of macvlans to be added
+ * @count: length of the list
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Add MAC/VLAN addresses to the HW filtering
+ **/
+i40e_status i40e_aq_add_macvlan(struct i40e_hw *hw, u16 seid,
+                       struct i40e_aqc_add_macvlan_element_data *mv_list,
+                       u16 count, struct i40e_asq_cmd_details *cmd_details)
+{
+       struct i40e_aq_desc desc;
+       struct i40e_aqc_macvlan *cmd =
+               (struct i40e_aqc_macvlan *)&desc.params.raw;
+       i40e_status status;
+       u16 buf_size;
+
+       if (count == 0 || !mv_list || !hw)
+               return I40E_ERR_PARAM;
+
+       /* NOTE(review): count * sizeof(element) is truncated to u16 here;
+        * a large count could silently wrap -- confirm callers bound it
+        */
+       buf_size = count * sizeof(struct i40e_aqc_add_macvlan_element_data);
+
+       /* prep the rest of the request */
+       i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_add_macvlan);
+       cmd->num_addresses = cpu_to_le16(count);
+       /* seid[0] carries the target VSI plus its valid bit; [1]/[2] unused */
+       cmd->seid[0] = cpu_to_le16(I40E_AQC_MACVLAN_CMD_SEID_VALID | seid);
+       cmd->seid[1] = 0;
+       cmd->seid[2] = 0;
+
+       desc.flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD));
+       if (buf_size > I40E_AQ_LARGE_BUF)
+               desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB);
+
+       status = i40e_asq_send_command(hw, &desc, mv_list, buf_size,
+                                   cmd_details);
+
+       return status;
+}
+
+/**
+ * i40e_aq_remove_macvlan
+ * @hw: pointer to the hw struct
+ * @seid: VSI for the mac address
+ * @mv_list: list of macvlans to be removed
+ * @count: length of the list
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Remove MAC/VLAN addresses from the HW filtering
+ **/
+i40e_status i40e_aq_remove_macvlan(struct i40e_hw *hw, u16 seid,
+                       struct i40e_aqc_remove_macvlan_element_data *mv_list,
+                       u16 count, struct i40e_asq_cmd_details *cmd_details)
+{
+       struct i40e_aq_desc desc;
+       struct i40e_aqc_macvlan *cmd =
+               (struct i40e_aqc_macvlan *)&desc.params.raw;
+       i40e_status status;
+       u16 buf_size;
+
+       if (count == 0 || !mv_list || !hw)
+               return I40E_ERR_PARAM;
+
+       /* NOTE(review): count * sizeof(element) is truncated to u16 here;
+        * a large count could silently wrap -- confirm callers bound it
+        */
+       buf_size = count * sizeof(struct i40e_aqc_remove_macvlan_element_data);
+
+       /* prep the rest of the request */
+       i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_remove_macvlan);
+       cmd->num_addresses = cpu_to_le16(count);
+       /* seid[0] carries the target VSI plus its valid bit; [1]/[2] unused */
+       cmd->seid[0] = cpu_to_le16(I40E_AQC_MACVLAN_CMD_SEID_VALID | seid);
+       cmd->seid[1] = 0;
+       cmd->seid[2] = 0;
+
+       desc.flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD));
+       if (buf_size > I40E_AQ_LARGE_BUF)
+               desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB);
+
+       status = i40e_asq_send_command(hw, &desc, mv_list, buf_size,
+                                      cmd_details);
+
+       return status;
+}
+
+/**
+ * i40e_aq_add_vlan - Add VLAN ids to the HW filtering
+ * @hw: pointer to the hw struct
+ * @seid: VSI for the vlan filters
+ * @v_list: list of vlan filters to be added
+ * @count: length of the list
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+i40e_status i40e_aq_add_vlan(struct i40e_hw *hw, u16 seid,
+                       struct i40e_aqc_add_remove_vlan_element_data *v_list,
+                       u8 count, struct i40e_asq_cmd_details *cmd_details)
+{
+       struct i40e_aq_desc desc;
+       struct i40e_aqc_macvlan *cmd =
+               (struct i40e_aqc_macvlan *)&desc.params.raw;
+       u16 length;
+
+       if (!hw || !v_list || count == 0)
+               return I40E_ERR_PARAM;
+
+       length = count * sizeof(struct i40e_aqc_add_remove_vlan_element_data);
+
+       i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_add_vlan);
+       cmd->num_addresses = cpu_to_le16(count);
+       /* seid[0] carries the target VSI plus its valid bit; [1]/[2] unused */
+       cmd->seid[0] = cpu_to_le16(seid | I40E_AQC_MACVLAN_CMD_SEID_VALID);
+       cmd->seid[1] = 0;
+       cmd->seid[2] = 0;
+
+       desc.flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD));
+       if (length > I40E_AQ_LARGE_BUF)
+               desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB);
+
+       return i40e_asq_send_command(hw, &desc, v_list, length, cmd_details);
+}
+
+/**
+ * i40e_aq_remove_vlan - Remove VLANs from the HW filtering
+ * @hw: pointer to the hw struct
+ * @seid: VSI for the vlan filters
+ * @v_list: list of macvlans to be removed
+ * @count: length of the list
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+i40e_status i40e_aq_remove_vlan(struct i40e_hw *hw, u16 seid,
+                       struct i40e_aqc_add_remove_vlan_element_data *v_list,
+                       u8 count, struct i40e_asq_cmd_details *cmd_details)
+{
+       struct i40e_aq_desc desc;
+       struct i40e_aqc_macvlan *cmd =
+               (struct i40e_aqc_macvlan *)&desc.params.raw;
+       u16 length;
+
+       if (!hw || !v_list || count == 0)
+               return I40E_ERR_PARAM;
+
+       length = count * sizeof(struct i40e_aqc_add_remove_vlan_element_data);
+
+       i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_remove_vlan);
+       cmd->num_addresses = cpu_to_le16(count);
+       /* seid[0] carries the target VSI plus its valid bit; [1]/[2] unused */
+       cmd->seid[0] = cpu_to_le16(seid | I40E_AQC_MACVLAN_CMD_SEID_VALID);
+       cmd->seid[1] = 0;
+       cmd->seid[2] = 0;
+
+       desc.flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF | I40E_AQ_FLAG_RD));
+       if (length > I40E_AQ_LARGE_BUF)
+               desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB);
+
+       return i40e_asq_send_command(hw, &desc, v_list, length, cmd_details);
+}
+
+/**
+ * i40e_aq_send_msg_to_vf
+ * @hw: pointer to the hardware structure
+ * @vfid: vf id to send msg
+ * @v_opcode: virtchnl opcode, carried to the VF in cookie_high
+ * @v_retval: return value for the VF, carried in cookie_low
+ * @msg: pointer to the msg buffer
+ * @msglen: msg length
+ * @cmd_details: pointer to command details
+ *
+ * send msg to vf
+ **/
+i40e_status i40e_aq_send_msg_to_vf(struct i40e_hw *hw, u16 vfid,
+                               u32 v_opcode, u32 v_retval, u8 *msg, u16 msglen,
+                               struct i40e_asq_cmd_details *cmd_details)
+{
+       struct i40e_aq_desc desc;
+       struct i40e_aqc_pf_vf_message *cmd =
+               (struct i40e_aqc_pf_vf_message *)&desc.params.raw;
+       i40e_status status;
+
+       i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_send_msg_to_vf);
+       cmd->id = cpu_to_le32(vfid);
+       desc.cookie_high = cpu_to_le32(v_opcode);
+       desc.cookie_low = cpu_to_le32(v_retval);
+       desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_SI);
+       /* the payload buffer is optional; only attach it when present */
+       if (msglen) {
+               desc.flags |= cpu_to_le16((u16)(I40E_AQ_FLAG_BUF |
+                                               I40E_AQ_FLAG_RD));
+               if (msglen > I40E_AQ_LARGE_BUF)
+                       desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB);
+               desc.datalen = cpu_to_le16(msglen);
+       }
+       status = i40e_asq_send_command(hw, &desc, msg, msglen, cmd_details);
+
+       return status;
+}
+
+/**
+ * i40e_aq_set_hmc_resource_profile
+ * @hw: pointer to the hw struct
+ * @profile: type of profile the HMC is to be set as
+ * @pe_vf_enabled_count: the number of PE enabled VFs the system has
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * set the HMC profile of the device.
+ **/
+i40e_status i40e_aq_set_hmc_resource_profile(struct i40e_hw *hw,
+                               enum i40e_aq_hmc_profile profile,
+                               u8 pe_vf_enabled_count,
+                               struct i40e_asq_cmd_details *cmd_details)
+{
+       struct i40e_aq_desc desc;
+       struct i40e_aq_get_set_hmc_resource_profile *cmd =
+               (struct i40e_aq_get_set_hmc_resource_profile *)&desc.params.raw;
+       i40e_status status;
+
+       i40e_fill_default_direct_cmd_desc(&desc,
+                                       i40e_aqc_opc_set_hmc_resource_profile);
+
+       /* profile enum is narrowed to the one-byte wire field */
+       cmd->pm_profile = (u8)profile;
+       cmd->pe_vf_enabled = pe_vf_enabled_count;
+
+       status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+       return status;
+}
+
+/**
+ * i40e_aq_request_resource
+ * @hw: pointer to the hw struct
+ * @resource: resource id
+ * @access: access type
+ * @sdp_number: resource number
+ * @timeout: the maximum time in ms that the driver may hold the resource;
+ *           written on success or when the resource is busy
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * requests common resource using the admin queue commands
+ **/
+i40e_status i40e_aq_request_resource(struct i40e_hw *hw,
+                               enum i40e_aq_resources_ids resource,
+                               enum i40e_aq_resource_access_type access,
+                               u8 sdp_number, u64 *timeout,
+                               struct i40e_asq_cmd_details *cmd_details)
+{
+       struct i40e_aq_desc desc;
+       /* same descriptor bytes serve as command and completion */
+       struct i40e_aqc_request_resource *cmd_resp =
+               (struct i40e_aqc_request_resource *)&desc.params.raw;
+       i40e_status status;
+
+       i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_request_resource);
+
+       cmd_resp->resource_id = cpu_to_le16(resource);
+       cmd_resp->access_type = cpu_to_le16(access);
+       cmd_resp->resource_number = cpu_to_le32(sdp_number);
+
+       status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+       /* The completion specifies the maximum time in ms that the driver
+        * may hold the resource in the Timeout field.
+        * If the resource is held by someone else, the command completes with
+        * busy return value and the timeout field indicates the maximum time
+        * the current owner of the resource has to free it.
+        */
+       if (!status || hw->aq.asq_last_status == I40E_AQ_RC_EBUSY)
+               *timeout = le32_to_cpu(cmd_resp->timeout);
+
+       return status;
+}
+
+/**
+ * i40e_aq_release_resource
+ * @hw: pointer to the hw struct
+ * @resource: resource id
+ * @sdp_number: resource number
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * release common resource using the admin queue commands
+ **/
+i40e_status i40e_aq_release_resource(struct i40e_hw *hw,
+                               enum i40e_aq_resources_ids resource,
+                               u8 sdp_number,
+                               struct i40e_asq_cmd_details *cmd_details)
+{
+       struct i40e_aq_desc desc;
+       struct i40e_aqc_request_resource *res =
+               (struct i40e_aqc_request_resource *)&desc.params.raw;
+
+       i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_release_resource);
+
+       res->resource_id = cpu_to_le16(resource);
+       res->resource_number = cpu_to_le32(sdp_number);
+
+       return i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+}
+
+/**
+ * i40e_aq_read_nvm
+ * @hw: pointer to the hw struct
+ * @module_pointer: module pointer location in words from the NVM beginning
+ * @offset: byte offset from the module beginning; top byte must be zero
+ * @length: length of the section to be read (in bytes from the offset)
+ * @data: command buffer (size [bytes] = length)
+ * @last_command: tells if this is the last command in a series
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Read the NVM using the admin queue commands
+ **/
+i40e_status i40e_aq_read_nvm(struct i40e_hw *hw, u8 module_pointer,
+                               u32 offset, u16 length, void *data,
+                               bool last_command,
+                               struct i40e_asq_cmd_details *cmd_details)
+{
+       struct i40e_aq_desc desc;
+       struct i40e_aqc_nvm_update *cmd =
+               (struct i40e_aqc_nvm_update *)&desc.params.raw;
+       i40e_status status;
+
+       /* In offset the highest byte must be zeroed. */
+       if (offset & 0xFF000000) {
+               status = I40E_ERR_PARAM;
+               goto i40e_aq_read_nvm_exit;
+       }
+
+       i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_nvm_read);
+
+       /* If this is the last command in a series, set the proper flag. */
+       if (last_command)
+               cmd->command_flags |= I40E_AQ_NVM_LAST_CMD;
+       cmd->module_pointer = module_pointer;
+       cmd->offset = cpu_to_le32(offset);
+       cmd->length = cpu_to_le16(length);
+
+       /* data arrives in the indirect buffer (firmware -> host) */
+       desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_BUF);
+       if (length > I40E_AQ_LARGE_BUF)
+               desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB);
+
+       status = i40e_asq_send_command(hw, &desc, data, length, cmd_details);
+
+i40e_aq_read_nvm_exit:
+       return status;
+}
+
+/* Capability record IDs returned by the Discover Capabilities AQ command,
+ * consumed by i40e_parse_discover_capabilities() below.
+ */
+#define I40E_DEV_FUNC_CAP_SWITCH_MODE  0x01
+#define I40E_DEV_FUNC_CAP_MGMT_MODE    0x02
+#define I40E_DEV_FUNC_CAP_NPAR         0x03
+#define I40E_DEV_FUNC_CAP_OS2BMC       0x04
+#define I40E_DEV_FUNC_CAP_VALID_FUNC   0x05
+#define I40E_DEV_FUNC_CAP_SRIOV_1_1    0x12
+#define I40E_DEV_FUNC_CAP_VF           0x13
+#define I40E_DEV_FUNC_CAP_VMDQ         0x14
+#define I40E_DEV_FUNC_CAP_802_1_QBG    0x15
+#define I40E_DEV_FUNC_CAP_802_1_QBH    0x16
+#define I40E_DEV_FUNC_CAP_VSI          0x17
+#define I40E_DEV_FUNC_CAP_DCB          0x18
+#define I40E_DEV_FUNC_CAP_FCOE         0x21
+#define I40E_DEV_FUNC_CAP_RSS          0x40
+#define I40E_DEV_FUNC_CAP_RX_QUEUES    0x41
+#define I40E_DEV_FUNC_CAP_TX_QUEUES    0x42
+#define I40E_DEV_FUNC_CAP_MSIX         0x43
+#define I40E_DEV_FUNC_CAP_MSIX_VF      0x44
+#define I40E_DEV_FUNC_CAP_FLOW_DIRECTOR        0x45
+#define I40E_DEV_FUNC_CAP_IEEE_1588    0x46
+#define I40E_DEV_FUNC_CAP_MFP_MODE_1   0xF1
+#define I40E_DEV_FUNC_CAP_CEM          0xF2
+#define I40E_DEV_FUNC_CAP_IWARP                0x51
+#define I40E_DEV_FUNC_CAP_LED          0x61
+#define I40E_DEV_FUNC_CAP_SDP          0x62
+#define I40E_DEV_FUNC_CAP_MDIO         0x63
+
+/**
+ * i40e_parse_discover_capabilities
+ * @hw: pointer to the hw struct
+ * @buff: pointer to a buffer containing device/function capability records
+ * @cap_count: number of capability records in the list
+ * @list_type_opc: type of capabilities list to parse
+ *
+ * Parse the device/function capabilities list.
+ **/
+static void i40e_parse_discover_capabilities(struct i40e_hw *hw, void *buff,
+                                    u32 cap_count,
+                                    enum i40e_admin_queue_opc list_type_opc)
+{
+       struct i40e_aqc_list_capabilities_element_resp *cap;
+       u32 number, logical_id, phys_id;
+       struct i40e_hw_capabilities *p;
+       u32 reg_val;
+       u32 i = 0;
+       u16 id;
+
+       cap = (struct i40e_aqc_list_capabilities_element_resp *) buff;
+
+       if (list_type_opc == i40e_aqc_opc_list_dev_capabilities)
+               p = (struct i40e_hw_capabilities *)&hw->dev_caps;
+       else if (list_type_opc == i40e_aqc_opc_list_func_capabilities)
+               p = (struct i40e_hw_capabilities *)&hw->func_caps;
+       else
+               return;
+
+       for (i = 0; i < cap_count; i++, cap++) {
+               id = le16_to_cpu(cap->id);
+               number = le32_to_cpu(cap->number);
+               logical_id = le32_to_cpu(cap->logical_id);
+               phys_id = le32_to_cpu(cap->phys_id);
+
+               switch (id) {
+               case I40E_DEV_FUNC_CAP_SWITCH_MODE:
+                       p->switch_mode = number;
+                       break;
+               case I40E_DEV_FUNC_CAP_MGMT_MODE:
+                       p->management_mode = number;
+                       break;
+               case I40E_DEV_FUNC_CAP_NPAR:
+                       p->npar_enable = number;
+                       break;
+               case I40E_DEV_FUNC_CAP_OS2BMC:
+                       p->os2bmc = number;
+                       break;
+               case I40E_DEV_FUNC_CAP_VALID_FUNC:
+                       p->valid_functions = number;
+                       break;
+               case I40E_DEV_FUNC_CAP_SRIOV_1_1:
+                       if (number == 1)
+                               p->sr_iov_1_1 = true;
+                       break;
+               case I40E_DEV_FUNC_CAP_VF:
+                       p->num_vfs = number;
+                       p->vf_base_id = logical_id;
+                       break;
+               case I40E_DEV_FUNC_CAP_VMDQ:
+                       if (number == 1)
+                               p->vmdq = true;
+                       break;
+               case I40E_DEV_FUNC_CAP_802_1_QBG:
+                       if (number == 1)
+                               p->evb_802_1_qbg = true;
+                       break;
+               case I40E_DEV_FUNC_CAP_802_1_QBH:
+                       if (number == 1)
+                               p->evb_802_1_qbh = true;
+                       break;
+               case I40E_DEV_FUNC_CAP_VSI:
+                       p->num_vsis = number;
+                       break;
+               case I40E_DEV_FUNC_CAP_DCB:
+                       if (number == 1) {
+                               p->dcb = true;
+                               p->enabled_tcmap = logical_id;
+                               p->maxtc = phys_id;
+                       }
+                       break;
+               case I40E_DEV_FUNC_CAP_FCOE:
+                       if (number == 1)
+                               p->fcoe = true;
+                       break;
+               case I40E_DEV_FUNC_CAP_RSS:
+                       p->rss = true;
+                       reg_val = rd32(hw, I40E_PFQF_CTL_0);
+                       if (reg_val & I40E_PFQF_CTL_0_HASHLUTSIZE_MASK)
+                               p->rss_table_size = number;
+                       else
+                               p->rss_table_size = 128;
+                       p->rss_table_entry_width = logical_id;
+                       break;
+               case I40E_DEV_FUNC_CAP_RX_QUEUES:
+                       p->num_rx_qp = number;
+                       p->base_queue = phys_id;
+                       break;
+               case I40E_DEV_FUNC_CAP_TX_QUEUES:
+                       p->num_tx_qp = number;
+                       p->base_queue = phys_id;
+                       break;
+               case I40E_DEV_FUNC_CAP_MSIX:
+                       p->num_msix_vectors = number;
+                       break;
+               case I40E_DEV_FUNC_CAP_MSIX_VF:
+                       p->num_msix_vectors_vf = number;
+                       break;
+               case I40E_DEV_FUNC_CAP_MFP_MODE_1:
+                       if (number == 1)
+                               p->mfp_mode_1 = true;
+                       break;
+               case I40E_DEV_FUNC_CAP_CEM:
+                       if (number == 1)
+                               p->mgmt_cem = true;
+                       break;
+               case I40E_DEV_FUNC_CAP_IWARP:
+                       if (number == 1)
+                               p->iwarp = true;
+                       break;
+               case I40E_DEV_FUNC_CAP_LED:
+                       if (phys_id < I40E_HW_CAP_MAX_GPIO)
+                               p->led[phys_id] = true;
+                       break;
+               case I40E_DEV_FUNC_CAP_SDP:
+                       if (phys_id < I40E_HW_CAP_MAX_GPIO)
+                               p->sdp[phys_id] = true;
+                       break;
+               case I40E_DEV_FUNC_CAP_MDIO:
+                       if (number == 1) {
+                               p->mdio_port_num = phys_id;
+                               p->mdio_port_mode = logical_id;
+                       }
+                       break;
+               case I40E_DEV_FUNC_CAP_IEEE_1588:
+                       if (number == 1)
+                               p->ieee_1588 = true;
+                       break;
+               case I40E_DEV_FUNC_CAP_FLOW_DIRECTOR:
+                       p->fd = true;
+                       p->fd_filters_guaranteed = number;
+                       p->fd_filters_best_effort = logical_id;
+                       break;
+               default:
+                       break;
+               }
+       }
+
+       /* additional HW specific goodies that might
+        * someday be HW version specific
+        */
+       p->rx_buf_chain_len = I40E_MAX_CHAINED_RX_BUFFERS;
+}
+
+/**
+ * i40e_aq_discover_capabilities
+ * @hw: pointer to the hw struct
+ * @buff: a virtual buffer to hold the capabilities
+ * @buff_size: Size of the virtual buffer
+ * @data_size: Size of the returned data, or buff size needed if AQ err==ENOMEM
+ * @list_type_opc: capabilities type to discover - pass in the command opcode
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Get the device capabilities descriptions from the firmware
+ **/
+i40e_status i40e_aq_discover_capabilities(struct i40e_hw *hw,
+                               void *buff, u16 buff_size, u16 *data_size,
+                               enum i40e_admin_queue_opc list_type_opc,
+                               struct i40e_asq_cmd_details *cmd_details)
+{
+       struct i40e_aqc_list_capabilites *cmd;
+       i40e_status status = 0;
+       struct i40e_aq_desc desc;
+
+       /* cmd aliases the descriptor's param area, so after the send below it
+        * also exposes the firmware's response fields (e.g. cmd->count).
+        */
+       cmd = (struct i40e_aqc_list_capabilites *)&desc.params.raw;
+
+       /* only the two capability-list opcodes are meaningful here */
+       if (list_type_opc != i40e_aqc_opc_list_func_capabilities &&
+               list_type_opc != i40e_aqc_opc_list_dev_capabilities) {
+               status = I40E_ERR_PARAM;
+               goto exit;
+       }
+
+       i40e_fill_default_direct_cmd_desc(&desc, list_type_opc);
+
+       /* indirect command: attach the caller's buffer, mark large buffers */
+       desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_BUF);
+       if (buff_size > I40E_AQ_LARGE_BUF)
+               desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB);
+
+       status = i40e_asq_send_command(hw, &desc, buff, buff_size, cmd_details);
+       /* set even on failure: per the header comment, on AQ ENOMEM the
+        * returned datalen tells the caller the buffer size needed
+        */
+       *data_size = le16_to_cpu(desc.datalen);
+
+       if (status)
+               goto exit;
+
+       i40e_parse_discover_capabilities(hw, buff, le32_to_cpu(cmd->count),
+                                        list_type_opc);
+
+exit:
+       return status;
+}
+
+/**
+ * i40e_aq_get_lldp_mib
+ * @hw: pointer to the hw struct
+ * @bridge_type: type of bridge requested
+ * @mib_type: Local, Remote or both Local and Remote MIBs
+ * @buff: pointer to a user supplied buffer to store the MIB block
+ * @buff_size: size of the buffer (in bytes)
+ * @local_len : length of the returned Local LLDP MIB
+ * @remote_len: length of the returned Remote LLDP MIB
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Requests the complete LLDP MIB (entire packet).
+ **/
+i40e_status i40e_aq_get_lldp_mib(struct i40e_hw *hw, u8 bridge_type,
+                               u8 mib_type, void *buff, u16 buff_size,
+                               u16 *local_len, u16 *remote_len,
+                               struct i40e_asq_cmd_details *cmd_details)
+{
+       struct i40e_aq_desc desc;
+       struct i40e_aqc_lldp_get_mib *cmd =
+               (struct i40e_aqc_lldp_get_mib *)&desc.params.raw;
+       struct i40e_aqc_lldp_get_mib *resp =
+               (struct i40e_aqc_lldp_get_mib *)&desc.params.raw;
+       i40e_status status;
+
+       if (buff_size == 0 || !buff)
+               return I40E_ERR_PARAM;
+
+       i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_lldp_get_mib);
+       /* Indirect Command */
+       desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_BUF);
+
+       cmd->type = mib_type & I40E_AQ_LLDP_MIB_TYPE_MASK;
+       cmd->type |= ((bridge_type << I40E_AQ_LLDP_BRIDGE_TYPE_SHIFT) &
+                      I40E_AQ_LLDP_BRIDGE_TYPE_MASK);
+
+       desc.datalen = cpu_to_le16(buff_size);
+
+       /* NOTE(review): the original OR'd I40E_AQ_FLAG_BUF into desc.flags a
+        * second time here; the duplicate was a no-op and has been dropped.
+        */
+       if (buff_size > I40E_AQ_LARGE_BUF)
+               desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB);
+
+       status = i40e_asq_send_command(hw, &desc, buff, buff_size, cmd_details);
+       if (!status) {
+               /* resp aliases the descriptor, which now holds the reply */
+               if (local_len != NULL)
+                       *local_len = le16_to_cpu(resp->local_len);
+               if (remote_len != NULL)
+                       *remote_len = le16_to_cpu(resp->remote_len);
+       }
+
+       return status;
+}
+
+/**
+ * i40e_aq_cfg_lldp_mib_change_event
+ * @hw: pointer to the hw struct
+ * @enable_update: Enable or Disable event posting
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Enable or Disable posting of an event on ARQ when LLDP MIB
+ * associated with the interface changes
+ **/
+i40e_status i40e_aq_cfg_lldp_mib_change_event(struct i40e_hw *hw,
+                               bool enable_update,
+                               struct i40e_asq_cmd_details *cmd_details)
+{
+       struct i40e_aq_desc desc;
+       struct i40e_aqc_lldp_update_mib *cmd =
+               (struct i40e_aqc_lldp_update_mib *)&desc.params.raw;
+       i40e_status status;
+
+       i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_lldp_update_mib);
+
+       /* only the disable bit is ever set; presumably the firmware default
+        * for this command is "updates enabled" — confirm against datasheet
+        */
+       if (!enable_update)
+               cmd->command |= I40E_AQ_LLDP_MIB_UPDATE_DISABLE;
+
+       status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+       return status;
+}
+
+/**
+ * i40e_aq_stop_lldp
+ * @hw: pointer to the hw struct
+ * @shutdown_agent: True if LLDP Agent needs to be Shutdown
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Stop or Shutdown the embedded LLDP Agent
+ **/
+i40e_status i40e_aq_stop_lldp(struct i40e_hw *hw, bool shutdown_agent,
+                               struct i40e_asq_cmd_details *cmd_details)
+{
+       struct i40e_aq_desc desc;
+       struct i40e_aqc_lldp_stop *cmd =
+               (struct i40e_aqc_lldp_stop *)&desc.params.raw;
+       i40e_status status;
+
+       i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_lldp_stop);
+
+       /* without the shutdown bit this is a plain stop of the agent */
+       if (shutdown_agent)
+               cmd->command |= I40E_AQ_LLDP_AGENT_SHUTDOWN;
+
+       /* direct command: no indirect buffer needed */
+       status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+       return status;
+}
+
+/**
+ * i40e_aq_start_lldp
+ * @hw: pointer to the hw struct
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Start the embedded LLDP Agent on all ports.
+ **/
+i40e_status i40e_aq_start_lldp(struct i40e_hw *hw,
+                               struct i40e_asq_cmd_details *cmd_details)
+{
+       struct i40e_aq_desc desc;
+       struct i40e_aqc_lldp_start *cmd =
+               (struct i40e_aqc_lldp_start *)&desc.params.raw;
+       i40e_status status;
+
+       i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_lldp_start);
+
+       /* note: assignment (not OR) — start is the only command bit used */
+       cmd->command = I40E_AQ_LLDP_AGENT_START;
+
+       status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+       return status;
+}
+
+/**
+ * i40e_aq_delete_element - Delete switch element
+ * @hw: pointer to the hw struct
+ * @seid: the SEID to delete from the switch
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * This deletes a switch element from the switch.
+ **/
+i40e_status i40e_aq_delete_element(struct i40e_hw *hw, u16 seid,
+                               struct i40e_asq_cmd_details *cmd_details)
+{
+       struct i40e_aq_desc desc;
+       struct i40e_aqc_switch_seid *cmd =
+               (struct i40e_aqc_switch_seid *)&desc.params.raw;
+       i40e_status status;
+
+       /* seid 0 is rejected as invalid before touching the admin queue */
+       if (seid == 0)
+               return I40E_ERR_PARAM;
+
+       i40e_fill_default_direct_cmd_desc(&desc, i40e_aqc_opc_delete_element);
+
+       cmd->seid = cpu_to_le16(seid);
+
+       status = i40e_asq_send_command(hw, &desc, NULL, 0, cmd_details);
+
+       return status;
+}
+
+/**
+ * i40e_aq_tx_sched_cmd - generic Tx scheduler AQ command handler
+ * @hw: pointer to the hw struct
+ * @seid: seid for the physical port/switching component/vsi
+ * @buff: Indirect buffer to hold data parameters and response
+ * @buff_size: Indirect buffer size
+ * @opcode: Tx scheduler AQ command opcode
+ * @cmd_details: pointer to command details structure or NULL
+ *
+ * Generic command handler for Tx scheduler AQ commands
+ **/
+static i40e_status i40e_aq_tx_sched_cmd(struct i40e_hw *hw, u16 seid,
+                               void *buff, u16 buff_size,
+                                enum i40e_admin_queue_opc opcode,
+                               struct i40e_asq_cmd_details *cmd_details)
+{
+       struct i40e_aq_desc desc;
+       struct i40e_aqc_tx_sched_ind *cmd =
+               (struct i40e_aqc_tx_sched_ind *)&desc.params.raw;
+       i40e_status status;
+       bool cmd_param_flag = false;
+
+       /* configure/enable/modify/disable opcodes carry parameters TO the
+        * firmware and so need the RD flag on the descriptor below; query
+        * opcodes only read data back.  Anything else is rejected.
+        */
+       switch (opcode) {
+       case i40e_aqc_opc_configure_vsi_ets_sla_bw_limit:
+       case i40e_aqc_opc_configure_vsi_tc_bw:
+       case i40e_aqc_opc_enable_switching_comp_ets:
+       case i40e_aqc_opc_modify_switching_comp_ets:
+       case i40e_aqc_opc_disable_switching_comp_ets:
+       case i40e_aqc_opc_configure_switching_comp_ets_bw_limit:
+       case i40e_aqc_opc_configure_switching_comp_bw_config:
+               cmd_param_flag = true;
+               break;
+       case i40e_aqc_opc_query_vsi_bw_config:
+       case i40e_aqc_opc_query_vsi_ets_sla_config:
+       case i40e_aqc_opc_query_switching_comp_ets_config:
+       case i40e_aqc_opc_query_port_ets_config:
+       case i40e_aqc_opc_query_switching_comp_bw_config:
+               cmd_param_flag = false;
+               break;
+       default:
+               return I40E_ERR_PARAM;
+       }
+
+       i40e_fill_default_direct_cmd_desc(&desc, opcode);
+
+       /* Indirect command */
+       desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_BUF);
+       if (cmd_param_flag)
+               desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_RD);
+       if (buff_size > I40E_AQ_LARGE_BUF)
+               desc.flags |= cpu_to_le16((u16)I40E_AQ_FLAG_LB);
+
+       desc.datalen = cpu_to_le16(buff_size);
+
+       cmd->vsi_seid = cpu_to_le16(seid);
+
+       status = i40e_asq_send_command(hw, &desc, buff, buff_size, cmd_details);
+
+       return status;
+}
+
+/**
+ * i40e_aq_config_vsi_tc_bw - Config VSI BW Allocation per TC
+ * @hw: pointer to the hw struct
+ * @seid: VSI seid
+ * @bw_data: Buffer holding enabled TCs, relative TC BW limit/credits
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+i40e_status i40e_aq_config_vsi_tc_bw(struct i40e_hw *hw,
+                       u16 seid,
+                       struct i40e_aqc_configure_vsi_tc_bw_data *bw_data,
+                       struct i40e_asq_cmd_details *cmd_details)
+{
+       /* thin wrapper: the generic handler sets the RD flag for this opcode */
+       return i40e_aq_tx_sched_cmd(hw, seid, (void *)bw_data, sizeof(*bw_data),
+                                   i40e_aqc_opc_configure_vsi_tc_bw,
+                                   cmd_details);
+}
+
+/**
+ * i40e_aq_query_vsi_bw_config - Query VSI BW configuration
+ * @hw: pointer to the hw struct
+ * @seid: seid of the VSI
+ * @bw_data: Buffer to hold VSI BW configuration
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+i40e_status i40e_aq_query_vsi_bw_config(struct i40e_hw *hw,
+                       u16 seid,
+                       struct i40e_aqc_query_vsi_bw_config_resp *bw_data,
+                       struct i40e_asq_cmd_details *cmd_details)
+{
+       /* thin query wrapper around the generic Tx scheduler handler */
+       return i40e_aq_tx_sched_cmd(hw, seid, (void *)bw_data, sizeof(*bw_data),
+                                   i40e_aqc_opc_query_vsi_bw_config,
+                                   cmd_details);
+}
+
+/**
+ * i40e_aq_query_vsi_ets_sla_config - Query VSI BW configuration per TC
+ * @hw: pointer to the hw struct
+ * @seid: seid of the VSI
+ * @bw_data: Buffer to hold VSI BW configuration per TC
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+i40e_status i40e_aq_query_vsi_ets_sla_config(struct i40e_hw *hw,
+                       u16 seid,
+                       struct i40e_aqc_query_vsi_ets_sla_config_resp *bw_data,
+                       struct i40e_asq_cmd_details *cmd_details)
+{
+       /* thin query wrapper around the generic Tx scheduler handler */
+       return i40e_aq_tx_sched_cmd(hw, seid, (void *)bw_data, sizeof(*bw_data),
+                                   i40e_aqc_opc_query_vsi_ets_sla_config,
+                                   cmd_details);
+}
+
+/**
+ * i40e_aq_query_switch_comp_ets_config - Query Switch comp BW config per TC
+ * @hw: pointer to the hw struct
+ * @seid: seid of the switching component
+ * @bw_data: Buffer to hold switching component's per TC BW config
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+i40e_status i40e_aq_query_switch_comp_ets_config(struct i40e_hw *hw,
+               u16 seid,
+               struct i40e_aqc_query_switching_comp_ets_config_resp *bw_data,
+               struct i40e_asq_cmd_details *cmd_details)
+{
+       /* thin query wrapper around the generic Tx scheduler handler */
+       return i40e_aq_tx_sched_cmd(hw, seid, (void *)bw_data, sizeof(*bw_data),
+                                  i40e_aqc_opc_query_switching_comp_ets_config,
+                                  cmd_details);
+}
+
+/**
+ * i40e_aq_query_port_ets_config - Query Physical Port ETS configuration
+ * @hw: pointer to the hw struct
+ * @seid: seid of the VSI or switching component connected to Physical Port
+ * @bw_data: Buffer to hold current ETS configuration for the Physical Port
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+i40e_status i40e_aq_query_port_ets_config(struct i40e_hw *hw,
+                       u16 seid,
+                       struct i40e_aqc_query_port_ets_config_resp *bw_data,
+                       struct i40e_asq_cmd_details *cmd_details)
+{
+       /* thin query wrapper around the generic Tx scheduler handler */
+       return i40e_aq_tx_sched_cmd(hw, seid, (void *)bw_data, sizeof(*bw_data),
+                                   i40e_aqc_opc_query_port_ets_config,
+                                   cmd_details);
+}
+
+/**
+ * i40e_aq_query_switch_comp_bw_config - Query Switch comp BW configuration
+ * @hw: pointer to the hw struct
+ * @seid: seid of the switching component
+ * @bw_data: Buffer to hold switching component's BW configuration
+ * @cmd_details: pointer to command details structure or NULL
+ **/
+i40e_status i40e_aq_query_switch_comp_bw_config(struct i40e_hw *hw,
+               u16 seid,
+               struct i40e_aqc_query_switching_comp_bw_config_resp *bw_data,
+               struct i40e_asq_cmd_details *cmd_details)
+{
+       /* thin query wrapper around the generic Tx scheduler handler */
+       return i40e_aq_tx_sched_cmd(hw, seid, (void *)bw_data, sizeof(*bw_data),
+                                   i40e_aqc_opc_query_switching_comp_bw_config,
+                                   cmd_details);
+}
+
+/**
+ * i40e_validate_filter_settings
+ * @hw: pointer to the hardware structure
+ * @settings: Filter control settings
+ *
+ * Check and validate the filter control settings passed.
+ * The function checks for the valid filter/context sizes being
+ * passed for FCoE and PE.
+ *
+ * Returns 0 if the values passed are valid and within
+ * range else returns an error.
+ **/
+static i40e_status i40e_validate_filter_settings(struct i40e_hw *hw,
+                               struct i40e_filter_control_settings *settings)
+{
+       u32 fcoe_cntx_size, fcoe_filt_size;
+       u32 pe_cntx_size, pe_filt_size;
+       u32 fcoe_fmax, pe_fmax;
+       u32 val;
+
+       /* Validate FCoE settings passed.
+        * The *_num enum values encode a left-shift count from the base
+        * size, i.e. actual size = BASE_SIZE << enum_value.
+        */
+       switch (settings->fcoe_filt_num) {
+       case I40E_HASH_FILTER_SIZE_1K:
+       case I40E_HASH_FILTER_SIZE_2K:
+       case I40E_HASH_FILTER_SIZE_4K:
+       case I40E_HASH_FILTER_SIZE_8K:
+       case I40E_HASH_FILTER_SIZE_16K:
+       case I40E_HASH_FILTER_SIZE_32K:
+               fcoe_filt_size = I40E_HASH_FILTER_BASE_SIZE;
+               fcoe_filt_size <<= (u32)settings->fcoe_filt_num;
+               break;
+       default:
+               return I40E_ERR_PARAM;
+       }
+
+       switch (settings->fcoe_cntx_num) {
+       case I40E_DMA_CNTX_SIZE_512:
+       case I40E_DMA_CNTX_SIZE_1K:
+       case I40E_DMA_CNTX_SIZE_2K:
+       case I40E_DMA_CNTX_SIZE_4K:
+               fcoe_cntx_size = I40E_DMA_CNTX_BASE_SIZE;
+               fcoe_cntx_size <<= (u32)settings->fcoe_cntx_num;
+               break;
+       default:
+               return I40E_ERR_PARAM;
+       }
+
+       /* Validate PE settings passed; PE accepts a wider size range
+        * than FCoE (up to 1M filters / 256K contexts).
+        */
+       switch (settings->pe_filt_num) {
+       case I40E_HASH_FILTER_SIZE_1K:
+       case I40E_HASH_FILTER_SIZE_2K:
+       case I40E_HASH_FILTER_SIZE_4K:
+       case I40E_HASH_FILTER_SIZE_8K:
+       case I40E_HASH_FILTER_SIZE_16K:
+       case I40E_HASH_FILTER_SIZE_32K:
+       case I40E_HASH_FILTER_SIZE_64K:
+       case I40E_HASH_FILTER_SIZE_128K:
+       case I40E_HASH_FILTER_SIZE_256K:
+       case I40E_HASH_FILTER_SIZE_512K:
+       case I40E_HASH_FILTER_SIZE_1M:
+               pe_filt_size = I40E_HASH_FILTER_BASE_SIZE;
+               pe_filt_size <<= (u32)settings->pe_filt_num;
+               break;
+       default:
+               return I40E_ERR_PARAM;
+       }
+
+       switch (settings->pe_cntx_num) {
+       case I40E_DMA_CNTX_SIZE_512:
+       case I40E_DMA_CNTX_SIZE_1K:
+       case I40E_DMA_CNTX_SIZE_2K:
+       case I40E_DMA_CNTX_SIZE_4K:
+       case I40E_DMA_CNTX_SIZE_8K:
+       case I40E_DMA_CNTX_SIZE_16K:
+       case I40E_DMA_CNTX_SIZE_32K:
+       case I40E_DMA_CNTX_SIZE_64K:
+       case I40E_DMA_CNTX_SIZE_128K:
+       case I40E_DMA_CNTX_SIZE_256K:
+               pe_cntx_size = I40E_DMA_CNTX_BASE_SIZE;
+               pe_cntx_size <<= (u32)settings->pe_cntx_num;
+               break;
+       default:
+               return I40E_ERR_PARAM;
+       }
+
+       /* FCHSIZE + FCDSIZE should not be greater than PMFCOEFMAX */
+       val = rd32(hw, I40E_GLHMC_FCOEFMAX);
+       fcoe_fmax = (val & I40E_GLHMC_FCOEFMAX_PMFCOEFMAX_MASK)
+                    >> I40E_GLHMC_FCOEFMAX_PMFCOEFMAX_SHIFT;
+       if (fcoe_filt_size + fcoe_cntx_size >  fcoe_fmax)
+               return I40E_ERR_INVALID_SIZE;
+
+       /* PEHSIZE + PEDSIZE should not be greater than PMPEXFMAX */
+       val = rd32(hw, I40E_GLHMC_PEXFMAX);
+       pe_fmax = (val & I40E_GLHMC_PEXFMAX_PMPEXFMAX_MASK)
+                  >> I40E_GLHMC_PEXFMAX_PMPEXFMAX_SHIFT;
+       if (pe_filt_size + pe_cntx_size >  pe_fmax)
+               return I40E_ERR_INVALID_SIZE;
+
+       return 0;
+}
+
+/**
+ * i40e_set_filter_control
+ * @hw: pointer to the hardware structure
+ * @settings: Filter control settings
+ *
+ * Set the Queue Filters for PE/FCoE and enable filters required
+ * for a single PF. It is expected that these settings are programmed
+ * at the driver initialization time.
+ **/
+i40e_status i40e_set_filter_control(struct i40e_hw *hw,
+                               struct i40e_filter_control_settings *settings)
+{
+       i40e_status ret = 0;
+       u32 hash_lut_size = 0;
+       u32 val;
+
+       if (!settings)
+               return I40E_ERR_PARAM;
+
+       /* Validate the input settings */
+       ret = i40e_validate_filter_settings(hw, settings);
+       if (ret)
+               return ret;
+
+       /* Read-modify-write of the PF Queue Filter control register */
+       val = rd32(hw, I40E_PFQF_CTL_0);
+
+       /* Program required PE hash buckets for the PF */
+       val &= ~I40E_PFQF_CTL_0_PEHSIZE_MASK;
+       val |= ((u32)settings->pe_filt_num << I40E_PFQF_CTL_0_PEHSIZE_SHIFT) &
+               I40E_PFQF_CTL_0_PEHSIZE_MASK;
+       /* Program required PE contexts for the PF */
+       val &= ~I40E_PFQF_CTL_0_PEDSIZE_MASK;
+       val |= ((u32)settings->pe_cntx_num << I40E_PFQF_CTL_0_PEDSIZE_SHIFT) &
+               I40E_PFQF_CTL_0_PEDSIZE_MASK;
+
+       /* Program required FCoE hash buckets for the PF */
+       val &= ~I40E_PFQF_CTL_0_PFFCHSIZE_MASK;
+       val |= ((u32)settings->fcoe_filt_num <<
+                       I40E_PFQF_CTL_0_PFFCHSIZE_SHIFT) &
+               I40E_PFQF_CTL_0_PFFCHSIZE_MASK;
+       /* Program required FCoE DDP contexts for the PF */
+       val &= ~I40E_PFQF_CTL_0_PFFCDSIZE_MASK;
+       val |= ((u32)settings->fcoe_cntx_num <<
+                       I40E_PFQF_CTL_0_PFFCDSIZE_SHIFT) &
+               I40E_PFQF_CTL_0_PFFCDSIZE_MASK;
+
+       /* Program Hash LUT size for the PF: field value 0 selects the
+        * default (128-entry) LUT, 1 selects the 512-entry LUT — this
+        * mirrors the decode in i40e_parse_discover_capabilities()
+        */
+       val &= ~I40E_PFQF_CTL_0_HASHLUTSIZE_MASK;
+       if (settings->hash_lut_size == I40E_HASH_LUT_SIZE_512)
+               hash_lut_size = 1;
+       val |= (hash_lut_size << I40E_PFQF_CTL_0_HASHLUTSIZE_SHIFT) &
+               I40E_PFQF_CTL_0_HASHLUTSIZE_MASK;
+
+       /* Enable FDIR, Ethertype and MACVLAN filters for PF and VFs */
+       if (settings->enable_fdir)
+               val |= I40E_PFQF_CTL_0_FD_ENA_MASK;
+       if (settings->enable_ethtype)
+               val |= I40E_PFQF_CTL_0_ETYPE_ENA_MASK;
+       if (settings->enable_macvlan)
+               val |= I40E_PFQF_CTL_0_MACVLAN_ENA_MASK;
+
+       wr32(hw, I40E_PFQF_CTL_0, val);
+
+       return 0;
+}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_debugfs.c b/drivers/net/ethernet/intel/i40e/i40e_debugfs.c
new file mode 100644 (file)
index 0000000..8dbd91f
--- /dev/null
@@ -0,0 +1,2076 @@
+/*******************************************************************************
+ *
+ * Intel Ethernet Controller XL710 Family Linux Driver
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Contact Information:
+ * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ *
+ ******************************************************************************/
+
+#ifdef CONFIG_DEBUG_FS
+
+#include <linux/fs.h>
+#include <linux/debugfs.h>
+
+#include "i40e.h"
+
+static struct dentry *i40e_dbg_root;
+
+/**
+ * i40e_dbg_find_vsi - searches for the vsi with the given seid
+ * @pf - the pf structure to search for the vsi
+ * @seid - seid of the vsi it is searching for
+ *
+ * Returns the matching VSI, or NULL when no VSI has that seid
+ * (a warning is logged for negative seids).
+ **/
+static struct i40e_vsi *i40e_dbg_find_vsi(struct i40e_pf *pf, int seid)
+{
+       int i;
+
+       if (seid < 0)
+               dev_info(&pf->pdev->dev, "%d: bad seid\n", seid);
+       else
+               /* linear scan; vsi slots may be sparse (NULL entries) */
+               for (i = 0; i < pf->hw.func_caps.num_vsis; i++)
+                       if (pf->vsi[i] && (pf->vsi[i]->seid == seid))
+                               return pf->vsi[i];
+
+       return NULL;
+}
+
+/**
+ * i40e_dbg_find_veb - searches for the veb with the given seid
+ * @pf - the pf structure to search for the veb
+ * @seid - seid of the veb it is searching for
+ *
+ * Returns the matching VEB, or NULL when no VEB has that seid.
+ **/
+static struct i40e_veb *i40e_dbg_find_veb(struct i40e_pf *pf, int seid)
+{
+       int i;
+
+       /* valid veb seids are [I40E_BASE_VEB_SEID,
+        * I40E_BASE_VEB_SEID + I40E_MAX_VEB) to match the loop below;
+        * the original '>' test let the one-past-the-end seid through
+        * without the "bad seid" warning (off-by-one)
+        */
+       if ((seid < I40E_BASE_VEB_SEID) ||
+           (seid >= (I40E_BASE_VEB_SEID + I40E_MAX_VEB)))
+               dev_info(&pf->pdev->dev, "%d: bad seid\n", seid);
+       else
+               for (i = 0; i < I40E_MAX_VEB; i++)
+                       if (pf->veb[i] && pf->veb[i]->seid == seid)
+                               return pf->veb[i];
+       return NULL;
+}
+
+/**************************************************************
+ * dump
+ * The dump entry in debugfs is for getting a data snapshot of
+ * the driver's current configuration and runtime details.
+ * When the filesystem entry is written, a snapshot is taken.
+ * When the entry is read, the most recent snapshot data is dumped.
+ **************************************************************/
+static char *i40e_dbg_dump_buf;        /* snapshot buffer, kzalloc'd on demand */
+static ssize_t i40e_dbg_dump_data_len; /* bytes of valid snapshot data */
+static ssize_t i40e_dbg_dump_buffer_len; /* allocated size of the buffer */
+
+/**
+ * i40e_dbg_dump_read - read the dump data
+ * @filp: the opened file
+ * @buffer: where to write the data for the user to read
+ * @count: the size of the user's buffer
+ * @ppos: file position offset
+ **/
+static ssize_t i40e_dbg_dump_read(struct file *filp, char __user *buffer,
+                                 size_t count, loff_t *ppos)
+{
+       int bytes_not_copied;
+       int len;
+
+       /* is *ppos bigger than the available data? */
+       if (*ppos >= i40e_dbg_dump_data_len || !i40e_dbg_dump_buf)
+               return 0;
+
+       /* be sure to not read beyond the end of available data */
+       len = min_t(int, count, (i40e_dbg_dump_data_len - *ppos));
+
+       bytes_not_copied = copy_to_user(buffer, &i40e_dbg_dump_buf[*ppos], len);
+       /* copy_to_user() returns the (unsigned) number of bytes that could
+        * NOT be copied and never returns a negative value, so the original
+        * "< 0" check could never fire; report -EFAULT on any partial copy
+        */
+       if (bytes_not_copied)
+               return -EFAULT;
+
+       *ppos += len;
+       return len;
+}
+
+/**
+ * i40e_dbg_prep_dump_buf
+ * @pf: the pf we're working with (currently unused; kept for symmetry
+ *      with the other debugfs helpers)
+ * @buflen: the desired buffer length
+ *
+ * Return positive if success, 0 if failed
+ **/
+static int i40e_dbg_prep_dump_buf(struct i40e_pf *pf, int buflen)
+{
+       /* if not already big enough, prep for re alloc */
+       if (i40e_dbg_dump_buffer_len && i40e_dbg_dump_buffer_len < buflen) {
+               kfree(i40e_dbg_dump_buf);
+               i40e_dbg_dump_buffer_len = 0;
+               i40e_dbg_dump_buf = NULL;
+       }
+
+       /* get a new buffer if needed */
+       if (!i40e_dbg_dump_buf) {
+               i40e_dbg_dump_buf = kzalloc(buflen, GFP_KERNEL);
+               if (i40e_dbg_dump_buf != NULL)
+                       i40e_dbg_dump_buffer_len = buflen;
+       }
+
+       /* may exceed the requested buflen when an older, larger buffer is
+        * reused; 0 means the allocation failed
+        */
+       return i40e_dbg_dump_buffer_len;
+}
+
+/**
+ * i40e_dbg_dump_write - trigger a datadump snapshot
+ * @filp: the opened file
+ * @buffer: where to find the user's data
+ * @count: the length of the user's data
+ * @ppos: file position offset
+ *
+ * Any write clears the stats
+ **/
+static ssize_t i40e_dbg_dump_write(struct file *filp,
+                                  const char __user *buffer,
+                                  size_t count, loff_t *ppos)
+{
+       struct i40e_pf *pf = filp->private_data;
+       char dump_request_buf[16];
+       bool seid_found = false;
+       int bytes_not_copied;
+       long seid = -1;
+       int buflen = 0;
+       int i, ret;
+       int len;
+       u8 *p;
+
+       /* don't allow partial writes */
+       if (*ppos != 0)
+               return 0;
+       if (count >= sizeof(dump_request_buf))
+               return -ENOSPC;
+
+       bytes_not_copied = copy_from_user(dump_request_buf, buffer, count);
+       if (bytes_not_copied < 0)
+               return bytes_not_copied;
+       if (bytes_not_copied > 0)
+               count -= bytes_not_copied;
+       dump_request_buf[count] = '\0';
+
+       /* decode the SEID given to be dumped */
+       ret = kstrtol(dump_request_buf, 0, &seid);
+       if (ret < 0) {
+               dev_info(&pf->pdev->dev, "bad seid value '%s'\n",
+                        dump_request_buf);
+       } else if (seid == 0) {
+               seid_found = true;
+
+               kfree(i40e_dbg_dump_buf);
+               i40e_dbg_dump_buffer_len = 0;
+               i40e_dbg_dump_data_len = 0;
+               i40e_dbg_dump_buf = NULL;
+               dev_info(&pf->pdev->dev, "debug buffer freed\n");
+
+       } else if (seid == pf->pf_seid || seid == 1) {
+               seid_found = true;
+
+               buflen = sizeof(struct i40e_pf);
+               buflen += (sizeof(struct i40e_aq_desc)
+                    * (pf->hw.aq.num_arq_entries + pf->hw.aq.num_asq_entries));
+
+               if (i40e_dbg_prep_dump_buf(pf, buflen)) {
+                       p = i40e_dbg_dump_buf;
+
+                       len = sizeof(struct i40e_pf);
+                       memcpy(p, pf, len);
+                       p += len;
+
+                       len = (sizeof(struct i40e_aq_desc)
+                                       * pf->hw.aq.num_asq_entries);
+                       memcpy(p, pf->hw.aq.asq.desc, len);
+                       p += len;
+
+                       len = (sizeof(struct i40e_aq_desc)
+                                       * pf->hw.aq.num_arq_entries);
+                       memcpy(p, pf->hw.aq.arq.desc, len);
+                       p += len;
+
+                       i40e_dbg_dump_data_len = buflen;
+                       dev_info(&pf->pdev->dev,
+                                "PF seid %ld dumped %d bytes\n",
+                                seid, (int)i40e_dbg_dump_data_len);
+               }
+       } else if (seid >= I40E_BASE_VSI_SEID) {
+               struct i40e_vsi *vsi = NULL;
+               struct i40e_mac_filter *f;
+               int filter_count = 0;
+
+               mutex_lock(&pf->switch_mutex);
+               vsi = i40e_dbg_find_vsi(pf, seid);
+               if (!vsi) {
+                       mutex_unlock(&pf->switch_mutex);
+                       goto write_exit;
+               }
+
+               buflen = sizeof(struct i40e_vsi);
+               buflen += sizeof(struct i40e_q_vector) * vsi->num_q_vectors;
+               buflen += sizeof(struct i40e_ring) * 2 * vsi->num_queue_pairs;
+               buflen += sizeof(struct i40e_tx_buffer) * vsi->num_queue_pairs;
+               buflen += sizeof(struct i40e_rx_buffer) * vsi->num_queue_pairs;
+               list_for_each_entry(f, &vsi->mac_filter_list, list)
+                       filter_count++;
+               buflen += sizeof(struct i40e_mac_filter) * filter_count;
+
+               if (i40e_dbg_prep_dump_buf(pf, buflen)) {
+                       p = i40e_dbg_dump_buf;
+                       seid_found = true;
+
+                       len = sizeof(struct i40e_vsi);
+                       memcpy(p, vsi, len);
+                       p += len;
+
+                       len = (sizeof(struct i40e_q_vector)
+                               * vsi->num_q_vectors);
+                       memcpy(p, vsi->q_vectors, len);
+                       p += len;
+
+                       len = (sizeof(struct i40e_ring) * vsi->num_queue_pairs);
+                       memcpy(p, vsi->tx_rings, len);
+                       p += len;
+                       memcpy(p, vsi->rx_rings, len);
+                       p += len;
+
+                       for (i = 0; i < vsi->num_queue_pairs; i++) {
+                               len = sizeof(struct i40e_tx_buffer);
+                               memcpy(p, vsi->tx_rings[i].tx_bi, len);
+                               p += len;
+                       }
+                       for (i = 0; i < vsi->num_queue_pairs; i++) {
+                               len = sizeof(struct i40e_rx_buffer);
+                               memcpy(p, vsi->rx_rings[i].rx_bi, len);
+                               p += len;
+                       }
+
+                       /* macvlan filter list */
+                       len = sizeof(struct i40e_mac_filter);
+                       list_for_each_entry(f, &vsi->mac_filter_list, list) {
+                               memcpy(p, f, len);
+                               p += len;
+                       }
+
+                       i40e_dbg_dump_data_len = buflen;
+                       dev_info(&pf->pdev->dev,
+                                "VSI seid %ld dumped %d bytes\n",
+                                seid, (int)i40e_dbg_dump_data_len);
+               }
+               mutex_unlock(&pf->switch_mutex);
+       } else if (seid >= I40E_BASE_VEB_SEID) {
+               struct i40e_veb *veb = NULL;
+
+               mutex_lock(&pf->switch_mutex);
+               veb = i40e_dbg_find_veb(pf, seid);
+               if (!veb) {
+                       mutex_unlock(&pf->switch_mutex);
+                       goto write_exit;
+               }
+
+               buflen = sizeof(struct i40e_veb);
+               if (i40e_dbg_prep_dump_buf(pf, buflen)) {
+                       seid_found = true;
+                       memcpy(i40e_dbg_dump_buf, veb, buflen);
+                       i40e_dbg_dump_data_len = buflen;
+                       dev_info(&pf->pdev->dev,
+                                "VEB seid %ld dumped %d bytes\n",
+                                seid, (int)i40e_dbg_dump_data_len);
+               }
+               mutex_unlock(&pf->switch_mutex);
+       }
+
+write_exit:
+       if (!seid_found)
+               dev_info(&pf->pdev->dev, "unknown seid %ld\n", seid);
+
+       return count;
+}
+
+/* debugfs "dump" file: write selects/frees the capture (see i40e_dbg_dump_write),
+ * read streams back the raw captured bytes (i40e_dbg_dump_read).
+ */
+static const struct file_operations i40e_dbg_dump_fops = {
+       .owner = THIS_MODULE,
+       .open =  simple_open,
+       .read =  i40e_dbg_dump_read,
+       .write = i40e_dbg_dump_write,
+};
+
+/**************************************************************
+ * command
+ * The command entry in debugfs is for giving the driver commands
+ * to be executed - these may be for changing the internal switch
+ * setup, adding or removing filters, or other things.  Many of
+ * these will be useful for some forms of unit testing.
+ **************************************************************/
+/* Echoed back by i40e_dbg_command_read; "hello world" is the initial
+ * placeholder (presumably overwritten by the command write handler —
+ * not visible here, confirm against i40e_dbg_command_write).
+ */
+static char i40e_dbg_command_buf[256] = "hello world";
+
+/**
+ * i40e_dbg_command_read - read for command datum
+ * @filp: the opened file
+ * @buffer: where to write the data for the user to read
+ * @count: the size of the user's buffer
+ * @ppos: file position offset
+ *
+ * Returns the number of bytes written to the user's buffer, 0 for a
+ * repeated read (no partial reads allowed), or a negative errno.
+ **/
+static ssize_t i40e_dbg_command_read(struct file *filp, char __user *buffer,
+                                    size_t count, loff_t *ppos)
+{
+       struct i40e_pf *pf = filp->private_data;
+       int bytes_not_copied;
+       int buf_size = 256;
+       char *buf;
+       int len;
+
+       /* don't allow partial reads */
+       if (*ppos != 0)
+               return 0;
+       if (count < buf_size)
+               return -ENOSPC;
+
+       buf = kzalloc(buf_size, GFP_KERNEL);
+       if (!buf)
+               return -ENOMEM;
+
+       len = snprintf(buf, buf_size, "%s: %s\n",
+                      pf->vsi[pf->lan_vsi]->netdev->name,
+                      i40e_dbg_command_buf);
+
+       /* copy_to_user() returns the number of bytes NOT copied (an
+        * unsigned count, never negative), so any non-zero result is a
+        * short copy and must be reported as -EFAULT.
+        */
+       bytes_not_copied = copy_to_user(buffer, buf, len);
+       kfree(buf);
+
+       if (bytes_not_copied)
+               return -EFAULT;
+
+       *ppos = len;
+       return len;
+}
+
+/**
+ * i40e_dbg_dump_vsi_seid - handles "dump vsi <seid>" from the command datum
+ * @pf: the i40e_pf created in command write
+ * @seid: the seid the user put in
+ *
+ * Prints the full state of the VSI identified by @seid to the kernel
+ * log: netdev/filters, netdev stats and their offsets, per-queue Rx/Tx
+ * ring state, interrupt vectors, the AQ VSI context, and TC/BW config.
+ **/
+static void i40e_dbg_dump_vsi_seid(struct i40e_pf *pf, int seid)
+{
+       struct rtnl_link_stats64 *nstat;
+       struct i40e_mac_filter *f;
+       struct i40e_vsi *vsi;
+       int i;
+
+       vsi = i40e_dbg_find_vsi(pf, seid);
+       if (!vsi) {
+               dev_info(&pf->pdev->dev,
+                        "dump %d: seid not found\n", seid);
+               return;
+       }
+       /* basic identification: netdev, vlan group, state/flags, filters */
+       dev_info(&pf->pdev->dev, "vsi seid %d\n", seid);
+       if (vsi->netdev)
+               dev_info(&pf->pdev->dev,
+                        "    netdev: name = %s\n",
+                        vsi->netdev->name);
+       if (vsi->active_vlans)
+               dev_info(&pf->pdev->dev,
+                        "    vlgrp: & = %p\n", vsi->active_vlans);
+       dev_info(&pf->pdev->dev,
+                "    netdev_registered = %i, current_netdev_flags = 0x%04x, state = %li flags = 0x%08lx\n",
+                vsi->netdev_registered,
+                vsi->current_netdev_flags, vsi->state, vsi->flags);
+       list_for_each_entry(f, &vsi->mac_filter_list, list) {
+               dev_info(&pf->pdev->dev,
+                        "    mac_filter_list: %pM vid=%d, is_netdev=%d is_vf=%d counter=%d\n",
+                        f->macaddr, f->vlan, f->is_netdev, f->is_vf,
+                        f->counter);
+       }
+       /* cumulative netdev stats, then the driver's baseline offsets */
+       nstat = i40e_get_vsi_stats_struct(vsi);
+       dev_info(&pf->pdev->dev,
+                "    net_stats: rx_packets = %lu, rx_bytes = %lu, rx_errors = %lu, rx_dropped = %lu\n",
+                (long unsigned int)nstat->rx_packets,
+                (long unsigned int)nstat->rx_bytes,
+                (long unsigned int)nstat->rx_errors,
+                (long unsigned int)nstat->rx_dropped);
+       dev_info(&pf->pdev->dev,
+                "    net_stats: tx_packets = %lu, tx_bytes = %lu, tx_errors = %lu, tx_dropped = %lu\n",
+                (long unsigned int)nstat->tx_packets,
+                (long unsigned int)nstat->tx_bytes,
+                (long unsigned int)nstat->tx_errors,
+                (long unsigned int)nstat->tx_dropped);
+       dev_info(&pf->pdev->dev,
+                "    net_stats: multicast = %lu, collisions = %lu\n",
+                (long unsigned int)nstat->multicast,
+                (long unsigned int)nstat->collisions);
+       dev_info(&pf->pdev->dev,
+                "    net_stats: rx_length_errors = %lu, rx_over_errors = %lu, rx_crc_errors = %lu\n",
+                (long unsigned int)nstat->rx_length_errors,
+                (long unsigned int)nstat->rx_over_errors,
+                (long unsigned int)nstat->rx_crc_errors);
+       dev_info(&pf->pdev->dev,
+                "    net_stats: rx_frame_errors = %lu, rx_fifo_errors = %lu, rx_missed_errors = %lu\n",
+                (long unsigned int)nstat->rx_frame_errors,
+                (long unsigned int)nstat->rx_fifo_errors,
+                (long unsigned int)nstat->rx_missed_errors);
+       dev_info(&pf->pdev->dev,
+                "    net_stats: tx_aborted_errors = %lu, tx_carrier_errors = %lu, tx_fifo_errors = %lu\n",
+                (long unsigned int)nstat->tx_aborted_errors,
+                (long unsigned int)nstat->tx_carrier_errors,
+                (long unsigned int)nstat->tx_fifo_errors);
+       dev_info(&pf->pdev->dev,
+                "    net_stats: tx_heartbeat_errors = %lu, tx_window_errors = %lu\n",
+                (long unsigned int)nstat->tx_heartbeat_errors,
+                (long unsigned int)nstat->tx_window_errors);
+       dev_info(&pf->pdev->dev,
+                "    net_stats: rx_compressed = %lu, tx_compressed = %lu\n",
+                (long unsigned int)nstat->rx_compressed,
+                (long unsigned int)nstat->tx_compressed);
+       dev_info(&pf->pdev->dev,
+                "    net_stats_offsets: rx_packets = %lu, rx_bytes = %lu, rx_errors = %lu, rx_dropped = %lu\n",
+                (long unsigned int)vsi->net_stats_offsets.rx_packets,
+                (long unsigned int)vsi->net_stats_offsets.rx_bytes,
+                (long unsigned int)vsi->net_stats_offsets.rx_errors,
+                (long unsigned int)vsi->net_stats_offsets.rx_dropped);
+       dev_info(&pf->pdev->dev,
+                "    net_stats_offsets: tx_packets = %lu, tx_bytes = %lu, tx_errors = %lu, tx_dropped = %lu\n",
+                (long unsigned int)vsi->net_stats_offsets.tx_packets,
+                (long unsigned int)vsi->net_stats_offsets.tx_bytes,
+                (long unsigned int)vsi->net_stats_offsets.tx_errors,
+                (long unsigned int)vsi->net_stats_offsets.tx_dropped);
+       dev_info(&pf->pdev->dev,
+                "    net_stats_offsets: multicast = %lu, collisions = %lu\n",
+                (long unsigned int)vsi->net_stats_offsets.multicast,
+                (long unsigned int)vsi->net_stats_offsets.collisions);
+       dev_info(&pf->pdev->dev,
+                "    net_stats_offsets: rx_length_errors = %lu, rx_over_errors = %lu, rx_crc_errors = %lu\n",
+                (long unsigned int)vsi->net_stats_offsets.rx_length_errors,
+                (long unsigned int)vsi->net_stats_offsets.rx_over_errors,
+                (long unsigned int)vsi->net_stats_offsets.rx_crc_errors);
+       dev_info(&pf->pdev->dev,
+                "    net_stats_offsets: rx_frame_errors = %lu, rx_fifo_errors = %lu, rx_missed_errors = %lu\n",
+                (long unsigned int)vsi->net_stats_offsets.rx_frame_errors,
+                (long unsigned int)vsi->net_stats_offsets.rx_fifo_errors,
+                (long unsigned int)vsi->net_stats_offsets.rx_missed_errors);
+       dev_info(&pf->pdev->dev,
+                "    net_stats_offsets: tx_aborted_errors = %lu, tx_carrier_errors = %lu, tx_fifo_errors = %lu\n",
+                (long unsigned int)vsi->net_stats_offsets.tx_aborted_errors,
+                (long unsigned int)vsi->net_stats_offsets.tx_carrier_errors,
+                (long unsigned int)vsi->net_stats_offsets.tx_fifo_errors);
+       dev_info(&pf->pdev->dev,
+                "    net_stats_offsets: tx_heartbeat_errors = %lu, tx_window_errors = %lu\n",
+                (long unsigned int)vsi->net_stats_offsets.tx_heartbeat_errors,
+                (long unsigned int)vsi->net_stats_offsets.tx_window_errors);
+       dev_info(&pf->pdev->dev,
+                "    net_stats_offsets: rx_compressed = %lu, tx_compressed = %lu\n",
+                (long unsigned int)vsi->net_stats_offsets.rx_compressed,
+                (long unsigned int)vsi->net_stats_offsets.tx_compressed);
+       dev_info(&pf->pdev->dev,
+                "    tx_restart = %d, tx_busy = %d, rx_buf_failed = %d, rx_page_failed = %d\n",
+                vsi->tx_restart, vsi->tx_busy,
+                vsi->rx_buf_failed, vsi->rx_page_failed);
+       /* per-queue Rx ring state and stats */
+       if (vsi->rx_rings) {
+               for (i = 0; i < vsi->num_queue_pairs; i++) {
+                       dev_info(&pf->pdev->dev,
+                                "    rx_rings[%i]: desc = %p\n",
+                                i, vsi->rx_rings[i].desc);
+                       dev_info(&pf->pdev->dev,
+                                "    rx_rings[%i]: dev = %p, netdev = %p, rx_bi = %p\n",
+                                i, vsi->rx_rings[i].dev,
+                                vsi->rx_rings[i].netdev,
+                                vsi->rx_rings[i].rx_bi);
+                       dev_info(&pf->pdev->dev,
+                                "    rx_rings[%i]: state = %li, queue_index = %d, reg_idx = %d\n",
+                                i, vsi->rx_rings[i].state,
+                                vsi->rx_rings[i].queue_index,
+                                vsi->rx_rings[i].reg_idx);
+                       dev_info(&pf->pdev->dev,
+                                "    rx_rings[%i]: rx_hdr_len = %d, rx_buf_len = %d, dtype = %d\n",
+                                i, vsi->rx_rings[i].rx_hdr_len,
+                                vsi->rx_rings[i].rx_buf_len,
+                                vsi->rx_rings[i].dtype);
+                       dev_info(&pf->pdev->dev,
+                                "    rx_rings[%i]: hsplit = %d, next_to_use = %d, next_to_clean = %d, ring_active = %i\n",
+                                i, vsi->rx_rings[i].hsplit,
+                                vsi->rx_rings[i].next_to_use,
+                                vsi->rx_rings[i].next_to_clean,
+                                vsi->rx_rings[i].ring_active);
+                       dev_info(&pf->pdev->dev,
+                                "    rx_rings[%i]: rx_stats: packets = %lld, bytes = %lld, non_eop_descs = %lld\n",
+                                i, vsi->rx_rings[i].rx_stats.packets,
+                                vsi->rx_rings[i].rx_stats.bytes,
+                                vsi->rx_rings[i].rx_stats.non_eop_descs);
+                       dev_info(&pf->pdev->dev,
+                                "    rx_rings[%i]: rx_stats: alloc_rx_page_failed = %lld, alloc_rx_buff_failed = %lld\n",
+                                i,
+                                vsi->rx_rings[i].rx_stats.alloc_rx_page_failed,
+                               vsi->rx_rings[i].rx_stats.alloc_rx_buff_failed);
+                       dev_info(&pf->pdev->dev,
+                                "    rx_rings[%i]: size = %i, dma = 0x%08lx\n",
+                                i, vsi->rx_rings[i].size,
+                                (long unsigned int)vsi->rx_rings[i].dma);
+                       dev_info(&pf->pdev->dev,
+                                "    rx_rings[%i]: vsi = %p, q_vector = %p\n",
+                                i, vsi->rx_rings[i].vsi,
+                                vsi->rx_rings[i].q_vector);
+               }
+       }
+       /* per-queue Tx ring state and stats */
+       if (vsi->tx_rings) {
+               for (i = 0; i < vsi->num_queue_pairs; i++) {
+                       dev_info(&pf->pdev->dev,
+                                "    tx_rings[%i]: desc = %p\n",
+                                i, vsi->tx_rings[i].desc);
+                       dev_info(&pf->pdev->dev,
+                                "    tx_rings[%i]: dev = %p, netdev = %p, tx_bi = %p\n",
+                                i, vsi->tx_rings[i].dev,
+                                vsi->tx_rings[i].netdev,
+                                vsi->tx_rings[i].tx_bi);
+                       dev_info(&pf->pdev->dev,
+                                "    tx_rings[%i]: state = %li, queue_index = %d, reg_idx = %d\n",
+                                i, vsi->tx_rings[i].state,
+                                vsi->tx_rings[i].queue_index,
+                                vsi->tx_rings[i].reg_idx);
+                       dev_info(&pf->pdev->dev,
+                                "    tx_rings[%i]: dtype = %d\n",
+                                i, vsi->tx_rings[i].dtype);
+                       dev_info(&pf->pdev->dev,
+                                "    tx_rings[%i]: hsplit = %d, next_to_use = %d, next_to_clean = %d, ring_active = %i\n",
+                                i, vsi->tx_rings[i].hsplit,
+                                vsi->tx_rings[i].next_to_use,
+                                vsi->tx_rings[i].next_to_clean,
+                                vsi->tx_rings[i].ring_active);
+                       dev_info(&pf->pdev->dev,
+                                "    tx_rings[%i]: tx_stats: packets = %lld, bytes = %lld, restart_queue = %lld\n",
+                                i, vsi->tx_rings[i].tx_stats.packets,
+                                vsi->tx_rings[i].tx_stats.bytes,
+                                vsi->tx_rings[i].tx_stats.restart_queue);
+                       dev_info(&pf->pdev->dev,
+                                "    tx_rings[%i]: tx_stats: tx_busy = %lld, completed = %lld, tx_done_old = %lld\n",
+                                i,
+                                vsi->tx_rings[i].tx_stats.tx_busy,
+                                vsi->tx_rings[i].tx_stats.completed,
+                                vsi->tx_rings[i].tx_stats.tx_done_old);
+                       dev_info(&pf->pdev->dev,
+                                "    tx_rings[%i]: size = %i, dma = 0x%08lx\n",
+                                i, vsi->tx_rings[i].size,
+                                (long unsigned int)vsi->tx_rings[i].dma);
+                       dev_info(&pf->pdev->dev,
+                                "    tx_rings[%i]: vsi = %p, q_vector = %p\n",
+                                i, vsi->tx_rings[i].vsi,
+                                vsi->tx_rings[i].q_vector);
+                       dev_info(&pf->pdev->dev,
+                                "    tx_rings[%i]: DCB tc = %d\n",
+                                i, vsi->tx_rings[i].dcb_tc);
+               }
+       }
+       dev_info(&pf->pdev->dev,
+                "    work_limit = %d, rx_itr_setting = %d (%s), tx_itr_setting = %d (%s)\n",
+                vsi->work_limit, vsi->rx_itr_setting,
+                ITR_IS_DYNAMIC(vsi->rx_itr_setting) ? "dynamic" : "fixed",
+                vsi->tx_itr_setting,
+                ITR_IS_DYNAMIC(vsi->tx_itr_setting) ? "dynamic" : "fixed");
+       dev_info(&pf->pdev->dev,
+                "    max_frame = %d, rx_hdr_len = %d, rx_buf_len = %d dtype = %d\n",
+                vsi->max_frame, vsi->rx_hdr_len, vsi->rx_buf_len, vsi->dtype);
+       /* interrupt vectors; NOTE(review): the "base index" pointer
+        * arithmetic below dereferences *rx.ring before casting to long,
+        * which looks suspect — confirm it really yields a ring index.
+        */
+       if (vsi->q_vectors) {
+               for (i = 0; i < vsi->num_q_vectors; i++) {
+                       dev_info(&pf->pdev->dev,
+                                "    q_vectors[%i]: base index = %ld\n",
+                                i, ((long int)*vsi->q_vectors[i].rx.ring-
+                                       (long int)*vsi->q_vectors[0].rx.ring)/
+                                       sizeof(struct i40e_ring));
+               }
+       }
+       dev_info(&pf->pdev->dev,
+                "    num_q_vectors = %i, base_vector = %i\n",
+                vsi->num_q_vectors, vsi->base_vector);
+       dev_info(&pf->pdev->dev,
+                "    seid = %d, id = %d, uplink_seid = %d\n",
+                vsi->seid, vsi->id, vsi->uplink_seid);
+       dev_info(&pf->pdev->dev,
+                "    base_queue = %d, num_queue_pairs = %d, num_desc = %d\n",
+                vsi->base_queue, vsi->num_queue_pairs, vsi->num_desc);
+       dev_info(&pf->pdev->dev, "    type = %i\n", vsi->type);
+       /* AQ VSI context (vsi->info) as last read from / written to firmware */
+       dev_info(&pf->pdev->dev,
+                "    info: valid_sections = 0x%04x, switch_id = 0x%04x\n",
+                vsi->info.valid_sections, vsi->info.switch_id);
+       dev_info(&pf->pdev->dev,
+                "    info: sw_reserved[] = 0x%02x 0x%02x\n",
+                vsi->info.sw_reserved[0], vsi->info.sw_reserved[1]);
+       dev_info(&pf->pdev->dev,
+                "    info: sec_flags = 0x%02x, sec_reserved = 0x%02x\n",
+                vsi->info.sec_flags, vsi->info.sec_reserved);
+       dev_info(&pf->pdev->dev,
+                "    info: pvid = 0x%04x, fcoe_pvid = 0x%04x, port_vlan_flags = 0x%02x\n",
+                vsi->info.pvid, vsi->info.fcoe_pvid,
+                vsi->info.port_vlan_flags);
+       dev_info(&pf->pdev->dev,
+                "    info: pvlan_reserved[] = 0x%02x 0x%02x 0x%02x\n",
+                vsi->info.pvlan_reserved[0], vsi->info.pvlan_reserved[1],
+                vsi->info.pvlan_reserved[2]);
+       dev_info(&pf->pdev->dev,
+                "    info: ingress_table = 0x%08x, egress_table = 0x%08x\n",
+                vsi->info.ingress_table, vsi->info.egress_table);
+       dev_info(&pf->pdev->dev,
+                "    info: cas_pv_stag = 0x%04x, cas_pv_flags= 0x%02x, cas_pv_reserved = 0x%02x\n",
+                vsi->info.cas_pv_tag, vsi->info.cas_pv_flags,
+                vsi->info.cas_pv_reserved);
+       dev_info(&pf->pdev->dev,
+                "    info: queue_mapping[0..7 ] = 0x%04x 0x%04x 0x%04x 0x%04x 0x%04x 0x%04x 0x%04x 0x%04x\n",
+                vsi->info.queue_mapping[0], vsi->info.queue_mapping[1],
+                vsi->info.queue_mapping[2], vsi->info.queue_mapping[3],
+                vsi->info.queue_mapping[4], vsi->info.queue_mapping[5],
+                vsi->info.queue_mapping[6], vsi->info.queue_mapping[7]);
+       dev_info(&pf->pdev->dev,
+                "    info: queue_mapping[8..15] = 0x%04x 0x%04x 0x%04x 0x%04x 0x%04x 0x%04x 0x%04x 0x%04x\n",
+                vsi->info.queue_mapping[8], vsi->info.queue_mapping[9],
+                vsi->info.queue_mapping[10], vsi->info.queue_mapping[11],
+                vsi->info.queue_mapping[12], vsi->info.queue_mapping[13],
+                vsi->info.queue_mapping[14], vsi->info.queue_mapping[15]);
+       dev_info(&pf->pdev->dev,
+                "    info: tc_mapping[] = 0x%04x 0x%04x 0x%04x 0x%04x 0x%04x 0x%04x 0x%04x 0x%04x\n",
+                vsi->info.tc_mapping[0], vsi->info.tc_mapping[1],
+                vsi->info.tc_mapping[2], vsi->info.tc_mapping[3],
+                vsi->info.tc_mapping[4], vsi->info.tc_mapping[5],
+                vsi->info.tc_mapping[6], vsi->info.tc_mapping[7]);
+       dev_info(&pf->pdev->dev,
+                "    info: queueing_opt_flags = 0x%02x  queueing_opt_reserved[0..2] = 0x%02x 0x%02x 0x%02x\n",
+                vsi->info.queueing_opt_flags,
+                vsi->info.queueing_opt_reserved[0],
+                vsi->info.queueing_opt_reserved[1],
+                vsi->info.queueing_opt_reserved[2]);
+       dev_info(&pf->pdev->dev,
+                "    info: up_enable_bits = 0x%02x\n",
+                vsi->info.up_enable_bits);
+       dev_info(&pf->pdev->dev,
+                "    info: sched_reserved = 0x%02x, outer_up_table = 0x%04x\n",
+                vsi->info.sched_reserved, vsi->info.outer_up_table);
+       dev_info(&pf->pdev->dev,
+                "    info: cmd_reserved[] = 0x%02x 0x%02x 0x%02x 0x0%02x 0x%02x 0x%02x 0x%02x 0x0%02x\n",
+                vsi->info.cmd_reserved[0], vsi->info.cmd_reserved[1],
+                vsi->info.cmd_reserved[2], vsi->info.cmd_reserved[3],
+                vsi->info.cmd_reserved[4], vsi->info.cmd_reserved[5],
+                vsi->info.cmd_reserved[6], vsi->info.cmd_reserved[7]);
+       dev_info(&pf->pdev->dev,
+                "    info: qs_handle[] = 0x%04x 0x%04x 0x%04x 0x%04x 0x%04x 0x%04x 0x%04x 0x%04x\n",
+                vsi->info.qs_handle[0], vsi->info.qs_handle[1],
+                vsi->info.qs_handle[2], vsi->info.qs_handle[3],
+                vsi->info.qs_handle[4], vsi->info.qs_handle[5],
+                vsi->info.qs_handle[6], vsi->info.qs_handle[7]);
+       dev_info(&pf->pdev->dev,
+                "    info: stat_counter_idx = 0x%04x, sched_id = 0x%04x\n",
+                vsi->info.stat_counter_idx, vsi->info.sched_id);
+       dev_info(&pf->pdev->dev,
+                "    info: resp_reserved[] = 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x 0x%02x\n",
+                vsi->info.resp_reserved[0], vsi->info.resp_reserved[1],
+                vsi->info.resp_reserved[2], vsi->info.resp_reserved[3],
+                vsi->info.resp_reserved[4], vsi->info.resp_reserved[5],
+                vsi->info.resp_reserved[6], vsi->info.resp_reserved[7],
+                vsi->info.resp_reserved[8], vsi->info.resp_reserved[9],
+                vsi->info.resp_reserved[10], vsi->info.resp_reserved[11]);
+       if (vsi->back)
+               dev_info(&pf->pdev->dev, "    pf = %p\n", vsi->back);
+       dev_info(&pf->pdev->dev, "    idx = %d\n", vsi->idx);
+       /* traffic-class configuration and bandwidth credits */
+       dev_info(&pf->pdev->dev,
+                "    tc_config: numtc = %d, enabled_tc = 0x%x\n",
+                vsi->tc_config.numtc, vsi->tc_config.enabled_tc);
+       for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+               dev_info(&pf->pdev->dev,
+                        "    tc_config: tc = %d, qoffset = %d, qcount = %d, netdev_tc = %d\n",
+                        i, vsi->tc_config.tc_info[i].qoffset,
+                        vsi->tc_config.tc_info[i].qcount,
+                        vsi->tc_config.tc_info[i].netdev_tc);
+       }
+       dev_info(&pf->pdev->dev,
+                "    bw: bw_limit = %d, bw_max_quanta = %d\n",
+                vsi->bw_limit, vsi->bw_max_quanta);
+       for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+               dev_info(&pf->pdev->dev,
+                        "    bw[%d]: ets_share_credits = %d, ets_limit_credits = %d, max_quanta = %d\n",
+                        i, vsi->bw_ets_share_credits[i],
+                        vsi->bw_ets_limit_credits[i],
+                        vsi->bw_ets_max_quanta[i]);
+       }
+}
+
+/**
+ * i40e_dbg_dump_aq_desc - handles dump aq_desc write into command datum
+ * @pf: the i40e_pf created in command write
+ *
+ * Prints every descriptor of both AdminQ rings (header fields plus the
+ * 16 raw parameter bytes) to the kernel log.
+ **/
+static void i40e_dbg_dump_aq_desc(struct i40e_pf *pf)
+{
+       struct i40e_adminq_ring *ring;
+       struct i40e_hw *hw = &pf->hw;
+       int i;
+
+       /* first the send (command) ring, then the receive (event) ring */
+       dev_info(&pf->pdev->dev, "AdminQ Tx Ring\n");
+       ring = &(hw->aq.asq);
+       for (i = 0; i < ring->count; i++) {
+               struct i40e_aq_desc *d = I40E_ADMINQ_DESC(*ring, i);
+               dev_info(&pf->pdev->dev,
+                        "   at[%02d] flags=0x%04x op=0x%04x dlen=0x%04x ret=0x%04x cookie_h=0x%08x cookie_l=0x%08x\n",
+                        i, d->flags, d->opcode, d->datalen, d->retval,
+                        d->cookie_high, d->cookie_low);
+               /* raw 16-byte parameter area of the descriptor */
+               dev_info(&pf->pdev->dev,
+                        "            %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x\n",
+                        d->params.raw[0], d->params.raw[1], d->params.raw[2],
+                        d->params.raw[3], d->params.raw[4], d->params.raw[5],
+                        d->params.raw[6], d->params.raw[7], d->params.raw[8],
+                        d->params.raw[9], d->params.raw[10], d->params.raw[11],
+                        d->params.raw[12], d->params.raw[13],
+                        d->params.raw[14], d->params.raw[15]);
+       }
+
+       dev_info(&pf->pdev->dev, "AdminQ Rx Ring\n");
+       ring = &(hw->aq.arq);
+       for (i = 0; i < ring->count; i++) {
+               struct i40e_aq_desc *d = I40E_ADMINQ_DESC(*ring, i);
+               dev_info(&pf->pdev->dev,
+                        "   ar[%02d] flags=0x%04x op=0x%04x dlen=0x%04x ret=0x%04x cookie_h=0x%08x cookie_l=0x%08x\n",
+                        i, d->flags, d->opcode, d->datalen, d->retval,
+                        d->cookie_high, d->cookie_low);
+               dev_info(&pf->pdev->dev,
+                        "            %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x\n",
+                        d->params.raw[0], d->params.raw[1], d->params.raw[2],
+                        d->params.raw[3], d->params.raw[4], d->params.raw[5],
+                        d->params.raw[6], d->params.raw[7], d->params.raw[8],
+                        d->params.raw[9], d->params.raw[10], d->params.raw[11],
+                        d->params.raw[12], d->params.raw[13],
+                        d->params.raw[14], d->params.raw[15]);
+       }
+}
+
+/**
+ * i40e_dbg_dump_desc - handles dump desc write into command datum
+ * @cnt: number of arguments that the user supplied
+ * @vsi_seid: vsi id entered by user
+ * @ring_id: ring id entered by user
+ * @desc_n: descriptor number entered by user
+ * @pf: the i40e_pf created in command write
+ * @is_rx_ring: true if rx, false if tx
+ *
+ * cnt == 2 dumps the whole ring; cnt == 3 dumps descriptor @desc_n only;
+ * any other count prints usage.  Tx descriptors are viewed through the
+ * rx-descriptor union purely for the raw-field printout.
+ **/
+static void i40e_dbg_dump_desc(int cnt, int vsi_seid, int ring_id, int desc_n,
+                              struct i40e_pf *pf, bool is_rx_ring)
+{
+       union i40e_rx_desc *ds;
+       struct i40e_ring ring;
+       struct i40e_vsi *vsi;
+       int i;
+
+       vsi = i40e_dbg_find_vsi(pf, vsi_seid);
+       if (!vsi) {
+               dev_info(&pf->pdev->dev,
+                        "vsi %d not found\n", vsi_seid);
+               if (is_rx_ring)
+                       dev_info(&pf->pdev->dev, "dump desc rx <vsi_seid> <ring_id> [<desc_n>]\n");
+               else
+                       dev_info(&pf->pdev->dev, "dump desc tx <vsi_seid> <ring_id> [<desc_n>]\n");
+               return;
+       }
+       if (ring_id >= vsi->num_queue_pairs || ring_id < 0) {
+               dev_info(&pf->pdev->dev, "ring %d not found\n", ring_id);
+               if (is_rx_ring)
+                       dev_info(&pf->pdev->dev, "dump desc rx <vsi_seid> <ring_id> [<desc_n>]\n");
+               else
+                       dev_info(&pf->pdev->dev, "dump desc tx <vsi_seid> <ring_id> [<desc_n>]\n");
+               return;
+       }
+       /* NOTE(review): this copies the whole i40e_ring struct onto the
+        * stack by value; a snapshot, not live state — confirm the struct
+        * stays small enough for kernel stack use.
+        */
+       if (is_rx_ring)
+               ring = vsi->rx_rings[ring_id];
+       else
+               ring = vsi->tx_rings[ring_id];
+       if (cnt == 2) {
+               /* dump every descriptor in the selected ring */
+               dev_info(&pf->pdev->dev, "vsi = %02i %s ring = %02i\n",
+                        vsi_seid, is_rx_ring ? "rx" : "tx", ring_id);
+               for (i = 0; i < ring.count; i++) {
+                       if (is_rx_ring)
+                               ds = I40E_RX_DESC(&ring, i);
+                       else
+                               ds = (union i40e_rx_desc *)
+                                       I40E_TX_DESC(&ring, i);
+                       /* 16-byte descriptors (and all tx) print two
+                        * quadwords; 32-byte rx descriptors print four
+                        */
+                       if ((sizeof(union i40e_rx_desc) ==
+                           sizeof(union i40e_16byte_rx_desc)) || (!is_rx_ring))
+                               dev_info(&pf->pdev->dev,
+                                        "   d[%03i] = 0x%016llx 0x%016llx\n", i,
+                                        ds->read.pkt_addr, ds->read.hdr_addr);
+                       else
+                               dev_info(&pf->pdev->dev,
+                                        "   d[%03i] = 0x%016llx 0x%016llx 0x%016llx 0x%016llx\n",
+                                        i, ds->read.pkt_addr,
+                                        ds->read.hdr_addr,
+                                        ds->read.rsvd1, ds->read.rsvd2);
+               }
+       } else if (cnt == 3) {
+               /* dump a single descriptor after bounds-checking desc_n */
+               if (desc_n >= ring.count || desc_n < 0) {
+                       dev_info(&pf->pdev->dev,
+                                "descriptor %d not found\n", desc_n);
+                       return;
+               }
+               if (is_rx_ring)
+                       ds = I40E_RX_DESC(&ring, desc_n);
+               else
+                       ds = (union i40e_rx_desc *)I40E_TX_DESC(&ring, desc_n);
+               if ((sizeof(union i40e_rx_desc) ==
+                   sizeof(union i40e_16byte_rx_desc)) || (!is_rx_ring))
+                       dev_info(&pf->pdev->dev,
+                                "vsi = %02i %s ring = %02i d[%03i] = 0x%016llx 0x%016llx\n",
+                                vsi_seid, is_rx_ring ? "rx" : "tx", ring_id,
+                                desc_n, ds->read.pkt_addr, ds->read.hdr_addr);
+               else
+                       dev_info(&pf->pdev->dev,
+                                "vsi = %02i rx ring = %02i d[%03i] = 0x%016llx 0x%016llx 0x%016llx 0x%016llx\n",
+                                vsi_seid, ring_id,
+                                desc_n, ds->read.pkt_addr, ds->read.hdr_addr,
+                                ds->read.rsvd1, ds->read.rsvd2);
+       } else {
+               if (is_rx_ring)
+                       dev_info(&pf->pdev->dev, "dump desc rx <vsi_seid> <ring_id> [<desc_n>]\n");
+               else
+                       dev_info(&pf->pdev->dev, "dump desc tx <vsi_seid> <ring_id> [<desc_n>]\n");
+       }
+}
+
+/**
+ * i40e_dbg_dump_vsi_no_seid - list the seid of every allocated VSI
+ * @pf: the i40e_pf created in command write
+ *
+ * Called when "dump vsi" is given with no seid argument; prints one
+ * line per populated entry in the pf->vsi[] table.
+ **/
+static void i40e_dbg_dump_vsi_no_seid(struct i40e_pf *pf)
+{
+       int v;
+
+       for (v = 0; v < pf->hw.func_caps.num_vsis; v++) {
+               if (!pf->vsi[v])
+                       continue;
+               dev_info(&pf->pdev->dev, "dump vsi[%d]: %d\n",
+                        v, pf->vsi[v]->seid);
+       }
+}
+
+/**
+ * i40e_dbg_dump_eth_stats - handles dump of ethernet stats into command datum
+ * @pf: the i40e_pf created in command write
+ * @estats: the eth stats structure to be dumped
+ *
+ * Prints every field of @estats to the kernel log, three counters per
+ * line, indented under an "ethstats:" header.
+ **/
+static void i40e_dbg_dump_eth_stats(struct i40e_pf *pf,
+                                   struct i40e_eth_stats *estats)
+{
+       dev_info(&pf->pdev->dev, "  ethstats:\n");
+       dev_info(&pf->pdev->dev,
+                "    rx_bytes = \t%lld \trx_unicast = \t\t%lld \trx_multicast = \t%lld\n",
+               estats->rx_bytes, estats->rx_unicast, estats->rx_multicast);
+       dev_info(&pf->pdev->dev,
+                "    rx_broadcast = \t%lld \trx_discards = \t\t%lld \trx_errors = \t%lld\n",
+                estats->rx_broadcast, estats->rx_discards, estats->rx_errors);
+       dev_info(&pf->pdev->dev,
+                "    rx_missed = \t%lld \trx_unknown_protocol = \t%lld \ttx_bytes = \t%lld\n",
+                estats->rx_missed, estats->rx_unknown_protocol,
+                estats->tx_bytes);
+       dev_info(&pf->pdev->dev,
+                "    tx_unicast = \t%lld \ttx_multicast = \t\t%lld \ttx_broadcast = \t%lld\n",
+                estats->tx_unicast, estats->tx_multicast, estats->tx_broadcast);
+       dev_info(&pf->pdev->dev,
+                "    tx_discards = \t%lld \ttx_errors = \t\t%lld\n",
+                estats->tx_discards, estats->tx_errors);
+}
+
+/**
+ * i40e_dbg_dump_stats - handles dump stats write into command datum
+ * @pf: the i40e_pf created in command write
+ * @stats: the stats structure to be dumped
+ *
+ * Prints the port-level MAC counters, then the eight per-priority
+ * flow-control counters (four per log line), and finally the embedded
+ * ethernet stats via i40e_dbg_dump_eth_stats().
+ **/
+static void i40e_dbg_dump_stats(struct i40e_pf *pf,
+                               struct i40e_hw_port_stats *stats)
+{
+       int i;
+
+       dev_info(&pf->pdev->dev, "  stats:\n");
+       dev_info(&pf->pdev->dev,
+                "    crc_errors = \t\t%lld \tillegal_bytes = \t%lld \terror_bytes = \t\t%lld\n",
+                stats->crc_errors, stats->illegal_bytes, stats->error_bytes);
+       dev_info(&pf->pdev->dev,
+                "    mac_local_faults = \t%lld \tmac_remote_faults = \t%lld \trx_length_errors = \t%lld\n",
+                stats->mac_local_faults, stats->mac_remote_faults,
+                stats->rx_length_errors);
+       dev_info(&pf->pdev->dev,
+                "    link_xon_rx = \t\t%lld \tlink_xoff_rx = \t\t%lld \tlink_xon_tx = \t\t%lld\n",
+                stats->link_xon_rx, stats->link_xoff_rx, stats->link_xon_tx);
+       dev_info(&pf->pdev->dev,
+                "    link_xoff_tx = \t\t%lld \trx_size_64 = \t\t%lld \trx_size_127 = \t\t%lld\n",
+                stats->link_xoff_tx, stats->rx_size_64, stats->rx_size_127);
+       dev_info(&pf->pdev->dev,
+                "    rx_size_255 = \t\t%lld \trx_size_511 = \t\t%lld \trx_size_1023 = \t\t%lld\n",
+                stats->rx_size_255, stats->rx_size_511, stats->rx_size_1023);
+       dev_info(&pf->pdev->dev,
+                "    rx_size_big = \t\t%lld \trx_undersize = \t\t%lld \trx_jabber = \t\t%lld\n",
+                stats->rx_size_big, stats->rx_undersize, stats->rx_jabber);
+       dev_info(&pf->pdev->dev,
+                "    rx_fragments = \t\t%lld \trx_oversize = \t\t%lld \ttx_size_64 = \t\t%lld\n",
+                stats->rx_fragments, stats->rx_oversize, stats->tx_size_64);
+       dev_info(&pf->pdev->dev,
+                "    tx_size_127 = \t\t%lld \ttx_size_255 = \t\t%lld \ttx_size_511 = \t\t%lld\n",
+                stats->tx_size_127, stats->tx_size_255, stats->tx_size_511);
+       dev_info(&pf->pdev->dev,
+                "    tx_size_1023 = \t\t%lld \ttx_size_big = \t\t%lld \tmac_short_packet_dropped = \t%lld\n",
+                stats->tx_size_1023, stats->tx_size_big,
+                stats->mac_short_packet_dropped);
+       for (i = 0; i < 8; i += 4) {
+               dev_info(&pf->pdev->dev,
+                        "    priority_xon_rx[%d] = \t%lld \t[%d] = \t%lld \t[%d] = \t%lld \t[%d] = \t%lld\n",
+                        i, stats->priority_xon_rx[i],
+                        i+1, stats->priority_xon_rx[i+1],
+                        i+2, stats->priority_xon_rx[i+2],
+                        i+3, stats->priority_xon_rx[i+3]);
+       }
+       for (i = 0; i < 8; i += 4) {
+               dev_info(&pf->pdev->dev,
+                        "    priority_xoff_rx[%d] = \t%lld \t[%d] = \t%lld \t[%d] = \t%lld \t[%d] = \t%lld\n",
+                        i, stats->priority_xoff_rx[i],
+                        i+1, stats->priority_xoff_rx[i+1],
+                        i+2, stats->priority_xoff_rx[i+2],
+                        i+3, stats->priority_xoff_rx[i+3]);
+       }
+       for (i = 0; i < 8; i += 4) {
+               dev_info(&pf->pdev->dev,
+                        "    priority_xon_tx[%d] = \t%lld \t[%d] = \t%lld \t[%d] = \t%lld \t[%d] = \t%lld\n",
+                        i, stats->priority_xon_tx[i],
+                        i+1, stats->priority_xon_tx[i+1],
+                        i+2, stats->priority_xon_tx[i+2],
+                        /* was priority_xon_rx[i+3]: copy-paste bug printed
+                         * an rx counter in the tx row
+                         */
+                        i+3, stats->priority_xon_tx[i+3]);
+       }
+       for (i = 0; i < 8; i += 4) {
+               dev_info(&pf->pdev->dev,
+                        "    priority_xoff_tx[%d] = \t%lld \t[%d] = \t%lld \t[%d] = \t%lld \t[%d] = \t%lld\n",
+                        i, stats->priority_xoff_tx[i],
+                        i+1, stats->priority_xoff_tx[i+1],
+                        i+2, stats->priority_xoff_tx[i+2],
+                        i+3, stats->priority_xoff_tx[i+3]);
+       }
+       for (i = 0; i < 8; i += 4) {
+               dev_info(&pf->pdev->dev,
+                        "    priority_xon_2_xoff[%d] = \t%lld \t[%d] = \t%lld \t[%d] = \t%lld \t[%d] = \t%lld\n",
+                        i, stats->priority_xon_2_xoff[i],
+                        i+1, stats->priority_xon_2_xoff[i+1],
+                        i+2, stats->priority_xon_2_xoff[i+2],
+                        i+3, stats->priority_xon_2_xoff[i+3]);
+       }
+
+       i40e_dbg_dump_eth_stats(pf, &stats->eth);
+}
+
+/**
+ * i40e_dbg_dump_veb_seid - handles dump stats of a single given veb
+ * @pf: the i40e_pf created in command write
+ * @seid: the seid the user put in
+ *
+ * Validates that @seid falls inside the VEB seid window, looks the
+ * VEB up via i40e_dbg_find_veb(), then prints its identifiers and
+ * its ethernet stats.
+ **/
+static void i40e_dbg_dump_veb_seid(struct i40e_pf *pf, int seid)
+{
+       struct i40e_veb *veb;
+
+       /* reject seids outside [I40E_BASE_VEB_SEID, base + I40E_MAX_VEB) */
+       if ((seid < I40E_BASE_VEB_SEID) ||
+           (seid >= (I40E_MAX_VEB + I40E_BASE_VEB_SEID))) {
+               dev_info(&pf->pdev->dev, "%d: bad seid\n", seid);
+               return;
+       }
+
+       veb = i40e_dbg_find_veb(pf, seid);
+       if (!veb) {
+               dev_info(&pf->pdev->dev,
+                        "%d: can't find veb\n", seid);
+               return;
+       }
+       /* "stats_idx" label fixed: it was misspelled "stats_ic" even
+        * though the value printed is veb->stats_idx
+        */
+       dev_info(&pf->pdev->dev,
+                "veb idx=%d,%d stats_idx=%d  seid=%d uplink=%d\n",
+                veb->idx, veb->veb_idx, veb->stats_idx, veb->seid,
+                veb->uplink_seid);
+       i40e_dbg_dump_eth_stats(pf, &veb->stats);
+}
+
+/**
+ * i40e_dbg_dump_veb_all - dumps all known veb's stats
+ * @pf: the i40e_pf created in command write
+ *
+ * Walks the pf->veb[] table and dumps each populated entry through
+ * i40e_dbg_dump_veb_seid().
+ **/
+static void i40e_dbg_dump_veb_all(struct i40e_pf *pf)
+{
+       int idx;
+
+       for (idx = 0; idx < I40E_MAX_VEB; idx++) {
+               struct i40e_veb *cur = pf->veb[idx];
+
+               if (!cur)
+                       continue;
+               i40e_dbg_dump_veb_seid(pf, cur->seid);
+       }
+}
+
+#define I40E_MAX_DEBUG_OUT_BUFFER (4096*4)
+/**
+ * i40e_dbg_command_write - write into command datum
+ * @filp: the opened file
+ * @buffer: where to find the user's data
+ * @count: the length of the user's data
+ * @ppos: file position offset
+ **/
+static ssize_t i40e_dbg_command_write(struct file *filp,
+                                     const char __user *buffer,
+                                     size_t count, loff_t *ppos)
+{
+       struct i40e_pf *pf = filp->private_data;
+       int bytes_not_copied;
+       struct i40e_vsi *vsi;
+       u8 *print_buf_start;
+       u8 *print_buf;
+       char *cmd_buf;
+       int vsi_seid;
+       int veb_seid;
+       int cnt;
+
+       /* don't allow partial writes */
+       if (*ppos != 0)
+               return 0;
+
+       cmd_buf = kzalloc(count + 1, GFP_KERNEL);
+       if (!cmd_buf)
+               return count;
+       bytes_not_copied = copy_from_user(cmd_buf, buffer, count);
+       if (bytes_not_copied < 0)
+               return bytes_not_copied;
+       if (bytes_not_copied > 0)
+               count -= bytes_not_copied;
+       cmd_buf[count] = '\0';
+
+       print_buf_start = kzalloc(I40E_MAX_DEBUG_OUT_BUFFER, GFP_KERNEL);
+       if (!print_buf_start)
+               goto command_write_done;
+       print_buf = print_buf_start;
+
+       if (strncmp(cmd_buf, "add vsi", 7) == 0) {
+               vsi_seid = -1;
+               cnt = sscanf(&cmd_buf[7], "%i", &vsi_seid);
+               if (cnt == 0) {
+                       /* default to PF VSI */
+                       vsi_seid = pf->vsi[pf->lan_vsi]->seid;
+               } else if (vsi_seid < 0) {
+                       dev_info(&pf->pdev->dev, "add VSI %d: bad vsi seid\n",
+                                vsi_seid);
+                       goto command_write_done;
+               }
+
+               vsi = i40e_vsi_setup(pf, I40E_VSI_VMDQ2, vsi_seid, 0);
+               if (vsi)
+                       dev_info(&pf->pdev->dev, "added VSI %d to relay %d\n",
+                                vsi->seid, vsi->uplink_seid);
+               else
+                       dev_info(&pf->pdev->dev, "'%s' failed\n", cmd_buf);
+
+       } else if (strncmp(cmd_buf, "del vsi", 7) == 0) {
+               sscanf(&cmd_buf[7], "%i", &vsi_seid);
+               vsi = i40e_dbg_find_vsi(pf, vsi_seid);
+               if (!vsi) {
+                       dev_info(&pf->pdev->dev, "del VSI %d: seid not found\n",
+                                vsi_seid);
+                       goto command_write_done;
+               }
+
+               dev_info(&pf->pdev->dev, "deleting VSI %d\n", vsi_seid);
+               i40e_vsi_release(vsi);
+
+       } else if (strncmp(cmd_buf, "add relay", 9) == 0) {
+               struct i40e_veb *veb;
+               int uplink_seid, i;
+
+               cnt = sscanf(&cmd_buf[9], "%i %i", &uplink_seid, &vsi_seid);
+               if (cnt != 2) {
+                       dev_info(&pf->pdev->dev,
+                                "add relay: bad command string, cnt=%d\n",
+                                cnt);
+                       goto command_write_done;
+               } else if (uplink_seid < 0) {
+                       dev_info(&pf->pdev->dev,
+                                "add relay %d: bad uplink seid\n",
+                                uplink_seid);
+                       goto command_write_done;
+               }
+
+               vsi = i40e_dbg_find_vsi(pf, vsi_seid);
+               if (!vsi) {
+                       dev_info(&pf->pdev->dev,
+                                "add relay: vsi VSI %d not found\n", vsi_seid);
+                       goto command_write_done;
+               }
+
+               for (i = 0; i < I40E_MAX_VEB; i++)
+                       if (pf->veb[i] && pf->veb[i]->seid == uplink_seid)
+                               break;
+               if (i >= I40E_MAX_VEB && uplink_seid != 0 &&
+                   uplink_seid != pf->mac_seid) {
+                       dev_info(&pf->pdev->dev,
+                                "add relay: relay uplink %d not found\n",
+                                uplink_seid);
+                       goto command_write_done;
+               }
+
+               veb = i40e_veb_setup(pf, 0, uplink_seid, vsi_seid,
+                                    vsi->tc_config.enabled_tc);
+               if (veb)
+                       dev_info(&pf->pdev->dev, "added relay %d\n", veb->seid);
+               else
+                       dev_info(&pf->pdev->dev, "add relay failed\n");
+
+       } else if (strncmp(cmd_buf, "del relay", 9) == 0) {
+               int i;
+               cnt = sscanf(&cmd_buf[9], "%i", &veb_seid);
+               if (cnt != 1) {
+                       dev_info(&pf->pdev->dev,
+                                "del relay: bad command string, cnt=%d\n",
+                                cnt);
+                       goto command_write_done;
+               } else if (veb_seid < 0) {
+                       dev_info(&pf->pdev->dev,
+                                "del relay %d: bad relay seid\n", veb_seid);
+                       goto command_write_done;
+               }
+
+               /* find the veb */
+               for (i = 0; i < I40E_MAX_VEB; i++)
+                       if (pf->veb[i] && pf->veb[i]->seid == veb_seid)
+                               break;
+               if (i >= I40E_MAX_VEB) {
+                       dev_info(&pf->pdev->dev,
+                                "del relay: relay %d not found\n", veb_seid);
+                       goto command_write_done;
+               }
+
+               dev_info(&pf->pdev->dev, "deleting relay %d\n", veb_seid);
+               i40e_veb_release(pf->veb[i]);
+
+       } else if (strncmp(cmd_buf, "add macaddr", 11) == 0) {
+               u8 ma[6];
+               int vlan = 0;
+               struct i40e_mac_filter *f;
+               int ret;
+
+               cnt = sscanf(&cmd_buf[11],
+                            "%i %hhx:%hhx:%hhx:%hhx:%hhx:%hhx %i",
+                            &vsi_seid,
+                            &ma[0], &ma[1], &ma[2], &ma[3], &ma[4], &ma[5],
+                            &vlan);
+               if (cnt == 7) {
+                       vlan = 0;
+               } else if (cnt != 8) {
+                       dev_info(&pf->pdev->dev,
+                                "add macaddr: bad command string, cnt=%d\n",
+                                cnt);
+                       goto command_write_done;
+               }
+
+               vsi = i40e_dbg_find_vsi(pf, vsi_seid);
+               if (!vsi) {
+                       dev_info(&pf->pdev->dev,
+                                "add macaddr: VSI %d not found\n", vsi_seid);
+                       goto command_write_done;
+               }
+
+               f = i40e_add_filter(vsi, ma, vlan, false, false);
+               ret = i40e_sync_vsi_filters(vsi);
+               if (f && !ret)
+                       dev_info(&pf->pdev->dev,
+                                "add macaddr: %pM vlan=%d added to VSI %d\n",
+                                ma, vlan, vsi_seid);
+               else
+                       dev_info(&pf->pdev->dev,
+                                "add macaddr: %pM vlan=%d to VSI %d failed, f=%p ret=%d\n",
+                                ma, vlan, vsi_seid, f, ret);
+
+       } else if (strncmp(cmd_buf, "del macaddr", 11) == 0) {
+               u8 ma[6];
+               int vlan = 0;
+               int ret;
+
+               cnt = sscanf(&cmd_buf[11],
+                            "%i %hhx:%hhx:%hhx:%hhx:%hhx:%hhx %i",
+                            &vsi_seid,
+                            &ma[0], &ma[1], &ma[2], &ma[3], &ma[4], &ma[5],
+                            &vlan);
+               if (cnt == 7) {
+                       vlan = 0;
+               } else if (cnt != 8) {
+                       dev_info(&pf->pdev->dev,
+                                "del macaddr: bad command string, cnt=%d\n",
+                                cnt);
+                       goto command_write_done;
+               }
+
+               vsi = i40e_dbg_find_vsi(pf, vsi_seid);
+               if (!vsi) {
+                       dev_info(&pf->pdev->dev,
+                                "del macaddr: VSI %d not found\n", vsi_seid);
+                       goto command_write_done;
+               }
+
+               i40e_del_filter(vsi, ma, vlan, false, false);
+               ret = i40e_sync_vsi_filters(vsi);
+               if (!ret)
+                       dev_info(&pf->pdev->dev,
+                                "del macaddr: %pM vlan=%d removed from VSI %d\n",
+                                ma, vlan, vsi_seid);
+               else
+                       dev_info(&pf->pdev->dev,
+                                "del macaddr: %pM vlan=%d from VSI %d failed, ret=%d\n",
+                                ma, vlan, vsi_seid, ret);
+
+       } else if (strncmp(cmd_buf, "add pvid", 8) == 0) {
+               int v;
+               u16 vid;
+               i40e_status ret;
+
+               cnt = sscanf(&cmd_buf[8], "%i %u", &vsi_seid, &v);
+               if (cnt != 2) {
+                       dev_info(&pf->pdev->dev,
+                                "add pvid: bad command string, cnt=%d\n", cnt);
+                       goto command_write_done;
+               }
+
+               vsi = i40e_dbg_find_vsi(pf, vsi_seid);
+               if (!vsi) {
+                       dev_info(&pf->pdev->dev, "add pvid: VSI %d not found\n",
+                                vsi_seid);
+                       goto command_write_done;
+               }
+
+               vid = (unsigned)v;
+               ret = i40e_vsi_add_pvid(vsi, vid);
+               if (!ret)
+                       dev_info(&pf->pdev->dev,
+                                "add pvid: %d added to VSI %d\n",
+                                vid, vsi_seid);
+               else
+                       dev_info(&pf->pdev->dev,
+                                "add pvid: %d to VSI %d failed, ret=%d\n",
+                                vid, vsi_seid, ret);
+
+       } else if (strncmp(cmd_buf, "del pvid", 8) == 0) {
+
+               cnt = sscanf(&cmd_buf[8], "%i", &vsi_seid);
+               if (cnt != 1) {
+                       dev_info(&pf->pdev->dev,
+                                "del pvid: bad command string, cnt=%d\n",
+                                cnt);
+                       goto command_write_done;
+               }
+
+               vsi = i40e_dbg_find_vsi(pf, vsi_seid);
+               if (!vsi) {
+                       dev_info(&pf->pdev->dev,
+                                "del pvid: VSI %d not found\n", vsi_seid);
+                       goto command_write_done;
+               }
+
+               i40e_vsi_remove_pvid(vsi);
+               dev_info(&pf->pdev->dev,
+                        "del pvid: removed from VSI %d\n", vsi_seid);
+
+       } else if (strncmp(cmd_buf, "dump", 4) == 0) {
+               if (strncmp(&cmd_buf[5], "switch", 6) == 0) {
+                       i40e_fetch_switch_configuration(pf, true);
+               } else if (strncmp(&cmd_buf[5], "vsi", 3) == 0) {
+                       cnt = sscanf(&cmd_buf[8], "%i", &vsi_seid);
+                       if (cnt > 0)
+                               i40e_dbg_dump_vsi_seid(pf, vsi_seid);
+                       else
+                               i40e_dbg_dump_vsi_no_seid(pf);
+               } else if (strncmp(&cmd_buf[5], "veb", 3) == 0) {
+                       cnt = sscanf(&cmd_buf[8], "%i", &vsi_seid);
+                       if (cnt > 0)
+                               i40e_dbg_dump_veb_seid(pf, vsi_seid);
+                       else
+                               i40e_dbg_dump_veb_all(pf);
+               } else if (strncmp(&cmd_buf[5], "desc", 4) == 0) {
+                       int ring_id, desc_n;
+                       if (strncmp(&cmd_buf[10], "rx", 2) == 0) {
+                               cnt = sscanf(&cmd_buf[12], "%i %i %i",
+                                            &vsi_seid, &ring_id, &desc_n);
+                               i40e_dbg_dump_desc(cnt, vsi_seid, ring_id,
+                                                  desc_n, pf, true);
+                       } else if (strncmp(&cmd_buf[10], "tx", 2)
+                                       == 0) {
+                               cnt = sscanf(&cmd_buf[12], "%i %i %i",
+                                            &vsi_seid, &ring_id, &desc_n);
+                               i40e_dbg_dump_desc(cnt, vsi_seid, ring_id,
+                                                  desc_n, pf, false);
+                       } else if (strncmp(&cmd_buf[10], "aq", 2) == 0) {
+                               i40e_dbg_dump_aq_desc(pf);
+                       } else {
+                               dev_info(&pf->pdev->dev,
+                                        "dump desc tx <vsi_seid> <ring_id> [<desc_n>]\n");
+                               dev_info(&pf->pdev->dev,
+                                        "dump desc rx <vsi_seid> <ring_id> [<desc_n>]\n");
+                               dev_info(&pf->pdev->dev, "dump desc aq\n");
+                       }
+               } else if (strncmp(&cmd_buf[5], "stats", 5) == 0) {
+                       dev_info(&pf->pdev->dev, "pf stats:\n");
+                       i40e_dbg_dump_stats(pf, &pf->stats);
+                       dev_info(&pf->pdev->dev, "pf stats_offsets:\n");
+                       i40e_dbg_dump_stats(pf, &pf->stats_offsets);
+               } else if (strncmp(&cmd_buf[5], "reset stats", 11) == 0) {
+                       dev_info(&pf->pdev->dev,
+                                "core reset count: %d\n", pf->corer_count);
+                       dev_info(&pf->pdev->dev,
+                                "global reset count: %d\n", pf->globr_count);
+                       dev_info(&pf->pdev->dev,
+                                "emp reset count: %d\n", pf->empr_count);
+                       dev_info(&pf->pdev->dev,
+                                "pf reset count: %d\n", pf->pfr_count);
+               } else if (strncmp(&cmd_buf[5], "port", 4) == 0) {
+                       struct i40e_aqc_query_port_ets_config_resp *bw_data;
+                       struct i40e_dcbx_config *cfg =
+                                               &pf->hw.local_dcbx_config;
+                       struct i40e_dcbx_config *r_cfg =
+                                               &pf->hw.remote_dcbx_config;
+                       int i, ret;
+
+                       bw_data = kzalloc(sizeof(
+                                   struct i40e_aqc_query_port_ets_config_resp),
+                                         GFP_KERNEL);
+                       if (!bw_data) {
+                               ret = -ENOMEM;
+                               goto command_write_done;
+                       }
+
+                       ret = i40e_aq_query_port_ets_config(&pf->hw,
+                                                           pf->mac_seid,
+                                                           bw_data, NULL);
+                       if (ret) {
+                               dev_info(&pf->pdev->dev,
+                                        "Query Port ETS Config AQ command failed =0x%x\n",
+                                        pf->hw.aq.asq_last_status);
+                               kfree(bw_data);
+                               bw_data = NULL;
+                               goto command_write_done;
+                       }
+                       dev_info(&pf->pdev->dev,
+                                "port bw: tc_valid=0x%x tc_strict_prio=0x%x, tc_bw_max=0x%04x,0x%04x\n",
+                                bw_data->tc_valid_bits,
+                                bw_data->tc_strict_priority_bits,
+                                le16_to_cpu(bw_data->tc_bw_max[0]),
+                                le16_to_cpu(bw_data->tc_bw_max[1]));
+                       for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+                               dev_info(&pf->pdev->dev, "port bw: tc_bw_share=%d tc_bw_limit=%d\n",
+                                        bw_data->tc_bw_share_credits[i],
+                                        le16_to_cpu(bw_data->tc_bw_limits[i]));
+                       }
+
+                       kfree(bw_data);
+                       bw_data = NULL;
+
+                       dev_info(&pf->pdev->dev,
+                                "port ets_cfg: willing=%d cbs=%d, maxtcs=%d\n",
+                                cfg->etscfg.willing, cfg->etscfg.cbs,
+                                cfg->etscfg.maxtcs);
+                       for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+                               dev_info(&pf->pdev->dev, "port ets_cfg: %d prio_tc=%d tcbw=%d tctsa=%d\n",
+                                        i, cfg->etscfg.prioritytable[i],
+                                        cfg->etscfg.tcbwtable[i],
+                                        cfg->etscfg.tsatable[i]);
+                       }
+                       for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+                               dev_info(&pf->pdev->dev, "port ets_rec: %d prio_tc=%d tcbw=%d tctsa=%d\n",
+                                        i, cfg->etsrec.prioritytable[i],
+                                        cfg->etsrec.tcbwtable[i],
+                                        cfg->etsrec.tsatable[i]);
+                       }
+                       dev_info(&pf->pdev->dev,
+                                "port pfc_cfg: willing=%d mbc=%d, pfccap=%d pfcenable=0x%x\n",
+                                cfg->pfc.willing, cfg->pfc.mbc,
+                                cfg->pfc.pfccap, cfg->pfc.pfcenable);
+                       dev_info(&pf->pdev->dev,
+                                "port app_table: num_apps=%d\n", cfg->numapps);
+                       for (i = 0; i < cfg->numapps; i++) {
+                               dev_info(&pf->pdev->dev, "port app_table: %d prio=%d selector=%d protocol=0x%x\n",
+                                        i, cfg->app[i].priority,
+                                        cfg->app[i].selector,
+                                        cfg->app[i].protocolid);
+                       }
+                       /* Peer TLV DCBX data */
+                       dev_info(&pf->pdev->dev,
+                                "remote port ets_cfg: willing=%d cbs=%d, maxtcs=%d\n",
+                                r_cfg->etscfg.willing,
+                                r_cfg->etscfg.cbs, r_cfg->etscfg.maxtcs);
+                       for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+                               dev_info(&pf->pdev->dev, "remote port ets_cfg: %d prio_tc=%d tcbw=%d tctsa=%d\n",
+                                        i, r_cfg->etscfg.prioritytable[i],
+                                        r_cfg->etscfg.tcbwtable[i],
+                                        r_cfg->etscfg.tsatable[i]);
+                       }
+                       for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+                               dev_info(&pf->pdev->dev, "remote port ets_rec: %d prio_tc=%d tcbw=%d tctsa=%d\n",
+                                        i, r_cfg->etsrec.prioritytable[i],
+                                        r_cfg->etsrec.tcbwtable[i],
+                                        r_cfg->etsrec.tsatable[i]);
+                       }
+                       dev_info(&pf->pdev->dev,
+                                "remote port pfc_cfg: willing=%d mbc=%d, pfccap=%d pfcenable=0x%x\n",
+                                r_cfg->pfc.willing,
+                                r_cfg->pfc.mbc,
+                                r_cfg->pfc.pfccap,
+                                r_cfg->pfc.pfcenable);
+                       dev_info(&pf->pdev->dev,
+                                "remote port app_table: num_apps=%d\n",
+                                r_cfg->numapps);
+                       for (i = 0; i < r_cfg->numapps; i++) {
+                               dev_info(&pf->pdev->dev, "remote port app_table: %d prio=%d selector=%d protocol=0x%x\n",
+                                        i, r_cfg->app[i].priority,
+                                        r_cfg->app[i].selector,
+                                        r_cfg->app[i].protocolid);
+                       }
+               } else {
+                       dev_info(&pf->pdev->dev,
+                                "dump desc tx <vsi_seid> <ring_id> [<desc_n>], dump desc rx <vsi_seid> <ring_id> [<desc_n>],\n");
+                       dev_info(&pf->pdev->dev, "dump switch, dump vsi [seid] or\n");
+                       dev_info(&pf->pdev->dev, "dump stats\n");
+                       dev_info(&pf->pdev->dev, "dump reset stats\n");
+                       dev_info(&pf->pdev->dev, "dump port\n");
+                       dev_info(&pf->pdev->dev,
+                                "dump debug fwdata <cluster_id> <table_id> <index>\n");
+               }
+
+       } else if (strncmp(cmd_buf, "msg_enable", 10) == 0) {
+               u32 level;
+               cnt = sscanf(&cmd_buf[10], "%i", &level);
+               if (cnt) {
+                       if (I40E_DEBUG_USER & level) {
+                               pf->hw.debug_mask = level;
+                               dev_info(&pf->pdev->dev,
+                                        "set hw.debug_mask = 0x%08x\n",
+                                        pf->hw.debug_mask);
+                       }
+                       pf->msg_enable = level;
+                       dev_info(&pf->pdev->dev, "set msg_enable = 0x%08x\n",
+                                pf->msg_enable);
+               } else {
+                       dev_info(&pf->pdev->dev, "msg_enable = 0x%08x\n",
+                                pf->msg_enable);
+               }
+       } else if (strncmp(cmd_buf, "pfr", 3) == 0) {
+               dev_info(&pf->pdev->dev, "forcing PFR\n");
+               i40e_do_reset(pf, (1 << __I40E_PF_RESET_REQUESTED));
+
+       } else if (strncmp(cmd_buf, "corer", 5) == 0) {
+               dev_info(&pf->pdev->dev, "forcing CoreR\n");
+               i40e_do_reset(pf, (1 << __I40E_CORE_RESET_REQUESTED));
+
+       } else if (strncmp(cmd_buf, "globr", 5) == 0) {
+               dev_info(&pf->pdev->dev, "forcing GlobR\n");
+               i40e_do_reset(pf, (1 << __I40E_GLOBAL_RESET_REQUESTED));
+
+       } else if (strncmp(cmd_buf, "read", 4) == 0) {
+               u32 address;
+               u32 value;
+               cnt = sscanf(&cmd_buf[4], "%x", &address);
+               if (cnt != 1) {
+                       dev_info(&pf->pdev->dev, "read <reg>\n");
+                       goto command_write_done;
+               }
+
+               /* check the range on address */
+               if (address >= I40E_MAX_REGISTER) {
+                       dev_info(&pf->pdev->dev, "read reg address 0x%08x too large\n",
+                                address);
+                       goto command_write_done;
+               }
+
+               value = rd32(&pf->hw, address);
+               dev_info(&pf->pdev->dev, "read: 0x%08x = 0x%08x\n",
+                        address, value);
+
+       } else if (strncmp(cmd_buf, "write", 5) == 0) {
+               u32 address, value;
+               cnt = sscanf(&cmd_buf[5], "%x %x", &address, &value);
+               if (cnt != 2) {
+                       dev_info(&pf->pdev->dev, "write <reg> <value>\n");
+                       goto command_write_done;
+               }
+
+               /* check the range on address */
+               if (address >= I40E_MAX_REGISTER) {
+                       dev_info(&pf->pdev->dev, "write reg address 0x%08x too large\n",
+                                address);
+                       goto command_write_done;
+               }
+               wr32(&pf->hw, address, value);
+               value = rd32(&pf->hw, address);
+               dev_info(&pf->pdev->dev, "write: 0x%08x = 0x%08x\n",
+                        address, value);
+       } else if (strncmp(cmd_buf, "clear_stats", 11) == 0) {
+               if (strncmp(&cmd_buf[12], "vsi", 3) == 0) {
+                       cnt = sscanf(&cmd_buf[15], "%d", &vsi_seid);
+                       if (cnt == 0) {
+                               int i;
+                               for (i = 0; i < pf->hw.func_caps.num_vsis; i++)
+                                       i40e_vsi_reset_stats(pf->vsi[i]);
+                               dev_info(&pf->pdev->dev, "vsi clear stats called for all vsi's\n");
+                       } else if (cnt == 1) {
+                               vsi = i40e_dbg_find_vsi(pf, vsi_seid);
+                               if (!vsi) {
+                                       dev_info(&pf->pdev->dev,
+                                                "clear_stats vsi: bad vsi %d\n",
+                                                vsi_seid);
+                                       goto command_write_done;
+                               }
+                               i40e_vsi_reset_stats(vsi);
+                               dev_info(&pf->pdev->dev,
+                                        "vsi clear stats called for vsi %d\n",
+                                        vsi_seid);
+                       } else {
+                               dev_info(&pf->pdev->dev, "clear_stats vsi [seid]\n");
+                       }
+               } else if (strncmp(&cmd_buf[12], "pf", 2) == 0) {
+                       i40e_pf_reset_stats(pf);
+                       dev_info(&pf->pdev->dev, "pf clear stats called\n");
+               } else {
+                       dev_info(&pf->pdev->dev, "clear_stats vsi [seid] or clear_stats pf\n");
+               }
+       } else if ((strncmp(cmd_buf, "add fd_filter", 13) == 0) ||
+                  (strncmp(cmd_buf, "rem fd_filter", 13) == 0)) {
+               struct i40e_fdir_data fd_data;
+               int ret;
+               u16 packet_len, i, j = 0;
+               char *asc_packet;
+               bool add = false;
+
+               asc_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_LOOKUP,
+                                    GFP_KERNEL);
+               if (!asc_packet)
+                       goto command_write_done;
+
+               fd_data.raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_LOOKUP,
+                                            GFP_KERNEL);
+
+               if (!fd_data.raw_packet) {
+                       kfree(asc_packet);
+                       asc_packet = NULL;
+                       goto command_write_done;
+               }
+
+               if (strncmp(cmd_buf, "add", 3) == 0)
+                       add = true;
+               cnt = sscanf(&cmd_buf[13],
+                            "%hx %2hhx %2hhx %hx %2hhx %2hhx %hx %x %hd %512s",
+                            &fd_data.q_index,
+                            &fd_data.flex_off, &fd_data.pctype,
+                            &fd_data.dest_vsi, &fd_data.dest_ctl,
+                            &fd_data.fd_status, &fd_data.cnt_index,
+                            &fd_data.fd_id, &packet_len, asc_packet);
+               if (cnt != 10) {
+                       dev_info(&pf->pdev->dev,
+                                "program fd_filter: bad command string, cnt=%d\n",
+                                cnt);
+                       kfree(asc_packet);
+                       asc_packet = NULL;
+                       kfree(fd_data.raw_packet);
+                       goto command_write_done;
+               }
+
+               /* fix packet length if user entered 0 */
+               if (packet_len == 0)
+                       packet_len = I40E_FDIR_MAX_RAW_PACKET_LOOKUP;
+
+               /* make sure to check the max as well */
+               packet_len = min_t(u16,
+                                  packet_len, I40E_FDIR_MAX_RAW_PACKET_LOOKUP);
+
+               dev_info(&pf->pdev->dev, "FD raw packet:\n");
+               for (i = 0; i < packet_len; i++) {
+                       sscanf(&asc_packet[j], "%2hhx ",
+                              &fd_data.raw_packet[i]);
+                       j += 3;
+                       snprintf(print_buf, 3, "%02x ", fd_data.raw_packet[i]);
+                       print_buf += 3;
+                       if ((i % 16) == 15) {
+                               snprintf(print_buf, 1, "\n");
+                               print_buf++;
+                       }
+               }
+               dev_info(&pf->pdev->dev, "%s\n", print_buf_start);
+               ret = i40e_program_fdir_filter(&fd_data, pf, add);
+               if (!ret) {
+                       dev_info(&pf->pdev->dev, "Filter command send Status : Success\n");
+               } else {
+                       dev_info(&pf->pdev->dev,
+                                "Filter command send failed %d\n", ret);
+               }
+               kfree(fd_data.raw_packet);
+               fd_data.raw_packet = NULL;
+               kfree(asc_packet);
+               asc_packet = NULL;
+       } else if (strncmp(cmd_buf, "lldp", 4) == 0) {
+               if (strncmp(&cmd_buf[5], "stop", 4) == 0) {
+                       int ret;
+                       ret = i40e_aq_stop_lldp(&pf->hw, false, NULL);
+                       if (ret) {
+                               dev_info(&pf->pdev->dev,
+                                        "Stop LLDP AQ command failed =0x%x\n",
+                                        pf->hw.aq.asq_last_status);
+                               goto command_write_done;
+                       }
+               } else if (strncmp(&cmd_buf[5], "start", 5) == 0) {
+                       int ret;
+                       ret = i40e_aq_start_lldp(&pf->hw, NULL);
+                       if (ret) {
+                               dev_info(&pf->pdev->dev,
+                                        "Start LLDP AQ command failed =0x%x\n",
+                                        pf->hw.aq.asq_last_status);
+                               goto command_write_done;
+                       }
+               } else if (strncmp(&cmd_buf[5],
+                          "get local", 9) == 0) {
+                       int ret, i;
+                       u8 *buff;
+                       u16 llen, rlen;
+                       buff = kzalloc(I40E_LLDPDU_SIZE, GFP_KERNEL);
+                       if (!buff)
+                               goto command_write_done;
+
+                       ret = i40e_aq_get_lldp_mib(&pf->hw, 0,
+                                                  I40E_AQ_LLDP_MIB_LOCAL,
+                                                  buff, I40E_LLDPDU_SIZE,
+                                                  &llen, &rlen, NULL);
+                       if (ret) {
+                               dev_info(&pf->pdev->dev,
+                                        "Get LLDP MIB (local) AQ command failed =0x%x\n",
+                                        pf->hw.aq.asq_last_status);
+                               kfree(buff);
+                               buff = NULL;
+                               goto command_write_done;
+                       }
+                       dev_info(&pf->pdev->dev,
+                                "Get LLDP MIB (local) AQ buffer written back:\n");
+                       for (i = 0; i < I40E_LLDPDU_SIZE; i++) {
+                               snprintf(print_buf, 3, "%02x ", buff[i]);
+                               print_buf += 3;
+                               if ((i % 16) == 15) {
+                                       snprintf(print_buf, 1, "\n");
+                                       print_buf++;
+                               }
+                       }
+                       dev_info(&pf->pdev->dev, "%s\n", print_buf_start);
+                       kfree(buff);
+                       buff = NULL;
+               } else if (strncmp(&cmd_buf[5], "get remote", 10) == 0) {
+                       int ret, i;
+                       u8 *buff;
+                       u16 llen, rlen;
+                       buff = kzalloc(I40E_LLDPDU_SIZE, GFP_KERNEL);
+                       if (!buff)
+                               goto command_write_done;
+
+                       ret = i40e_aq_get_lldp_mib(&pf->hw,
+                                       I40E_AQ_LLDP_BRIDGE_TYPE_NEAREST_BRIDGE,
+                                       I40E_AQ_LLDP_MIB_LOCAL,
+                                       buff, I40E_LLDPDU_SIZE,
+                                       &llen, &rlen, NULL);
+                       if (ret) {
+                               dev_info(&pf->pdev->dev,
+                                        "Get LLDP MIB (remote) AQ command failed =0x%x\n",
+                                        pf->hw.aq.asq_last_status);
+                               kfree(buff);
+                               buff = NULL;
+                               goto command_write_done;
+                       }
+                       dev_info(&pf->pdev->dev,
+                                "Get LLDP MIB (remote) AQ buffer written back:\n");
+                       for (i = 0; i < I40E_LLDPDU_SIZE; i++) {
+                               snprintf(print_buf, 3, "%02x ", buff[i]);
+                               print_buf += 3;
+                               if ((i % 16) == 15) {
+                                       snprintf(print_buf, 1, "\n");
+                                       print_buf++;
+                               }
+                       }
+                       dev_info(&pf->pdev->dev, "%s\n", print_buf_start);
+                       kfree(buff);
+                       buff = NULL;
+               } else if (strncmp(&cmd_buf[5], "event on", 8) == 0) {
+                       int ret;
+                       ret = i40e_aq_cfg_lldp_mib_change_event(&pf->hw,
+                                                               true, NULL);
+                       if (ret) {
+                               dev_info(&pf->pdev->dev,
+                                        "Config LLDP MIB Change Event (on) AQ command failed =0x%x\n",
+                                        pf->hw.aq.asq_last_status);
+                               goto command_write_done;
+                       }
+               } else if (strncmp(&cmd_buf[5], "event off", 9) == 0) {
+                       int ret;
+                       ret = i40e_aq_cfg_lldp_mib_change_event(&pf->hw,
+                                                               false, NULL);
+                       if (ret) {
+                               dev_info(&pf->pdev->dev,
+                                        "Config LLDP MIB Change Event (off) AQ command failed =0x%x\n",
+                                        pf->hw.aq.asq_last_status);
+                               goto command_write_done;
+                       }
+               }
+       } else if (strncmp(cmd_buf, "nvm read", 8) == 0) {
+               u16 buffer_len, i, bytes;
+               u16 module;
+               u32 offset;
+               u16 *buff;
+               int ret;
+
+               cnt = sscanf(&cmd_buf[8], "%hx %x %hx",
+                            &module, &offset, &buffer_len);
+               if (cnt == 0) {
+                       module = 0;
+                       offset = 0;
+                       buffer_len = 0;
+               } else if (cnt == 1) {
+                       offset = 0;
+                       buffer_len = 0;
+               } else if (cnt == 2) {
+                       buffer_len = 0;
+               } else if (cnt > 3) {
+                       dev_info(&pf->pdev->dev,
+                                "nvm read: bad command string, cnt=%d\n", cnt);
+                       goto command_write_done;
+               }
+
+               /* Read at least 512 words */
+               if (buffer_len == 0)
+                       buffer_len = 512;
+
+               bytes = 2 * buffer_len;
+               buff = kzalloc(bytes, GFP_KERNEL);
+               if (!buff)
+                       goto command_write_done;
+
+               ret = i40e_acquire_nvm(&pf->hw, I40E_RESOURCE_READ);
+               if (ret) {
+                       dev_info(&pf->pdev->dev,
+                                "Failed Acquiring NVM resource for read err=%d status=0x%x\n",
+                                ret, pf->hw.aq.asq_last_status);
+                       kfree(buff);
+                       goto command_write_done;
+               }
+
+               ret = i40e_aq_read_nvm(&pf->hw, module, (2 * offset),
+                                      bytes, (u8 *)buff, true, NULL);
+               i40e_release_nvm(&pf->hw);
+               if (ret) {
+                       dev_info(&pf->pdev->dev,
+                                "Read NVM AQ failed err=%d status=0x%x\n",
+                                ret, pf->hw.aq.asq_last_status);
+               } else {
+                       dev_info(&pf->pdev->dev,
+                                "Read NVM module=0x%x offset=0x%x words=%d\n",
+                                module, offset, buffer_len);
+                       for (i = 0; i < buffer_len; i++) {
+                               if ((i % 16) == 0) {
+                                       snprintf(print_buf, 11, "\n0x%08x: ",
+                                                offset + i);
+                                       print_buf += 11;
+                               }
+                               snprintf(print_buf, 5, "%04x ", buff[i]);
+                               print_buf += 5;
+                       }
+                       dev_info(&pf->pdev->dev, "%s\n", print_buf_start);
+               }
+               kfree(buff);
+               buff = NULL;
+       } else {
+               dev_info(&pf->pdev->dev, "unknown command '%s'\n", cmd_buf);
+               dev_info(&pf->pdev->dev, "available commands\n");
+               dev_info(&pf->pdev->dev, "  add vsi [relay_seid]\n");
+               dev_info(&pf->pdev->dev, "  del vsi [vsi_seid]\n");
+               dev_info(&pf->pdev->dev, "  add relay <uplink_seid> <vsi_seid>\n");
+               dev_info(&pf->pdev->dev, "  del relay <relay_seid>\n");
+               dev_info(&pf->pdev->dev, "  add macaddr <vsi_seid> <aa:bb:cc:dd:ee:ff> [vlan]\n");
+               dev_info(&pf->pdev->dev, "  del macaddr <vsi_seid> <aa:bb:cc:dd:ee:ff> [vlan]\n");
+               dev_info(&pf->pdev->dev, "  add pvid <vsi_seid> <vid>\n");
+               dev_info(&pf->pdev->dev, "  del pvid <vsi_seid>\n");
+               dev_info(&pf->pdev->dev, "  dump switch\n");
+               dev_info(&pf->pdev->dev, "  dump vsi [seid]\n");
+               dev_info(&pf->pdev->dev, "  dump desc tx <vsi_seid> <ring_id> [<desc_n>]\n");
+               dev_info(&pf->pdev->dev, "  dump desc rx <vsi_seid> <ring_id> [<desc_n>]\n");
+               dev_info(&pf->pdev->dev, "  dump desc aq\n");
+               dev_info(&pf->pdev->dev, "  dump stats\n");
+               dev_info(&pf->pdev->dev, "  dump reset stats\n");
+               dev_info(&pf->pdev->dev, "  msg_enable [level]\n");
+               dev_info(&pf->pdev->dev, "  read <reg>\n");
+               dev_info(&pf->pdev->dev, "  write <reg> <value>\n");
+               dev_info(&pf->pdev->dev, "  clear_stats vsi [seid]\n");
+               dev_info(&pf->pdev->dev, "  clear_stats pf\n");
+               dev_info(&pf->pdev->dev, "  pfr\n");
+               dev_info(&pf->pdev->dev, "  corer\n");
+               dev_info(&pf->pdev->dev, "  globr\n");
+               dev_info(&pf->pdev->dev, "  add fd_filter <dest q_index> <flex_off> <pctype> <dest_vsi> <dest_ctl> <fd_status> <cnt_index> <fd_id> <packet_len> <packet>\n");
+               dev_info(&pf->pdev->dev, "  rem fd_filter <dest q_index> <flex_off> <pctype> <dest_vsi> <dest_ctl> <fd_status> <cnt_index> <fd_id> <packet_len> <packet>\n");
+               dev_info(&pf->pdev->dev, "  lldp start\n");
+               dev_info(&pf->pdev->dev, "  lldp stop\n");
+               dev_info(&pf->pdev->dev, "  lldp get local\n");
+               dev_info(&pf->pdev->dev, "  lldp get remote\n");
+               dev_info(&pf->pdev->dev, "  lldp event on\n");
+               dev_info(&pf->pdev->dev, "  lldp event off\n");
+               dev_info(&pf->pdev->dev, "  nvm read [module] [word_offset] [word_count]\n");
+       }
+
+command_write_done:
+       kfree(cmd_buf);
+       cmd_buf = NULL;
+       kfree(print_buf_start);
+       print_buf = NULL;
+       print_buf_start = NULL;
+       return count;
+}
+
+/* file operations for the per-pf "command" debugfs file */
+static const struct file_operations i40e_dbg_command_fops = {
+       .owner = THIS_MODULE,
+       .open =  simple_open,
+       .read =  i40e_dbg_command_read,
+       .write = i40e_dbg_command_write,
+};
+
+/**************************************************************
+ * netdev_ops
+ * The netdev_ops entry in debugfs is for giving the driver commands
+ * to be executed from the netdev operations.
+ **************************************************************/
+/* single file-scope scratch buffer holding the most recent command;
+ * NOTE(review): shared across all pfs and written without locking —
+ * presumably acceptable for a debugfs-only path, but worth confirming
+ */
+static char i40e_dbg_netdev_ops_buf[256] = "hello world";
+
+/**
+ * i40e_dbg_netdev_ops_read - read for netdev_ops datum
+ * @filp: the opened file
+ * @buffer: where to write the data for the user to read
+ * @count: the size of the user's buffer
+ * @ppos: file position offset
+ *
+ * Formats "<netdev name>: <last command>" into a kernel buffer and
+ * copies it to userspace.  Returns the number of bytes produced, 0 on
+ * a repeated read (*ppos != 0), or a negative errno on failure.
+ **/
+static ssize_t i40e_dbg_netdev_ops_read(struct file *filp, char __user *buffer,
+                                       size_t count, loff_t *ppos)
+{
+       struct i40e_pf *pf = filp->private_data;
+       int bytes_not_copied;
+       int buf_size = 256;
+       char *buf;
+       int len;
+
+       /* don't allow partial reads */
+       if (*ppos != 0)
+               return 0;
+       if (count < buf_size)
+               return -ENOSPC;
+
+       buf = kzalloc(buf_size, GFP_KERNEL);
+       if (!buf)
+               return -ENOMEM;
+
+       len = snprintf(buf, buf_size, "%s: %s\n",
+                      pf->vsi[pf->lan_vsi]->netdev->name,
+                      i40e_dbg_netdev_ops_buf);
+
+       bytes_not_copied = copy_to_user(buffer, buf, len);
+       kfree(buf);
+
+       /* copy_to_user() returns the (unsigned) number of bytes that
+        * could NOT be copied — it is never negative — so any non-zero
+        * value means the user buffer faulted
+        */
+       if (bytes_not_copied)
+               return -EFAULT;
+
+       *ppos = len;
+       return len;
+}
+
+/**
+ * i40e_dbg_netdev_ops_write - write into netdev_ops datum
+ * @filp: the opened file
+ * @buffer: where to find the user's data
+ * @count: the length of the user's data
+ * @ppos: file position offset
+ *
+ * Parses a command string from userspace ("tx_timeout", "change_mtu",
+ * "set_rx_mode" or "napi", each taking a vsi_seid) and invokes the
+ * corresponding netdev operation on that VSI's netdev.  Returns @count
+ * on success or a negative errno.
+ **/
+static ssize_t i40e_dbg_netdev_ops_write(struct file *filp,
+                                        const char __user *buffer,
+                                        size_t count, loff_t *ppos)
+{
+       struct i40e_pf *pf = filp->private_data;
+       int bytes_not_copied;
+       struct i40e_vsi *vsi;
+       int vsi_seid;
+       int i, cnt;
+
+       /* don't allow partial writes */
+       if (*ppos != 0)
+               return 0;
+       if (count >= sizeof(i40e_dbg_netdev_ops_buf))
+               return -ENOSPC;
+
+       memset(i40e_dbg_netdev_ops_buf, 0, sizeof(i40e_dbg_netdev_ops_buf));
+       bytes_not_copied = copy_from_user(i40e_dbg_netdev_ops_buf,
+                                         buffer, count);
+       /* copy_from_user() returns the (unsigned) number of bytes left
+        * uncopied — never negative — so any non-zero value means the
+        * user buffer faulted and the command must not be acted on
+        */
+       if (bytes_not_copied)
+               return -EFAULT;
+       i40e_dbg_netdev_ops_buf[count] = '\0';
+
+       if (strncmp(i40e_dbg_netdev_ops_buf, "tx_timeout", 10) == 0) {
+               cnt = sscanf(&i40e_dbg_netdev_ops_buf[11], "%i", &vsi_seid);
+               if (cnt != 1) {
+                       dev_info(&pf->pdev->dev, "tx_timeout <vsi_seid>\n");
+                       goto netdev_ops_write_done;
+               }
+               vsi = i40e_dbg_find_vsi(pf, vsi_seid);
+               if (!vsi) {
+                       dev_info(&pf->pdev->dev,
+                                "tx_timeout: VSI %d not found\n", vsi_seid);
+                       goto netdev_ops_write_done;
+               }
+               /* the ndo callbacks are normally invoked under rtnl;
+                * use trylock so a held lock can't hang this debugfs write
+                */
+               if (rtnl_trylock()) {
+                       vsi->netdev->netdev_ops->ndo_tx_timeout(vsi->netdev);
+                       rtnl_unlock();
+                       dev_info(&pf->pdev->dev, "tx_timeout called\n");
+               } else {
+                       dev_info(&pf->pdev->dev, "Could not acquire RTNL - please try again\n");
+               }
+       } else if (strncmp(i40e_dbg_netdev_ops_buf, "change_mtu", 10) == 0) {
+               int mtu;
+               cnt = sscanf(&i40e_dbg_netdev_ops_buf[11], "%i %i",
+                            &vsi_seid, &mtu);
+               if (cnt != 2) {
+                       dev_info(&pf->pdev->dev, "change_mtu <vsi_seid> <mtu>\n");
+                       goto netdev_ops_write_done;
+               }
+               vsi = i40e_dbg_find_vsi(pf, vsi_seid);
+               if (!vsi) {
+                       dev_info(&pf->pdev->dev,
+                                "change_mtu: VSI %d not found\n", vsi_seid);
+                       goto netdev_ops_write_done;
+               }
+               if (rtnl_trylock()) {
+                       vsi->netdev->netdev_ops->ndo_change_mtu(vsi->netdev,
+                                                               mtu);
+                       rtnl_unlock();
+                       dev_info(&pf->pdev->dev, "change_mtu called\n");
+               } else {
+                       dev_info(&pf->pdev->dev, "Could not acquire RTNL - please try again\n");
+               }
+
+       } else if (strncmp(i40e_dbg_netdev_ops_buf, "set_rx_mode", 11) == 0) {
+               cnt = sscanf(&i40e_dbg_netdev_ops_buf[11], "%i", &vsi_seid);
+               if (cnt != 1) {
+                       dev_info(&pf->pdev->dev, "set_rx_mode <vsi_seid>\n");
+                       goto netdev_ops_write_done;
+               }
+               vsi = i40e_dbg_find_vsi(pf, vsi_seid);
+               if (!vsi) {
+                       dev_info(&pf->pdev->dev,
+                                "set_rx_mode: VSI %d not found\n", vsi_seid);
+                       goto netdev_ops_write_done;
+               }
+               if (rtnl_trylock()) {
+                       vsi->netdev->netdev_ops->ndo_set_rx_mode(vsi->netdev);
+                       rtnl_unlock();
+                       dev_info(&pf->pdev->dev, "set_rx_mode called\n");
+               } else {
+                       dev_info(&pf->pdev->dev, "Could not acquire RTNL - please try again\n");
+               }
+
+       } else if (strncmp(i40e_dbg_netdev_ops_buf, "napi", 4) == 0) {
+               cnt = sscanf(&i40e_dbg_netdev_ops_buf[4], "%i", &vsi_seid);
+               if (cnt != 1) {
+                       dev_info(&pf->pdev->dev, "napi <vsi_seid>\n");
+                       goto netdev_ops_write_done;
+               }
+               vsi = i40e_dbg_find_vsi(pf, vsi_seid);
+               if (!vsi) {
+                       dev_info(&pf->pdev->dev, "napi: VSI %d not found\n",
+                                vsi_seid);
+                       goto netdev_ops_write_done;
+               }
+               /* kick every queue vector's napi poll on this VSI */
+               for (i = 0; i < vsi->num_q_vectors; i++)
+                       napi_schedule(&vsi->q_vectors[i].napi);
+               dev_info(&pf->pdev->dev, "napi called\n");
+       } else {
+               dev_info(&pf->pdev->dev, "unknown command '%s'\n",
+                        i40e_dbg_netdev_ops_buf);
+               dev_info(&pf->pdev->dev, "available commands\n");
+               dev_info(&pf->pdev->dev, "  tx_timeout <vsi_seid>\n");
+               dev_info(&pf->pdev->dev, "  change_mtu <vsi_seid> <mtu>\n");
+               dev_info(&pf->pdev->dev, "  set_rx_mode <vsi_seid>\n");
+               dev_info(&pf->pdev->dev, "  napi <vsi_seid>\n");
+       }
+netdev_ops_write_done:
+       return count;
+}
+
+/* file operations for the per-pf "netdev_ops" debugfs file */
+static const struct file_operations i40e_dbg_netdev_ops_fops = {
+       .owner = THIS_MODULE,
+       .open = simple_open,
+       .read = i40e_dbg_netdev_ops_read,
+       .write = i40e_dbg_netdev_ops_write,
+};
+
+/**
+ * i40e_dbg_pf_init - setup the debugfs directory for the pf
+ * @pf: the pf that is starting up
+ *
+ * Creates a per-device directory under the driver's debugfs root and
+ * populates it with the "command", "dump" and "netdev_ops" files.
+ **/
+void i40e_dbg_pf_init(struct i40e_pf *pf)
+{
+       struct dentry *pfile __attribute__((unused));
+       const char *name = pci_name(pf->pdev);
+
+       pf->i40e_dbg_pf = debugfs_create_dir(name, i40e_dbg_root);
+       if (!pf->i40e_dbg_pf) {
+               dev_info(&pf->pdev->dev,
+                        "debugfs entry for %s failed\n", name);
+               return;
+       }
+
+       pfile = debugfs_create_file("command", 0600, pf->i40e_dbg_pf,
+                                   pf, &i40e_dbg_command_fops);
+       pfile = debugfs_create_file("dump", 0600, pf->i40e_dbg_pf, pf,
+                                   &i40e_dbg_dump_fops);
+       pfile = debugfs_create_file("netdev_ops", 0600, pf->i40e_dbg_pf,
+                                   pf, &i40e_dbg_netdev_ops_fops);
+}
+
+/**
+ * i40e_dbg_pf_exit - clear out the pf's debugfs entries
+ * @pf: the pf that is stopping
+ *
+ * Removes the pf's debugfs directory (recursively, taking the files
+ * with it) and releases the file-scope dump buffer.
+ **/
+void i40e_dbg_pf_exit(struct i40e_pf *pf)
+{
+       debugfs_remove_recursive(pf->i40e_dbg_pf);
+       pf->i40e_dbg_pf = NULL;
+
+       /* kfree(NULL) is a no-op; clearing the pointer guards against
+        * a stale reference on a later read of the dump file
+        */
+       kfree(i40e_dbg_dump_buf);
+       i40e_dbg_dump_buf = NULL;
+}
+
+/**
+ * i40e_dbg_init - start up debugfs for the driver
+ *
+ * Creates the driver-level debugfs directory that each pf's
+ * per-device directory is later created under; failure is only
+ * logged, since debugfs is best-effort.
+ **/
+void i40e_dbg_init(void)
+{
+       i40e_dbg_root = debugfs_create_dir(i40e_driver_name, NULL);
+       if (!i40e_dbg_root)
+               pr_info("init of debugfs failed\n");
+}
+
+/**
+ * i40e_dbg_exit - clean out the driver's debugfs entries
+ *
+ * Recursively removes the driver-level debugfs directory and
+ * anything still beneath it.
+ **/
+void i40e_dbg_exit(void)
+{
+       debugfs_remove_recursive(i40e_dbg_root);
+       i40e_dbg_root = NULL;
+}
+
+#endif /* CONFIG_DEBUG_FS */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_diag.c b/drivers/net/ethernet/intel/i40e/i40e_diag.c
new file mode 100644 (file)
index 0000000..de25514
--- /dev/null
@@ -0,0 +1,131 @@
+/*******************************************************************************
+ *
+ * Intel Ethernet Controller XL710 Family Linux Driver
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Contact Information:
+ * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ *
+ ******************************************************************************/
+
+#include "i40e_diag.h"
+#include "i40e_prototype.h"
+
+/**
+ * i40e_diag_reg_pattern_test
+ * @hw: pointer to the hw struct
+ * @reg: reg to be tested
+ * @mask: bits to be touched
+ *
+ * Writes a set of bit patterns (limited to the writable bits in @mask)
+ * to @reg, verifying each reads back unchanged, then restores and
+ * re-verifies the original value.  Returns 0 on success or
+ * I40E_ERR_DIAG_TEST_FAILED.
+ **/
+static i40e_status i40e_diag_reg_pattern_test(struct i40e_hw *hw,
+                                                       u32 reg, u32 mask)
+{
+       static const u32 test_patterns[] = {
+               0x5A5A5A5A, 0xA5A5A5A5, 0x00000000, 0xFFFFFFFF
+       };
+       u32 saved_value, read_back;
+       int idx;
+
+       /* remember the register so it can be restored afterwards */
+       saved_value = rd32(hw, reg);
+
+       for (idx = 0; idx < ARRAY_SIZE(test_patterns); idx++) {
+               u32 pattern = test_patterns[idx];
+
+               wr32(hw, reg, (pattern & mask));
+               read_back = rd32(hw, reg);
+               if ((read_back & mask) != (pattern & mask)) {
+                       i40e_debug(hw, I40E_DEBUG_DIAG,
+                                  "%s: reg pattern test failed - reg 0x%08x pat 0x%08x val 0x%08x\n",
+                                  __func__, reg, pattern, read_back);
+                       return I40E_ERR_DIAG_TEST_FAILED;
+               }
+       }
+
+       /* put the original value back and make sure it sticks */
+       wr32(hw, reg, saved_value);
+       read_back = rd32(hw, reg);
+       if (read_back != saved_value) {
+               i40e_debug(hw, I40E_DEBUG_DIAG,
+                          "%s: reg restore test failed - reg 0x%08x orig_val 0x%08x val 0x%08x\n",
+                          __func__, reg, saved_value, read_back);
+               return I40E_ERR_DIAG_TEST_FAILED;
+       }
+
+       return 0;
+}
+
+/* register ranges exercised by i40e_diag_reg_test(); the table is
+ * terminated by an all-zero sentinel entry (offset == 0)
+ */
+struct i40e_diag_reg_test_info i40e_reg_list[] = {
+       /* offset               mask         elements   stride */
+       {I40E_QTX_CTL(0),       0x0000FFBF,  64, I40E_QTX_CTL(1) - I40E_QTX_CTL(0)},
+       {I40E_PFINT_ITR0(0),    0x00000FFF,   3, I40E_PFINT_ITR0(1) - I40E_PFINT_ITR0(0)},
+       {I40E_PFINT_ITRN(0, 0), 0x00000FFF,  64, I40E_PFINT_ITRN(0, 1) - I40E_PFINT_ITRN(0, 0)},
+       {I40E_PFINT_ITRN(1, 0), 0x00000FFF,  64, I40E_PFINT_ITRN(1, 1) - I40E_PFINT_ITRN(1, 0)},
+       {I40E_PFINT_ITRN(2, 0), 0x00000FFF,  64, I40E_PFINT_ITRN(2, 1) - I40E_PFINT_ITRN(2, 0)},
+       {I40E_PFINT_STAT_CTL0,  0x0000000C,   1, 0},
+       {I40E_PFINT_LNKLST0,    0x00001FFF,   1, 0},
+       {I40E_PFINT_LNKLSTN(0), 0x000007FF, 511, I40E_PFINT_LNKLSTN(1) - I40E_PFINT_LNKLSTN(0)},
+       {I40E_QINT_TQCTL(0),    0x000000FF, I40E_QINT_TQCTL_MAX_INDEX + 1, I40E_QINT_TQCTL(1) - I40E_QINT_TQCTL(0)},
+       {I40E_QINT_RQCTL(0),    0x000000FF, I40E_QINT_RQCTL_MAX_INDEX + 1, I40E_QINT_RQCTL(1) - I40E_QINT_RQCTL(0)},
+       {I40E_PFINT_ICR0_ENA,   0xF7F20000,   1, 0},
+       { 0 }
+};
+
+/**
+ * i40e_diag_reg_test
+ * @hw: pointer to the hw struct
+ *
+ * Perform registers diagnostic test: runs the pattern test over every
+ * element of every range in i40e_reg_list, stopping at the first
+ * failure.  Returns 0 on success or the failing test's error code.
+ **/
+i40e_status i40e_diag_reg_test(struct i40e_hw *hw)
+{
+       i40e_status ret_code = 0;
+       u32 elem, idx;
+
+       /* walk the table until the all-zero sentinel or a failure */
+       for (elem = 0; i40e_reg_list[elem].offset != 0; elem++) {
+               u32 mask = i40e_reg_list[elem].mask;
+
+               for (idx = 0; idx < i40e_reg_list[elem].elements; idx++) {
+                       u32 reg = i40e_reg_list[elem].offset +
+                                 (idx * i40e_reg_list[elem].stride);
+
+                       ret_code = i40e_diag_reg_pattern_test(hw, reg, mask);
+                       if (ret_code)
+                               break;
+               }
+               if (ret_code)
+                       break;
+       }
+
+       return ret_code;
+}
+
+/**
+ * i40e_diag_eeprom_test
+ * @hw: pointer to the hw struct
+ *
+ * Perform EEPROM diagnostic test: read the NVM control word, and only
+ * when it reports a valid NVM, validate the EEPROM checksum.  Returns
+ * 0 on success or I40E_ERR_DIAG_TEST_FAILED / the checksum error code.
+ **/
+i40e_status i40e_diag_eeprom_test(struct i40e_hw *hw)
+{
+       i40e_status ret_code;
+       u16 reg_val;
+
+       ret_code = i40e_read_nvm_word(hw, I40E_SR_NVM_CONTROL_WORD, &reg_val);
+       if (ret_code)
+               return I40E_ERR_DIAG_TEST_FAILED;
+
+       /* bail out unless the control word marks the NVM as valid */
+       if ((reg_val & I40E_SR_CONTROL_WORD_1_MASK) !=
+           (0x01 << I40E_SR_CONTROL_WORD_1_SHIFT))
+               return I40E_ERR_DIAG_TEST_FAILED;
+
+       return i40e_validate_nvm_checksum(hw, NULL);
+}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_diag.h b/drivers/net/ethernet/intel/i40e/i40e_diag.h
new file mode 100644 (file)
index 0000000..3d98277
--- /dev/null
@@ -0,0 +1,52 @@
+/*******************************************************************************
+ *
+ * Intel Ethernet Controller XL710 Family Linux Driver
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Contact Information:
+ * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ *
+ ******************************************************************************/
+
+#ifndef _I40E_DIAG_H_
+#define _I40E_DIAG_H_
+
+#include "i40e_type.h"
+
+enum i40e_lb_mode {
+       I40E_LB_MODE_NONE = 0,
+       I40E_LB_MODE_PHY_LOCAL,
+       I40E_LB_MODE_PHY_REMOTE,
+       I40E_LB_MODE_MAC_LOCAL,
+};
+
+struct i40e_diag_reg_test_info {
+       u32 offset;     /* the base register */
+       u32 mask;       /* bits that can be tested */
+       u32 elements;   /* number of elements if array */
+       u32 stride;     /* bytes between each element */
+};
+
+extern struct i40e_diag_reg_test_info i40e_reg_list[];
+
+i40e_status i40e_diag_reg_test(struct i40e_hw *hw);
+i40e_status i40e_diag_eeprom_test(struct i40e_hw *hw);
+
+#endif /* _I40E_DIAG_H_ */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_ethtool.c b/drivers/net/ethernet/intel/i40e/i40e_ethtool.c
new file mode 100644 (file)
index 0000000..9a76b8c
--- /dev/null
@@ -0,0 +1,1449 @@
+/*******************************************************************************
+ *
+ * Intel Ethernet Controller XL710 Family Linux Driver
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Contact Information:
+ * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ *
+ ******************************************************************************/
+
+/* ethtool support for i40e */
+
+#include "i40e.h"
+#include "i40e_diag.h"
+
+struct i40e_stats {
+       char stat_string[ETH_GSTRING_LEN];
+       int sizeof_stat;
+       int stat_offset;
+};
+
+#define I40E_STAT(_type, _name, _stat) { \
+       .stat_string = _name, \
+       .sizeof_stat = FIELD_SIZEOF(_type, _stat), \
+       .stat_offset = offsetof(_type, _stat) \
+}
+#define I40E_NETDEV_STAT(_net_stat) \
+               I40E_STAT(struct net_device_stats, #_net_stat, _net_stat)
+#define I40E_PF_STAT(_name, _stat) \
+               I40E_STAT(struct i40e_pf, _name, _stat)
+#define I40E_VSI_STAT(_name, _stat) \
+               I40E_STAT(struct i40e_vsi, _name, _stat)
+
+static const struct i40e_stats i40e_gstrings_net_stats[] = {
+       I40E_NETDEV_STAT(rx_packets),
+       I40E_NETDEV_STAT(tx_packets),
+       I40E_NETDEV_STAT(rx_bytes),
+       I40E_NETDEV_STAT(tx_bytes),
+       I40E_NETDEV_STAT(rx_errors),
+       I40E_NETDEV_STAT(tx_errors),
+       I40E_NETDEV_STAT(rx_dropped),
+       I40E_NETDEV_STAT(tx_dropped),
+       I40E_NETDEV_STAT(multicast),
+       I40E_NETDEV_STAT(collisions),
+       I40E_NETDEV_STAT(rx_length_errors),
+       I40E_NETDEV_STAT(rx_crc_errors),
+};
+
+/* These PF_STATs might look like duplicates of some NETDEV_STATs,
+ * but they are separate.  This device supports Virtualization, and
+ * as such might have several netdevs supporting VMDq and FCoE going
+ * through a single port.  The NETDEV_STATs are for individual netdevs
+ * seen at the top of the stack, and the PF_STATs are for the physical
+ * function at the bottom of the stack hosting those netdevs.
+ *
+ * The PF_STATs are appended to the netdev stats only when ethtool -S
+ * is queried on the base PF netdev, not on the VMDq or FCoE netdev.
+ */
+static struct i40e_stats i40e_gstrings_stats[] = {
+       I40E_PF_STAT("rx_bytes", stats.eth.rx_bytes),
+       I40E_PF_STAT("tx_bytes", stats.eth.tx_bytes),
+       I40E_PF_STAT("rx_errors", stats.eth.rx_errors),
+       I40E_PF_STAT("tx_errors", stats.eth.tx_errors),
+       I40E_PF_STAT("rx_dropped", stats.eth.rx_discards),
+       I40E_PF_STAT("tx_dropped", stats.eth.tx_discards),
+       I40E_PF_STAT("tx_dropped_link_down", stats.tx_dropped_link_down),
+       I40E_PF_STAT("crc_errors", stats.crc_errors),
+       I40E_PF_STAT("illegal_bytes", stats.illegal_bytes),
+       I40E_PF_STAT("mac_local_faults", stats.mac_local_faults),
+       I40E_PF_STAT("mac_remote_faults", stats.mac_remote_faults),
+       I40E_PF_STAT("rx_length_errors", stats.rx_length_errors),
+       I40E_PF_STAT("link_xon_rx", stats.link_xon_rx),
+       I40E_PF_STAT("link_xoff_rx", stats.link_xoff_rx),
+       I40E_PF_STAT("link_xon_tx", stats.link_xon_tx),
+       I40E_PF_STAT("link_xoff_tx", stats.link_xoff_tx),
+       I40E_PF_STAT("rx_size_64", stats.rx_size_64),
+       I40E_PF_STAT("rx_size_127", stats.rx_size_127),
+       I40E_PF_STAT("rx_size_255", stats.rx_size_255),
+       I40E_PF_STAT("rx_size_511", stats.rx_size_511),
+       I40E_PF_STAT("rx_size_1023", stats.rx_size_1023),
+       I40E_PF_STAT("rx_size_1522", stats.rx_size_1522),
+       I40E_PF_STAT("rx_size_big", stats.rx_size_big),
+       I40E_PF_STAT("tx_size_64", stats.tx_size_64),
+       I40E_PF_STAT("tx_size_127", stats.tx_size_127),
+       I40E_PF_STAT("tx_size_255", stats.tx_size_255),
+       I40E_PF_STAT("tx_size_511", stats.tx_size_511),
+       I40E_PF_STAT("tx_size_1023", stats.tx_size_1023),
+       I40E_PF_STAT("tx_size_1522", stats.tx_size_1522),
+       I40E_PF_STAT("tx_size_big", stats.tx_size_big),
+       I40E_PF_STAT("rx_undersize", stats.rx_undersize),
+       I40E_PF_STAT("rx_fragments", stats.rx_fragments),
+       I40E_PF_STAT("rx_oversize", stats.rx_oversize),
+       I40E_PF_STAT("rx_jabber", stats.rx_jabber),
+       I40E_PF_STAT("VF_admin_queue_requests", vf_aq_requests),
+};
+
+#define I40E_QUEUE_STATS_LEN(n) \
+  ((((struct i40e_netdev_priv *)netdev_priv((n)))->vsi->num_queue_pairs + \
+    ((struct i40e_netdev_priv *)netdev_priv((n)))->vsi->num_queue_pairs) * 2)
+#define I40E_GLOBAL_STATS_LEN  ARRAY_SIZE(i40e_gstrings_stats)
+#define I40E_NETDEV_STATS_LEN   ARRAY_SIZE(i40e_gstrings_net_stats)
+#define I40E_VSI_STATS_LEN(n)   (I40E_NETDEV_STATS_LEN + \
+                                I40E_QUEUE_STATS_LEN((n)))
+#define I40E_PFC_STATS_LEN ( \
+               (FIELD_SIZEOF(struct i40e_pf, stats.priority_xoff_rx) + \
+                FIELD_SIZEOF(struct i40e_pf, stats.priority_xon_rx) + \
+                FIELD_SIZEOF(struct i40e_pf, stats.priority_xoff_tx) + \
+                FIELD_SIZEOF(struct i40e_pf, stats.priority_xon_tx) + \
+                FIELD_SIZEOF(struct i40e_pf, stats.priority_xon_2_xoff)) \
+                / sizeof(u64))
+#define I40E_PF_STATS_LEN(n)   (I40E_GLOBAL_STATS_LEN + \
+                                I40E_PFC_STATS_LEN + \
+                                I40E_VSI_STATS_LEN((n)))
+
+enum i40e_ethtool_test_id {
+       I40E_ETH_TEST_REG = 0,
+       I40E_ETH_TEST_EEPROM,
+       I40E_ETH_TEST_INTR,
+       I40E_ETH_TEST_LOOPBACK,
+       I40E_ETH_TEST_LINK,
+};
+
+static const char i40e_gstrings_test[][ETH_GSTRING_LEN] = {
+       "Register test  (offline)",
+       "Eeprom test    (offline)",
+       "Interrupt test (offline)",
+       "Loopback test  (offline)",
+       "Link test   (on/offline)"
+};
+
+#define I40E_TEST_LEN (sizeof(i40e_gstrings_test) / ETH_GSTRING_LEN)
+
+/**
+ * i40e_get_settings - Get Link Speed and Duplex settings
+ * @netdev: network interface device structure
+ * @ecmd: ethtool command
+ *
+ * Reports speed/duplex settings based on media_type
+ **/
+static int i40e_get_settings(struct net_device *netdev,
+                            struct ethtool_cmd *ecmd)
+{
+       struct i40e_netdev_priv *np = netdev_priv(netdev);
+       struct i40e_pf *pf = np->vsi->back;
+       struct i40e_hw *hw = &pf->hw;
+       struct i40e_link_status *hw_link_info = &hw->phy.link_info;
+       bool link_up = hw_link_info->link_info & I40E_AQ_LINK_UP;
+       u32 link_speed = hw_link_info->link_speed;
+
+       /* hardware is either in 40G mode or 10G mode
+        * NOTE: this section initializes supported and advertising
+        */
+       switch (hw_link_info->phy_type) {
+       case I40E_PHY_TYPE_40GBASE_CR4:
+       case I40E_PHY_TYPE_40GBASE_CR4_CU:
+               ecmd->supported = SUPPORTED_40000baseCR4_Full;
+               ecmd->advertising = ADVERTISED_40000baseCR4_Full;
+               break;
+       case I40E_PHY_TYPE_40GBASE_KR4:
+               ecmd->supported = SUPPORTED_40000baseKR4_Full;
+               ecmd->advertising = ADVERTISED_40000baseKR4_Full;
+               break;
+       case I40E_PHY_TYPE_40GBASE_SR4:
+               ecmd->supported = SUPPORTED_40000baseSR4_Full;
+               ecmd->advertising = ADVERTISED_40000baseSR4_Full;
+               break;
+       case I40E_PHY_TYPE_40GBASE_LR4:
+               ecmd->supported = SUPPORTED_40000baseLR4_Full;
+               ecmd->advertising = ADVERTISED_40000baseLR4_Full;
+               break;
+       case I40E_PHY_TYPE_10GBASE_KX4:
+               ecmd->supported = SUPPORTED_10000baseKX4_Full;
+               ecmd->advertising = ADVERTISED_10000baseKX4_Full;
+               break;
+       case I40E_PHY_TYPE_10GBASE_KR:
+               ecmd->supported = SUPPORTED_10000baseKR_Full;
+               ecmd->advertising = ADVERTISED_10000baseKR_Full;
+               break;
+       case I40E_PHY_TYPE_10GBASE_T:
+       default:
+               ecmd->supported = SUPPORTED_10000baseT_Full;
+               ecmd->advertising = ADVERTISED_10000baseT_Full;
+               break;
+       }
+
+       /* for now just say autoneg all the time */
+       ecmd->supported |= SUPPORTED_Autoneg;
+
+       if (hw->phy.media_type == I40E_MEDIA_TYPE_BACKPLANE) {
+               ecmd->supported |= SUPPORTED_Backplane;
+               ecmd->advertising |= ADVERTISED_Backplane;
+               ecmd->port = PORT_NONE;
+       } else if (hw->phy.media_type == I40E_MEDIA_TYPE_BASET) {
+               ecmd->supported |= SUPPORTED_TP;
+               ecmd->advertising |= ADVERTISED_TP;
+               ecmd->port = PORT_TP;
+       } else {
+               ecmd->supported |= SUPPORTED_FIBRE;
+               ecmd->advertising |= ADVERTISED_FIBRE;
+               ecmd->port = PORT_FIBRE;
+       }
+
+       ecmd->transceiver = XCVR_EXTERNAL;
+
+       if (link_up) {
+               switch (link_speed) {
+               case I40E_LINK_SPEED_40GB:
+                       /* need a SPEED_40000 in ethtool.h */
+                       ethtool_cmd_speed_set(ecmd, 40000);
+                       break;
+               case I40E_LINK_SPEED_10GB:
+                       ethtool_cmd_speed_set(ecmd, SPEED_10000);
+                       break;
+               default:
+                       break;
+               }
+               ecmd->duplex = DUPLEX_FULL;
+       } else {
+               ethtool_cmd_speed_set(ecmd, SPEED_UNKNOWN);
+               ecmd->duplex = DUPLEX_UNKNOWN;
+       }
+
+       return 0;
+}
+
+/**
+ * i40e_get_pauseparam -  Get Flow Control status
+ * Return tx/rx-pause status
+ **/
+static void i40e_get_pauseparam(struct net_device *netdev,
+                               struct ethtool_pauseparam *pause)
+{
+       struct i40e_netdev_priv *np = netdev_priv(netdev);
+       struct i40e_pf *pf = np->vsi->back;
+       struct i40e_hw *hw = &pf->hw;
+       struct i40e_link_status *hw_link_info = &hw->phy.link_info;
+
+       pause->autoneg =
+               ((hw_link_info->an_info & I40E_AQ_AN_COMPLETED) ?
+                 AUTONEG_ENABLE : AUTONEG_DISABLE);
+
+       pause->rx_pause = 0;
+       pause->tx_pause = 0;
+       if (hw_link_info->an_info & I40E_AQ_LINK_PAUSE_RX)
+               pause->rx_pause = 1;
+       if (hw_link_info->an_info & I40E_AQ_LINK_PAUSE_TX)
+               pause->tx_pause = 1;
+}
+
+static u32 i40e_get_msglevel(struct net_device *netdev)
+{
+       struct i40e_netdev_priv *np = netdev_priv(netdev);
+       struct i40e_pf *pf = np->vsi->back;
+
+       return pf->msg_enable;
+}
+
+static void i40e_set_msglevel(struct net_device *netdev, u32 data)
+{
+       struct i40e_netdev_priv *np = netdev_priv(netdev);
+       struct i40e_pf *pf = np->vsi->back;
+
+       if (I40E_DEBUG_USER & data)
+               pf->hw.debug_mask = data;
+       pf->msg_enable = data;
+}
+
+static int i40e_get_regs_len(struct net_device *netdev)
+{
+       int reg_count = 0;
+       int i;
+
+       for (i = 0; i40e_reg_list[i].offset != 0; i++)
+               reg_count += i40e_reg_list[i].elements;
+
+       return reg_count * sizeof(u32);
+}
+
+static void i40e_get_regs(struct net_device *netdev, struct ethtool_regs *regs,
+                         void *p)
+{
+       struct i40e_netdev_priv *np = netdev_priv(netdev);
+       struct i40e_pf *pf = np->vsi->back;
+       struct i40e_hw *hw = &pf->hw;
+       u32 *reg_buf = p;
+       int i, j, ri;
+       u32 reg;
+
+       /* Tell ethtool which driver-version-specific regs output we have.
+        *
+        * At some point, if we have ethtool doing special formatting of
+        * this data, it will rely on this version number to know how to
+        * interpret things.  Hence, this needs to be updated if/when the
+        * diags register table is changed.
+        */
+       regs->version = 1;
+
+       /* loop through the diags reg table for what to print */
+       ri = 0;
+       for (i = 0; i40e_reg_list[i].offset != 0; i++) {
+               for (j = 0; j < i40e_reg_list[i].elements; j++) {
+                       reg = i40e_reg_list[i].offset
+                               + (j * i40e_reg_list[i].stride);
+                       reg_buf[ri++] = rd32(hw, reg);
+               }
+       }
+
+}
+
+static int i40e_get_eeprom(struct net_device *netdev,
+                          struct ethtool_eeprom *eeprom, u8 *bytes)
+{
+       struct i40e_netdev_priv *np = netdev_priv(netdev);
+       struct i40e_hw *hw = &np->vsi->back->hw;
+       int first_word, last_word;
+       u16 i, eeprom_len;
+       u16 *eeprom_buff;
+       int ret_val = 0;
+
+       if (eeprom->len == 0)
+               return -EINVAL;
+
+       eeprom->magic = hw->vendor_id | (hw->device_id << 16);
+
+       first_word = eeprom->offset >> 1;
+       last_word = (eeprom->offset + eeprom->len - 1) >> 1;
+       eeprom_len = last_word - first_word + 1;
+
+       eeprom_buff = kmalloc(sizeof(u16) * eeprom_len, GFP_KERNEL);
+       if (!eeprom_buff)
+               return -ENOMEM;
+
+       ret_val = i40e_read_nvm_buffer(hw, first_word, &eeprom_len,
+                                          eeprom_buff);
+       if (eeprom_len == 0) {
+               kfree(eeprom_buff);
+               return -EACCES;
+       }
+
+       /* Device's eeprom is always little-endian, word addressable */
+       for (i = 0; i < eeprom_len; i++)
+               le16_to_cpus(&eeprom_buff[i]);
+
+       memcpy(bytes, (u8 *)eeprom_buff + (eeprom->offset & 1), eeprom->len);
+       kfree(eeprom_buff);
+
+       return ret_val;
+}
+
+static int i40e_get_eeprom_len(struct net_device *netdev)
+{
+       struct i40e_netdev_priv *np = netdev_priv(netdev);
+       struct i40e_hw *hw = &np->vsi->back->hw;
+
+       return hw->nvm.sr_size * 2;
+}
+
+static void i40e_get_drvinfo(struct net_device *netdev,
+                            struct ethtool_drvinfo *drvinfo)
+{
+       struct i40e_netdev_priv *np = netdev_priv(netdev);
+       struct i40e_vsi *vsi = np->vsi;
+       struct i40e_pf *pf = vsi->back;
+
+       strlcpy(drvinfo->driver, i40e_driver_name, sizeof(drvinfo->driver));
+       strlcpy(drvinfo->version, i40e_driver_version_str,
+               sizeof(drvinfo->version));
+       strlcpy(drvinfo->fw_version, i40e_fw_version_str(&pf->hw),
+               sizeof(drvinfo->fw_version));
+       strlcpy(drvinfo->bus_info, pci_name(pf->pdev),
+               sizeof(drvinfo->bus_info));
+}
+
+static void i40e_get_ringparam(struct net_device *netdev,
+                              struct ethtool_ringparam *ring)
+{
+       struct i40e_netdev_priv *np = netdev_priv(netdev);
+       struct i40e_pf *pf = np->vsi->back;
+       struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
+
+       ring->rx_max_pending = I40E_MAX_NUM_DESCRIPTORS;
+       ring->tx_max_pending = I40E_MAX_NUM_DESCRIPTORS;
+       ring->rx_mini_max_pending = 0;
+       ring->rx_jumbo_max_pending = 0;
+       ring->rx_pending = vsi->rx_rings[0].count;
+       ring->tx_pending = vsi->tx_rings[0].count;
+       ring->rx_mini_pending = 0;
+       ring->rx_jumbo_pending = 0;
+}
+
+static int i40e_set_ringparam(struct net_device *netdev,
+                             struct ethtool_ringparam *ring)
+{
+       struct i40e_ring *tx_rings = NULL, *rx_rings = NULL;
+       struct i40e_netdev_priv *np = netdev_priv(netdev);
+       struct i40e_vsi *vsi = np->vsi;
+       struct i40e_pf *pf = vsi->back;
+       u32 new_rx_count, new_tx_count;
+       int i, err = 0;
+
+       if ((ring->rx_mini_pending) || (ring->rx_jumbo_pending))
+               return -EINVAL;
+
+       new_tx_count = clamp_t(u32, ring->tx_pending,
+                              I40E_MIN_NUM_DESCRIPTORS,
+                              I40E_MAX_NUM_DESCRIPTORS);
+       new_tx_count = ALIGN(new_tx_count, I40E_REQ_DESCRIPTOR_MULTIPLE);
+
+       new_rx_count = clamp_t(u32, ring->rx_pending,
+                              I40E_MIN_NUM_DESCRIPTORS,
+                              I40E_MAX_NUM_DESCRIPTORS);
+       new_rx_count = ALIGN(new_rx_count, I40E_REQ_DESCRIPTOR_MULTIPLE);
+
+       /* if nothing to do return success */
+       if ((new_tx_count == vsi->tx_rings[0].count) &&
+           (new_rx_count == vsi->rx_rings[0].count))
+               return 0;
+
+       while (test_and_set_bit(__I40E_CONFIG_BUSY, &pf->state))
+               usleep_range(1000, 2000);
+
+       if (!netif_running(vsi->netdev)) {
+               /* simple case - set for the next time the netdev is started */
+               for (i = 0; i < vsi->num_queue_pairs; i++) {
+                       vsi->tx_rings[i].count = new_tx_count;
+                       vsi->rx_rings[i].count = new_rx_count;
+               }
+               goto done;
+       }
+
+       /* We can't just free everything and then setup again,
+        * because the ISRs in MSI-X mode get passed pointers
+        * to the Tx and Rx ring structs.
+        */
+
+       /* alloc updated Tx resources */
+       if (new_tx_count != vsi->tx_rings[0].count) {
+               netdev_info(netdev,
+                           "Changing Tx descriptor count from %d to %d.\n",
+                           vsi->tx_rings[0].count, new_tx_count);
+               tx_rings = kcalloc(vsi->alloc_queue_pairs,
+                                  sizeof(struct i40e_ring), GFP_KERNEL);
+               if (!tx_rings) {
+                       err = -ENOMEM;
+                       goto done;
+               }
+
+               for (i = 0; i < vsi->num_queue_pairs; i++) {
+                       /* clone ring and setup updated count */
+                       tx_rings[i] = vsi->tx_rings[i];
+                       tx_rings[i].count = new_tx_count;
+                       err = i40e_setup_tx_descriptors(&tx_rings[i]);
+                       if (err) {
+                               while (i) {
+                                       i--;
+                                       i40e_free_tx_resources(&tx_rings[i]);
+                               }
+                               kfree(tx_rings);
+                               tx_rings = NULL;
+
+                               goto done;
+                       }
+               }
+       }
+
+       /* alloc updated Rx resources */
+       if (new_rx_count != vsi->rx_rings[0].count) {
+               netdev_info(netdev,
+                           "Changing Rx descriptor count from %d to %d\n",
+                           vsi->rx_rings[0].count, new_rx_count);
+               rx_rings = kcalloc(vsi->alloc_queue_pairs,
+                                  sizeof(struct i40e_ring), GFP_KERNEL);
+               if (!rx_rings) {
+                       err = -ENOMEM;
+                       goto free_tx;
+               }
+
+               for (i = 0; i < vsi->num_queue_pairs; i++) {
+                       /* clone ring and setup updated count */
+                       rx_rings[i] = vsi->rx_rings[i];
+                       rx_rings[i].count = new_rx_count;
+                       err = i40e_setup_rx_descriptors(&rx_rings[i]);
+                       if (err) {
+                               while (i) {
+                                       i--;
+                                       i40e_free_rx_resources(&rx_rings[i]);
+                               }
+                               kfree(rx_rings);
+                               rx_rings = NULL;
+
+                               goto free_tx;
+                       }
+               }
+       }
+
+       /* Bring interface down, copy in the new ring info,
+        * then restore the interface
+        */
+       i40e_down(vsi);
+
+       if (tx_rings) {
+               for (i = 0; i < vsi->num_queue_pairs; i++) {
+                       i40e_free_tx_resources(&vsi->tx_rings[i]);
+                       vsi->tx_rings[i] = tx_rings[i];
+               }
+               kfree(tx_rings);
+               tx_rings = NULL;
+       }
+
+       if (rx_rings) {
+               for (i = 0; i < vsi->num_queue_pairs; i++) {
+                       i40e_free_rx_resources(&vsi->rx_rings[i]);
+                       vsi->rx_rings[i] = rx_rings[i];
+               }
+               kfree(rx_rings);
+               rx_rings = NULL;
+       }
+
+       i40e_up(vsi);
+
+free_tx:
+       /* error cleanup if the Rx allocations failed after getting Tx */
+       if (tx_rings) {
+               for (i = 0; i < vsi->num_queue_pairs; i++)
+                       i40e_free_tx_resources(&tx_rings[i]);
+               kfree(tx_rings);
+               tx_rings = NULL;
+       }
+
+done:
+       clear_bit(__I40E_CONFIG_BUSY, &pf->state);
+
+       return err;
+}
+
+static int i40e_get_sset_count(struct net_device *netdev, int sset)
+{
+       struct i40e_netdev_priv *np = netdev_priv(netdev);
+       struct i40e_vsi *vsi = np->vsi;
+       struct i40e_pf *pf = vsi->back;
+
+       switch (sset) {
+       case ETH_SS_TEST:
+               return I40E_TEST_LEN;
+       case ETH_SS_STATS:
+               if (vsi == pf->vsi[pf->lan_vsi])
+                       return I40E_PF_STATS_LEN(netdev);
+               else
+                       return I40E_VSI_STATS_LEN(netdev);
+       default:
+               return -EOPNOTSUPP;
+       }
+}
+
+static void i40e_get_ethtool_stats(struct net_device *netdev,
+                                  struct ethtool_stats *stats, u64 *data)
+{
+       struct i40e_netdev_priv *np = netdev_priv(netdev);
+       struct i40e_vsi *vsi = np->vsi;
+       struct i40e_pf *pf = vsi->back;
+       int i = 0;
+       char *p;
+       int j;
+       struct rtnl_link_stats64 *net_stats = i40e_get_vsi_stats_struct(vsi);
+
+       i40e_update_stats(vsi);
+
+       for (j = 0; j < I40E_NETDEV_STATS_LEN; j++) {
+               p = (char *)net_stats + i40e_gstrings_net_stats[j].stat_offset;
+               data[i++] = (i40e_gstrings_net_stats[j].sizeof_stat ==
+                       sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
+       }
+       for (j = 0; j < vsi->num_queue_pairs; j++) {
+               data[i++] = vsi->tx_rings[j].tx_stats.packets;
+               data[i++] = vsi->tx_rings[j].tx_stats.bytes;
+       }
+       for (j = 0; j < vsi->num_queue_pairs; j++) {
+               data[i++] = vsi->rx_rings[j].rx_stats.packets;
+               data[i++] = vsi->rx_rings[j].rx_stats.bytes;
+       }
+       if (vsi == pf->vsi[pf->lan_vsi]) {
+               for (j = 0; j < I40E_GLOBAL_STATS_LEN; j++) {
+                       p = (char *)pf + i40e_gstrings_stats[j].stat_offset;
+                       data[i++] = (i40e_gstrings_stats[j].sizeof_stat ==
+                                  sizeof(u64)) ? *(u64 *)p : *(u32 *)p;
+               }
+               for (j = 0; j < I40E_MAX_USER_PRIORITY; j++) {
+                       data[i++] = pf->stats.priority_xon_tx[j];
+                       data[i++] = pf->stats.priority_xoff_tx[j];
+               }
+               for (j = 0; j < I40E_MAX_USER_PRIORITY; j++) {
+                       data[i++] = pf->stats.priority_xon_rx[j];
+                       data[i++] = pf->stats.priority_xoff_rx[j];
+               }
+               for (j = 0; j < I40E_MAX_USER_PRIORITY; j++)
+                       data[i++] = pf->stats.priority_xon_2_xoff[j];
+       }
+}
+
+static void i40e_get_strings(struct net_device *netdev, u32 stringset,
+                            u8 *data)
+{
+       struct i40e_netdev_priv *np = netdev_priv(netdev);
+       struct i40e_vsi *vsi = np->vsi;
+       struct i40e_pf *pf = vsi->back;
+       char *p = (char *)data;
+       int i;
+
+       switch (stringset) {
+       case ETH_SS_TEST:
+               for (i = 0; i < I40E_TEST_LEN; i++) {
+                       memcpy(data, i40e_gstrings_test[i], ETH_GSTRING_LEN);
+                       data += ETH_GSTRING_LEN;
+               }
+               break;
+       case ETH_SS_STATS:
+               for (i = 0; i < I40E_NETDEV_STATS_LEN; i++) {
+                       snprintf(p, ETH_GSTRING_LEN, "%s",
+                                i40e_gstrings_net_stats[i].stat_string);
+                       p += ETH_GSTRING_LEN;
+               }
+               for (i = 0; i < vsi->num_queue_pairs; i++) {
+                       snprintf(p, ETH_GSTRING_LEN, "tx-%u.tx_packets", i);
+                       p += ETH_GSTRING_LEN;
+                       snprintf(p, ETH_GSTRING_LEN, "tx-%u.tx_bytes", i);
+                       p += ETH_GSTRING_LEN;
+               }
+               for (i = 0; i < vsi->num_queue_pairs; i++) {
+                       snprintf(p, ETH_GSTRING_LEN, "rx-%u.rx_packets", i);
+                       p += ETH_GSTRING_LEN;
+                       snprintf(p, ETH_GSTRING_LEN, "rx-%u.rx_bytes", i);
+                       p += ETH_GSTRING_LEN;
+               }
+               if (vsi == pf->vsi[pf->lan_vsi]) {
+                       for (i = 0; i < I40E_GLOBAL_STATS_LEN; i++) {
+                               snprintf(p, ETH_GSTRING_LEN, "port.%s",
+                                        i40e_gstrings_stats[i].stat_string);
+                               p += ETH_GSTRING_LEN;
+                       }
+                       for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) {
+                               snprintf(p, ETH_GSTRING_LEN,
+                                        "port.tx_priority_%u_xon", i);
+                               p += ETH_GSTRING_LEN;
+                               snprintf(p, ETH_GSTRING_LEN,
+                                        "port.tx_priority_%u_xoff", i);
+                               p += ETH_GSTRING_LEN;
+                       }
+                       for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) {
+                               snprintf(p, ETH_GSTRING_LEN,
+                                        "port.rx_priority_%u_xon", i);
+                               p += ETH_GSTRING_LEN;
+                               snprintf(p, ETH_GSTRING_LEN,
+                                        "port.rx_priority_%u_xoff", i);
+                               p += ETH_GSTRING_LEN;
+                       }
+                       for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) {
+                               snprintf(p, ETH_GSTRING_LEN,
+                                        "port.rx_priority_%u_xon_2_xoff", i);
+                               p += ETH_GSTRING_LEN;
+                       }
+               }
+               /* BUG_ON(p - data != I40E_STATS_LEN * ETH_GSTRING_LEN); */
+               break;
+       }
+}
+
+static int i40e_get_ts_info(struct net_device *dev,
+                           struct ethtool_ts_info *info)
+{
+       return ethtool_op_get_ts_info(dev, info);
+}
+
+static int i40e_link_test(struct i40e_pf *pf, u64 *data)
+{
+       if (i40e_get_link_status(&pf->hw))
+               *data = 0;
+       else
+               *data = 1;
+
+       return *data;
+}
+
+static int i40e_reg_test(struct i40e_pf *pf, u64 *data)
+{
+       i40e_status ret;
+
+       ret = i40e_diag_reg_test(&pf->hw);
+       *data = ret;
+
+       return ret;
+}
+
+static int i40e_eeprom_test(struct i40e_pf *pf, u64 *data)
+{
+       i40e_status ret;
+
+       ret = i40e_diag_eeprom_test(&pf->hw);
+       *data = ret;
+
+       return ret;
+}
+
+static int i40e_intr_test(struct i40e_pf *pf, u64 *data)
+{
+       *data = -ENOSYS;
+
+       return *data;
+}
+
+static int i40e_loopback_test(struct i40e_pf *pf, u64 *data)
+{
+       *data = -ENOSYS;
+
+       return *data;
+}
+
+/**
+ * i40e_diag_test - run the ethtool self-test
+ * @netdev: network interface device structure
+ * @eth_test: test request; ETH_TEST_FL_OFFLINE selects the offline suite,
+ *            and ETH_TEST_FL_FAILED is OR'd in on any sub-test failure
+ * @data: per-test result array indexed by I40E_ETH_TEST_*
+ *
+ * Offline mode runs link, register, eeprom, interrupt and loopback tests,
+ * resetting the PF between the disruptive ones.  Online mode runs only the
+ * link test and reports the offline-only tests as passed.
+ **/
+static void i40e_diag_test(struct net_device *netdev,
+                          struct ethtool_test *eth_test, u64 *data)
+{
+       struct i40e_netdev_priv *np = netdev_priv(netdev);
+       struct i40e_pf *pf = np->vsi->back;
+
+       set_bit(__I40E_TESTING, &pf->state);
+       if (eth_test->flags == ETH_TEST_FL_OFFLINE) {
+               /* Offline tests */
+
+               netdev_info(netdev, "offline testing starting\n");
+
+               /* Link test performed before hardware reset
+                * so autoneg doesn't interfere with test result
+                */
+               netdev_info(netdev, "link test starting\n");
+               if (i40e_link_test(pf, &data[I40E_ETH_TEST_LINK]))
+                       eth_test->flags |= ETH_TEST_FL_FAILED;
+
+               netdev_info(netdev, "register test starting\n");
+               if (i40e_reg_test(pf, &data[I40E_ETH_TEST_REG]))
+                       eth_test->flags |= ETH_TEST_FL_FAILED;
+
+               i40e_do_reset(pf, (1 << __I40E_PF_RESET_REQUESTED));
+               netdev_info(netdev, "eeprom test starting\n");
+               if (i40e_eeprom_test(pf, &data[I40E_ETH_TEST_EEPROM]))
+                       eth_test->flags |= ETH_TEST_FL_FAILED;
+
+               i40e_do_reset(pf, (1 << __I40E_PF_RESET_REQUESTED));
+               netdev_info(netdev, "interrupt test starting\n");
+               if (i40e_intr_test(pf, &data[I40E_ETH_TEST_INTR]))
+                       eth_test->flags |= ETH_TEST_FL_FAILED;
+
+               i40e_do_reset(pf, (1 << __I40E_PF_RESET_REQUESTED));
+               netdev_info(netdev, "loopback test starting\n");
+               if (i40e_loopback_test(pf, &data[I40E_ETH_TEST_LOOPBACK]))
+                       eth_test->flags |= ETH_TEST_FL_FAILED;
+
+       } else {
+               netdev_info(netdev, "online test starting\n");
+               /* Online tests */
+               if (i40e_link_test(pf, &data[I40E_ETH_TEST_LINK]))
+                       eth_test->flags |= ETH_TEST_FL_FAILED;
+
+               /* Offline only tests, not run in online; pass by default */
+               data[I40E_ETH_TEST_REG] = 0;
+               data[I40E_ETH_TEST_EEPROM] = 0;
+               data[I40E_ETH_TEST_INTR] = 0;
+               data[I40E_ETH_TEST_LOOPBACK] = 0;
+       }
+
+       /* Clear the testing state on both paths; previously the offline
+        * path left __I40E_TESTING set forever, blocking later resets and
+        * configuration changes.
+        */
+       clear_bit(__I40E_TESTING, &pf->state);
+}
+
+/**
+ * i40e_get_wol - report Wake-on-LAN capability
+ * @netdev: network interface device structure
+ * @wol: ethtool WoL info to fill in
+ *
+ * This driver advertises no WoL support: both the supported mask and
+ * the currently enabled options are reported as zero.
+ **/
+static void i40e_get_wol(struct net_device *netdev,
+                        struct ethtool_wolinfo *wol)
+{
+       wol->supported = 0;
+       wol->wolopts = 0;
+}
+
+/**
+ * i40e_nway_reset - restart link autonegotiation
+ * @netdev: network interface device structure
+ *
+ * Asks the firmware, via an admin queue command, to restart
+ * autonegotiation on the link.  Returns 0 on success or -EIO if the
+ * admin queue call fails (the firmware status is logged).
+ **/
+static int i40e_nway_reset(struct net_device *netdev)
+{
+       /* restart autonegotiation */
+       struct i40e_netdev_priv *np = netdev_priv(netdev);
+       struct i40e_pf *pf = np->vsi->back;
+       struct i40e_hw *hw = &pf->hw;
+       i40e_status ret = 0;
+
+       ret = i40e_aq_set_link_restart_an(hw, NULL);
+       if (ret) {
+               netdev_info(netdev, "link restart failed, aq_err=%d\n",
+                           pf->hw.aq.asq_last_status);
+               return -EIO;
+       }
+
+       return 0;
+}
+
+/**
+ * i40e_set_phys_id - blink the port identification LED
+ * @netdev: network interface device structure
+ * @state: which phase of LED identification ethtool is requesting
+ *
+ * On ACTIVE, saves the current LED state and returns the blink
+ * frequency (2) so the ethtool core drives the blinking; ON/OFF force
+ * the LED state; INACTIVE restores the LED state saved at ACTIVE.
+ **/
+static int i40e_set_phys_id(struct net_device *netdev,
+                           enum ethtool_phys_id_state state)
+{
+       struct i40e_netdev_priv *np = netdev_priv(netdev);
+       struct i40e_pf *pf = np->vsi->back;
+       struct i40e_hw *hw = &pf->hw;
+       int blink_freq = 2;
+
+       switch (state) {
+       case ETHTOOL_ID_ACTIVE:
+               pf->led_status = i40e_led_get(hw);
+               return blink_freq;
+       case ETHTOOL_ID_ON:
+               i40e_led_set(hw, 0xF);
+               break;
+       case ETHTOOL_ID_OFF:
+               i40e_led_set(hw, 0x0);
+               break;
+       case ETHTOOL_ID_INACTIVE:
+               i40e_led_set(hw, pf->led_status);
+               break;
+       }
+
+       return 0;
+}
+
+/* NOTE: i40e hardware uses a conversion factor of 2 for Interrupt
+ * Throttle Rate (ITR) ie. ITR(1) = 2us ITR(10) = 20 us, and also
+ * 125us (8000 interrupts per second) == ITR(62)
+ */
+
+/**
+ * i40e_get_coalesce - report interrupt coalescing settings
+ * @netdev: network interface device structure
+ * @ec: ethtool coalesce structure to fill in
+ *
+ * A reported usecs value of 1 signals that the ITR is dynamic
+ * (adaptive); any other value is the stored ITR setting in usecs.
+ * The per-interrupt work limit is shared between the RX and TX
+ * directions.
+ **/
+static int i40e_get_coalesce(struct net_device *netdev,
+                            struct ethtool_coalesce *ec)
+{
+       struct i40e_netdev_priv *np = netdev_priv(netdev);
+       struct i40e_vsi *vsi = np->vsi;
+
+       ec->tx_max_coalesced_frames_irq = vsi->work_limit;
+       ec->rx_max_coalesced_frames_irq = vsi->work_limit;
+
+       if (ITR_IS_DYNAMIC(vsi->rx_itr_setting))
+               ec->rx_coalesce_usecs = 1;
+       else
+               ec->rx_coalesce_usecs = vsi->rx_itr_setting;
+
+       if (ITR_IS_DYNAMIC(vsi->tx_itr_setting))
+               ec->tx_coalesce_usecs = 1;
+       else
+               ec->tx_coalesce_usecs = vsi->tx_itr_setting;
+
+       return 0;
+}
+
+/**
+ * i40e_set_coalesce - set interrupt coalescing settings
+ * @netdev: network interface device structure
+ * @ec: requested ethtool coalesce parameters
+ *
+ * For each direction: 0 disables throttling, 1 selects dynamic
+ * (adaptive) ITR, and any other value must fall inside
+ * [I40E_MIN_ITR << 1, I40E_MAX_ITR << 1] (the hardware ITR unit is
+ * 2 usecs).  The new settings are then programmed into every queue
+ * vector's RX/TX ITR registers.
+ **/
+static int i40e_set_coalesce(struct net_device *netdev,
+                            struct ethtool_coalesce *ec)
+{
+       struct i40e_netdev_priv *np = netdev_priv(netdev);
+       struct i40e_q_vector *q_vector;
+       struct i40e_vsi *vsi = np->vsi;
+       struct i40e_pf *pf = vsi->back;
+       struct i40e_hw *hw = &pf->hw;
+       u16 vector;
+       int i;
+
+       /* NOTE(review): only the TX frame limit is stored, even when the
+        * caller supplied just the RX limit — confirm this is intentional.
+        */
+       if (ec->tx_max_coalesced_frames_irq || ec->rx_max_coalesced_frames_irq)
+               vsi->work_limit = ec->tx_max_coalesced_frames_irq;
+
+       switch (ec->rx_coalesce_usecs) {
+       case 0:
+               vsi->rx_itr_setting = 0;
+               break;
+       case 1:
+               vsi->rx_itr_setting = (I40E_ITR_DYNAMIC |
+                                      ITR_REG_TO_USEC(I40E_ITR_RX_DEF));
+               break;
+       default:
+               if ((ec->rx_coalesce_usecs < (I40E_MIN_ITR << 1)) ||
+                   (ec->rx_coalesce_usecs > (I40E_MAX_ITR << 1)))
+                       return -EINVAL;
+               vsi->rx_itr_setting = ec->rx_coalesce_usecs;
+               break;
+       }
+
+       switch (ec->tx_coalesce_usecs) {
+       case 0:
+               vsi->tx_itr_setting = 0;
+               break;
+       case 1:
+               vsi->tx_itr_setting = (I40E_ITR_DYNAMIC |
+                                      ITR_REG_TO_USEC(I40E_ITR_TX_DEF));
+               break;
+       default:
+               if ((ec->tx_coalesce_usecs < (I40E_MIN_ITR << 1)) ||
+                   (ec->tx_coalesce_usecs > (I40E_MAX_ITR << 1)))
+                       return -EINVAL;
+               vsi->tx_itr_setting = ec->tx_coalesce_usecs;
+               break;
+       }
+
+       /* push the new ITR values out to each queue vector's registers */
+       vector = vsi->base_vector;
+       q_vector = vsi->q_vectors;
+       for (i = 0; i < vsi->num_q_vectors; i++, vector++, q_vector++) {
+               q_vector->rx.itr = ITR_TO_REG(vsi->rx_itr_setting);
+               wr32(hw, I40E_PFINT_ITRN(0, vector - 1), q_vector->rx.itr);
+               q_vector->tx.itr = ITR_TO_REG(vsi->tx_itr_setting);
+               wr32(hw, I40E_PFINT_ITRN(1, vector - 1), q_vector->tx.itr);
+               i40e_flush(hw);
+       }
+
+       return 0;
+}
+
+/**
+ * i40e_get_rss_hash_opts - Get RSS hash Input Set for each flow type
+ * @pf: pointer to the physical function struct
+ * @cmd: ethtool rxnfc command; cmd->flow_type selects the flow,
+ *       cmd->data is filled with the RXH_* field flags hashed on
+ *
+ * Returns Success if the flow is supported, else Invalid Input.
+ **/
+static int i40e_get_rss_hash_opts(struct i40e_pf *pf, struct ethtool_rxnfc *cmd)
+{
+       cmd->data = 0;
+
+       /* Report default options for RSS on i40e */
+       switch (cmd->flow_type) {
+       case TCP_V4_FLOW:
+       case UDP_V4_FLOW:
+               cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+       /* fall through to add IP fields */
+       case SCTP_V4_FLOW:
+       case AH_ESP_V4_FLOW:
+       case AH_V4_FLOW:
+       case ESP_V4_FLOW:
+       case IPV4_FLOW:
+               cmd->data |= RXH_IP_SRC | RXH_IP_DST;
+               break;
+       case TCP_V6_FLOW:
+       case UDP_V6_FLOW:
+               cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+       /* fall through to add IP fields */
+       case SCTP_V6_FLOW:
+       case AH_ESP_V6_FLOW:
+       case AH_V6_FLOW:
+       case ESP_V6_FLOW:
+       case IPV6_FLOW:
+               cmd->data |= RXH_IP_SRC | RXH_IP_DST;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+/**
+ * i40e_get_rxnfc - command to get RX flow classification rules
+ * @netdev: network interface device structure
+ * @cmd: ethtool rxnfc command
+ * @rule_locs: buffer for rule locations (unused: no rules stored yet)
+ *
+ * Returns Success if the command is supported.
+ **/
+static int i40e_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd,
+                         u32 *rule_locs)
+{
+       struct i40e_netdev_priv *np = netdev_priv(netdev);
+       struct i40e_vsi *vsi = np->vsi;
+       struct i40e_pf *pf = vsi->back;
+       int ret = -EOPNOTSUPP;
+
+       switch (cmd->cmd) {
+       case ETHTOOL_GRXRINGS:
+               cmd->data = vsi->alloc_queue_pairs;
+               ret = 0;
+               break;
+       case ETHTOOL_GRXFH:
+               ret = i40e_get_rss_hash_opts(pf, cmd);
+               break;
+       case ETHTOOL_GRXCLSRLCNT:
+               ret = 0;
+               break;
+       case ETHTOOL_GRXCLSRULE:
+               ret = 0;
+               break;
+       case ETHTOOL_GRXCLSRLALL:
+               cmd->data = 500;
+               ret = 0;
+               break;
+       default:
+               break;
+       }
+
+       return ret;
+}
+
+/**
+ * i40e_set_rss_hash_opt - Enable/Disable flow types for RSS hash
+ * @pf: pointer to the physical function struct
+ * @nfc: ethtool rxnfc command selecting the flow type and RXH_* fields
+ *
+ * Returns Success if the flow input set is supported.
+ **/
+static int i40e_set_rss_hash_opt(struct i40e_pf *pf, struct ethtool_rxnfc *nfc)
+{
+       struct i40e_hw *hw = &pf->hw;
+       /* current 64-bit hash-enable mask, read from two 32-bit registers */
+       u64 hena = (u64)rd32(hw, I40E_PFQF_HENA(0)) |
+                  ((u64)rd32(hw, I40E_PFQF_HENA(1)) << 32);
+
+       /* RSS does not support anything other than hashing
+        * to queues on src and dst IPs and ports
+        */
+       if (nfc->data & ~(RXH_IP_SRC | RXH_IP_DST |
+                         RXH_L4_B_0_1 | RXH_L4_B_2_3))
+               return -EINVAL;
+
+       /* We need at least the IP SRC and DEST fields for hashing */
+       if (!(nfc->data & RXH_IP_SRC) ||
+           !(nfc->data & RXH_IP_DST))
+               return -EINVAL;
+
+       switch (nfc->flow_type) {
+       case TCP_V4_FLOW:
+               switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) {
+               case 0:
+                       hena &= ~((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_TCP);
+                       break;
+               case (RXH_L4_B_0_1 | RXH_L4_B_2_3):
+                       hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_TCP);
+                       break;
+               default:
+                       return -EINVAL;
+               }
+               break;
+       case TCP_V6_FLOW:
+               switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) {
+               case 0:
+                       hena &= ~((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_TCP);
+                       break;
+               case (RXH_L4_B_0_1 | RXH_L4_B_2_3):
+                       hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_TCP);
+                       break;
+               default:
+                       return -EINVAL;
+               }
+               break;
+       case UDP_V4_FLOW:
+               switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) {
+               case 0:
+                       hena &=
+                       ~(((u64)1 << I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP) |
+                       ((u64)1 << I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP) |
+                       ((u64)1 << I40E_FILTER_PCTYPE_FRAG_IPV4));
+                       break;
+               case (RXH_L4_B_0_1 | RXH_L4_B_2_3):
+                       hena |=
+                       (((u64)1 << I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP)  |
+                       ((u64)1 << I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP) |
+                       ((u64)1 << I40E_FILTER_PCTYPE_FRAG_IPV4));
+                       break;
+               default:
+                       return -EINVAL;
+               }
+               break;
+       case UDP_V6_FLOW:
+               switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) {
+               case 0:
+                       hena &=
+                       ~(((u64)1 << I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP) |
+                       ((u64)1 << I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP) |
+                       ((u64)1 << I40E_FILTER_PCTYPE_FRAG_IPV6));
+                       break;
+               case (RXH_L4_B_0_1 | RXH_L4_B_2_3):
+                       hena |=
+                       (((u64)1 << I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP)  |
+                       ((u64)1 << I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP) |
+                       ((u64)1 << I40E_FILTER_PCTYPE_FRAG_IPV6));
+                       break;
+               default:
+                       return -EINVAL;
+               }
+               break;
+       case AH_ESP_V4_FLOW:
+       case AH_V4_FLOW:
+       case ESP_V4_FLOW:
+       case SCTP_V4_FLOW:
+               /* port-level hashing is not supported for these flow types */
+               if ((nfc->data & RXH_L4_B_0_1) ||
+                   (nfc->data & RXH_L4_B_2_3))
+                       return -EINVAL;
+               hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_OTHER);
+               break;
+       case AH_ESP_V6_FLOW:
+       case AH_V6_FLOW:
+       case ESP_V6_FLOW:
+       case SCTP_V6_FLOW:
+               /* port-level hashing is not supported for these flow types */
+               if ((nfc->data & RXH_L4_B_0_1) ||
+                   (nfc->data & RXH_L4_B_2_3))
+                       return -EINVAL;
+               hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_OTHER);
+               break;
+       case IPV4_FLOW:
+               hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_OTHER) |
+                       ((u64)1 << I40E_FILTER_PCTYPE_FRAG_IPV4);
+               break;
+       case IPV6_FLOW:
+               hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_OTHER) |
+                       ((u64)1 << I40E_FILTER_PCTYPE_FRAG_IPV6);
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       /* write the updated mask back to the two halves and flush */
+       wr32(hw, I40E_PFQF_HENA(0), (u32)hena);
+       wr32(hw, I40E_PFQF_HENA(1), (u32)(hena >> 32));
+       i40e_flush(hw);
+
+       return 0;
+}
+
+#define IP_HEADER_OFFSET 14    /* Ethernet header length before the IP header */
+/**
+ * i40e_add_del_fdir_udpv4 - Add/Remove UDPv4 Flow Director filters for
+ * a specific flow spec
+ * @vsi: pointer to the targeted VSI
+ * @fd_data: the flow director data required from the FDir descriptor
+ * @fsp: the ethtool flow spec
+ * @add: true adds a filter, false removes it
+ *
+ * Writes the addresses/ports from the spec into fd_data->raw_packet and
+ * programs one filter per UDP packet classifier type (PCTYPE).
+ *
+ * Returns 0 if the filters were successfully added or removed
+ **/
+static int i40e_add_del_fdir_udpv4(struct i40e_vsi *vsi,
+                                  struct i40e_fdir_data *fd_data,
+                                  struct ethtool_rx_flow_spec *fsp, bool add)
+{
+       struct i40e_pf *pf = vsi->back;
+       struct udphdr *udp;
+       struct iphdr *ip;
+       bool err = false;
+       int ret;
+       int i;
+
+       ip = (struct iphdr *)(fd_data->raw_packet + IP_HEADER_OFFSET);
+       udp = (struct udphdr *)(fd_data->raw_packet + IP_HEADER_OFFSET
+             + sizeof(struct iphdr));
+
+       ip->saddr = fsp->h_u.tcp_ip4_spec.ip4src;
+       ip->daddr = fsp->h_u.tcp_ip4_spec.ip4dst;
+       udp->source = fsp->h_u.tcp_ip4_spec.psrc;
+       udp->dest = fsp->h_u.tcp_ip4_spec.pdst;
+
+       /* program the same filter for every UDP PCTYPE in the range */
+       for (i = I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP;
+            i <= I40E_FILTER_PCTYPE_NONF_IPV4_UDP; i++) {
+               fd_data->pctype = i;
+               ret = i40e_program_fdir_filter(fd_data, pf, add);
+
+               if (ret) {
+                       dev_info(&pf->pdev->dev,
+                                "Filter command send failed for PCTYPE %d (ret = %d)\n",
+                                fd_data->pctype, ret);
+                       err = true;
+               } else {
+                       dev_info(&pf->pdev->dev,
+                                "Filter OK for PCTYPE %d (ret = %d)\n",
+                                fd_data->pctype, ret);
+               }
+       }
+
+       return err ? -EOPNOTSUPP : 0;
+}
+
+/**
+ * i40e_add_del_fdir_tcpv4 - Add/Remove TCPv4 Flow Director filters for
+ * a specific flow spec
+ * @vsi: pointer to the targeted VSI
+ * @fd_data: the flow director data required from the FDir descriptor
+ * @fsp: the ethtool flow spec
+ * @add: true adds a filter, false removes it
+ *
+ * Programs two filters: first a TCP-SYN filter keyed on destination
+ * only, then a full TCP filter after the source fields are also set.
+ *
+ * Returns 0 if the filters were successfully added or removed
+ **/
+static int i40e_add_del_fdir_tcpv4(struct i40e_vsi *vsi,
+                                  struct i40e_fdir_data *fd_data,
+                                  struct ethtool_rx_flow_spec *fsp, bool add)
+{
+       struct i40e_pf *pf = vsi->back;
+       struct tcphdr *tcp;
+       struct iphdr *ip;
+       bool err = false;
+       int ret;
+
+       ip = (struct iphdr *)(fd_data->raw_packet + IP_HEADER_OFFSET);
+       tcp = (struct tcphdr *)(fd_data->raw_packet + IP_HEADER_OFFSET
+             + sizeof(struct iphdr));
+
+       /* destination-only fields for the SYN classifier */
+       ip->daddr = fsp->h_u.tcp_ip4_spec.ip4dst;
+       tcp->dest = fsp->h_u.tcp_ip4_spec.pdst;
+
+       fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN;
+       ret = i40e_program_fdir_filter(fd_data, pf, add);
+
+       if (ret) {
+               dev_info(&pf->pdev->dev,
+                        "Filter command send failed for PCTYPE %d (ret = %d)\n",
+                        fd_data->pctype, ret);
+               err = true;
+       } else {
+               dev_info(&pf->pdev->dev, "Filter OK for PCTYPE %d (ret = %d)\n",
+                        fd_data->pctype, ret);
+       }
+
+       /* add the source fields for the full TCP classifier */
+       ip->saddr = fsp->h_u.tcp_ip4_spec.ip4src;
+       tcp->source = fsp->h_u.tcp_ip4_spec.psrc;
+
+       fd_data->pctype = I40E_FILTER_PCTYPE_NONF_IPV4_TCP;
+
+       ret = i40e_program_fdir_filter(fd_data, pf, add);
+       if (ret) {
+               dev_info(&pf->pdev->dev,
+                        "Filter command send failed for PCTYPE %d (ret = %d)\n",
+                        fd_data->pctype, ret);
+               err = true;
+       } else {
+               dev_info(&pf->pdev->dev, "Filter OK for PCTYPE %d (ret = %d)\n",
+                         fd_data->pctype, ret);
+       }
+
+       return err ? -EOPNOTSUPP : 0;
+}
+
+/**
+ * i40e_add_del_fdir_sctpv4 - Add/Remove SCTPv4 Flow Director filters for
+ * a specific flow spec
+ * @vsi: pointer to the targeted VSI
+ * @fd_data: the flow director data required from the FDir descriptor
+ * @fsp: the ethtool flow spec
+ * @add: true adds a filter, false removes it
+ *
+ * SCTP filters are not supported yet; always returns -EOPNOTSUPP.
+ *
+ * Returns 0 if the filters were successfully added or removed
+ **/
+static int i40e_add_del_fdir_sctpv4(struct i40e_vsi *vsi,
+                                   struct i40e_fdir_data *fd_data,
+                                   struct ethtool_rx_flow_spec *fsp, bool add)
+{
+       return -EOPNOTSUPP;
+}
+
+/**
+ * i40e_add_del_fdir_ipv4 - Add/Remove IPv4 Flow Director filters for
+ * a specific flow spec
+ * @vsi: pointer to the targeted VSI
+ * @fd_data: the flow director data required for the FDir descriptor
+ * @fsp: the ethtool flow spec
+ * @add: true adds a filter, false removes it
+ *
+ * Programs one filter per PCTYPE in the IPv4-OTHER..FRAG-IPV4 range so
+ * both non-fragmented and fragmented IPv4 traffic match the rule.
+ *
+ * Returns 0 if the filters were successfully added or removed
+ **/
+static int i40e_add_del_fdir_ipv4(struct i40e_vsi *vsi,
+                                 struct i40e_fdir_data *fd_data,
+                                 struct ethtool_rx_flow_spec *fsp, bool add)
+{
+       struct i40e_pf *pf = vsi->back;
+       struct iphdr *ip;
+       bool err = false;
+       int ret;
+       int i;
+
+       ip = (struct iphdr *)(fd_data->raw_packet + IP_HEADER_OFFSET);
+
+       ip->saddr = fsp->h_u.usr_ip4_spec.ip4src;
+       ip->daddr = fsp->h_u.usr_ip4_spec.ip4dst;
+       ip->protocol = fsp->h_u.usr_ip4_spec.proto;
+
+       for (i = I40E_FILTER_PCTYPE_NONF_IPV4_OTHER;
+            i <= I40E_FILTER_PCTYPE_FRAG_IPV4; i++) {
+               fd_data->pctype = i;
+               ret = i40e_program_fdir_filter(fd_data, pf, add);
+
+               if (ret) {
+                       dev_info(&pf->pdev->dev,
+                                "Filter command send failed for PCTYPE %d (ret = %d)\n",
+                                fd_data->pctype, ret);
+                       err = true;
+               } else {
+                       dev_info(&pf->pdev->dev,
+                                "Filter OK for PCTYPE %d (ret = %d)\n",
+                                fd_data->pctype, ret);
+               }
+       }
+
+       return err ? -EOPNOTSUPP : 0;
+}
+
+/**
+ * i40e_add_del_fdir_ethtool - Add/Remove Flow Director filters for
+ * a specific flow spec based on their protocol
+ * @vsi: pointer to the targeted VSI
+ * @cmd: command to get or set RX flow classification rules
+ * @add: true adds a filter, false removes it
+ *
+ * Validates the target queue, builds an i40e_fdir_data with a
+ * temporary raw packet buffer, and dispatches to the per-protocol
+ * helper based on the spec's flow type.
+ *
+ * Returns 0 if the filters were successfully added or removed
+ **/
+static int i40e_add_del_fdir_ethtool(struct i40e_vsi *vsi,
+                       struct ethtool_rxnfc *cmd, bool add)
+{
+       struct i40e_fdir_data fd_data;
+       int ret = -EINVAL;
+       struct i40e_pf *pf;
+       struct ethtool_rx_flow_spec *fsp =
+               (struct ethtool_rx_flow_spec *)&cmd->fs;
+
+       if (!vsi)
+               return -EINVAL;
+
+       pf = vsi->back;
+
+       /* ring_cookie is either "drop" (RX_CLS_FLOW_DISC) or a valid queue */
+       if ((fsp->ring_cookie != RX_CLS_FLOW_DISC) &&
+           (fsp->ring_cookie >= vsi->num_queue_pairs))
+               return -EINVAL;
+
+       /* Populate the Flow Director that we have at the moment
+        * and allocate the raw packet buffer for the calling functions
+        */
+       fd_data.raw_packet = kzalloc(I40E_FDIR_MAX_RAW_PACKET_LOOKUP,
+                                    GFP_KERNEL);
+
+       if (!fd_data.raw_packet) {
+               dev_info(&pf->pdev->dev, "Could not allocate memory\n");
+               return -ENOMEM;
+       }
+
+       fd_data.q_index = fsp->ring_cookie;
+       fd_data.flex_off = 0;
+       fd_data.pctype = 0;
+       fd_data.dest_vsi = vsi->id;
+       fd_data.dest_ctl = 0;
+       fd_data.fd_status = 0;
+       fd_data.cnt_index = 0;
+       fd_data.fd_id = 0;
+
+       switch (fsp->flow_type & ~FLOW_EXT) {
+       case TCP_V4_FLOW:
+               ret = i40e_add_del_fdir_tcpv4(vsi, &fd_data, fsp, add);
+               break;
+       case UDP_V4_FLOW:
+               ret = i40e_add_del_fdir_udpv4(vsi, &fd_data, fsp, add);
+               break;
+       case SCTP_V4_FLOW:
+               ret = i40e_add_del_fdir_sctpv4(vsi, &fd_data, fsp, add);
+               break;
+       case IPV4_FLOW:
+               ret = i40e_add_del_fdir_ipv4(vsi, &fd_data, fsp, add);
+               break;
+       case IP_USER_FLOW:
+               /* user-specified IPv4 flow: dispatch on the L4 protocol */
+               switch (fsp->h_u.usr_ip4_spec.proto) {
+               case IPPROTO_TCP:
+                       ret = i40e_add_del_fdir_tcpv4(vsi, &fd_data, fsp, add);
+                       break;
+               case IPPROTO_UDP:
+                       ret = i40e_add_del_fdir_udpv4(vsi, &fd_data, fsp, add);
+                       break;
+               case IPPROTO_SCTP:
+                       ret = i40e_add_del_fdir_sctpv4(vsi, &fd_data, fsp, add);
+                       break;
+               default:
+                       ret = i40e_add_del_fdir_ipv4(vsi, &fd_data, fsp, add);
+                       break;
+               }
+               break;
+       default:
+               dev_info(&pf->pdev->dev, "Could not specify spec type\n");
+               ret = -EINVAL;
+       }
+
+       kfree(fd_data.raw_packet);
+       fd_data.raw_packet = NULL;
+
+       return ret;
+}
+/**
+ * i40e_set_rxnfc - command to set RX flow classification rules
+ * @netdev: network interface device structure
+ * @cmd: ethtool rxnfc command
+ *
+ * Dispatches RSS hash configuration and Flow Director rule
+ * insertion/deletion; unsupported sub-commands return -EOPNOTSUPP.
+ *
+ * Returns Success if the command is supported.
+ **/
+static int i40e_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd)
+{
+       struct i40e_netdev_priv *np = netdev_priv(netdev);
+       struct i40e_vsi *vsi = np->vsi;
+       struct i40e_pf *pf = vsi->back;
+       int ret = -EOPNOTSUPP;
+
+       switch (cmd->cmd) {
+       case ETHTOOL_SRXFH:
+               ret = i40e_set_rss_hash_opt(pf, cmd);
+               break;
+       case ETHTOOL_SRXCLSRLINS:
+               ret = i40e_add_del_fdir_ethtool(vsi, cmd, true);
+               break;
+       case ETHTOOL_SRXCLSRLDEL:
+               ret = i40e_add_del_fdir_ethtool(vsi, cmd, false);
+               break;
+       default:
+               break;
+       }
+
+       return ret;
+}
+
+/* ethtool operations supported by the i40e driver */
+static const struct ethtool_ops i40e_ethtool_ops = {
+       .get_settings           = i40e_get_settings,
+       .get_drvinfo            = i40e_get_drvinfo,
+       .get_regs_len           = i40e_get_regs_len,
+       .get_regs               = i40e_get_regs,
+       .nway_reset             = i40e_nway_reset,
+       .get_link               = ethtool_op_get_link,
+       .get_wol                = i40e_get_wol,
+       .get_eeprom_len         = i40e_get_eeprom_len,
+       .get_eeprom             = i40e_get_eeprom,
+       .get_ringparam          = i40e_get_ringparam,
+       .set_ringparam          = i40e_set_ringparam,
+       .get_pauseparam         = i40e_get_pauseparam,
+       .get_msglevel           = i40e_get_msglevel,
+       .set_msglevel           = i40e_set_msglevel,
+       .get_rxnfc              = i40e_get_rxnfc,
+       .set_rxnfc              = i40e_set_rxnfc,
+       .self_test              = i40e_diag_test,
+       .get_strings            = i40e_get_strings,
+       .set_phys_id            = i40e_set_phys_id,
+       .get_sset_count         = i40e_get_sset_count,
+       .get_ethtool_stats      = i40e_get_ethtool_stats,
+       .get_coalesce           = i40e_get_coalesce,
+       .set_coalesce           = i40e_set_coalesce,
+       .get_ts_info            = i40e_get_ts_info,
+};
+
+/**
+ * i40e_set_ethtool_ops - attach the i40e ethtool ops to a netdev
+ * @netdev: network interface device structure
+ **/
+void i40e_set_ethtool_ops(struct net_device *netdev)
+{
+       SET_ETHTOOL_OPS(netdev, &i40e_ethtool_ops);
+}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_hmc.c b/drivers/net/ethernet/intel/i40e/i40e_hmc.c
new file mode 100644 (file)
index 0000000..901804a
--- /dev/null
@@ -0,0 +1,366 @@
+/*******************************************************************************
+ *
+ * Intel Ethernet Controller XL710 Family Linux Driver
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Contact Information:
+ * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ *
+ ******************************************************************************/
+
+#include "i40e_osdep.h"
+#include "i40e_register.h"
+#include "i40e_status.h"
+#include "i40e_alloc.h"
+#include "i40e_hmc.h"
+#include "i40e_type.h"
+
+/**
+ * i40e_add_sd_table_entry - Adds a segment descriptor to the table
+ * @hw: pointer to our hw struct
+ * @hmc_info: pointer to the HMC configuration information struct
+ * @sd_index: segment descriptor index to manipulate
+ * @type: what type of segment descriptor we're manipulating
+ * @direct_mode_sz: size to alloc in direct mode
+ *
+ * For a not-yet-valid entry, allocates the backing DMA memory (a page
+ * descriptor page for PAGED, or direct_mode_sz bytes for DIRECT) and,
+ * for PAGED, the virtual pd_entry array.  Reference counts are bumped
+ * accordingly; on error the DMA allocation is freed again.
+ **/
+i40e_status i40e_add_sd_table_entry(struct i40e_hw *hw,
+                                             struct i40e_hmc_info *hmc_info,
+                                             u32 sd_index,
+                                             enum i40e_sd_entry_type type,
+                                             u64 direct_mode_sz)
+{
+       /* mem_type is only consumed by some osdep implementations; the
+        * unused attribute silences the warning otherwise
+        */
+       enum i40e_memory_type mem_type __attribute__((unused));
+       i40e_status ret_code = 0;
+       struct i40e_hmc_sd_entry *sd_entry;
+       bool dma_mem_alloc_done = false;
+       struct i40e_dma_mem mem;
+       u64 alloc_len;
+
+       if (NULL == hmc_info->sd_table.sd_entry) {
+               ret_code = I40E_ERR_BAD_PTR;
+               hw_dbg(hw, "i40e_add_sd_table_entry: bad sd_entry\n");
+               goto exit;
+       }
+
+       if (sd_index >= hmc_info->sd_table.sd_cnt) {
+               ret_code = I40E_ERR_INVALID_SD_INDEX;
+               hw_dbg(hw, "i40e_add_sd_table_entry: bad sd_index\n");
+               goto exit;
+       }
+
+       sd_entry = &hmc_info->sd_table.sd_entry[sd_index];
+       if (!sd_entry->valid) {
+               if (I40E_SD_TYPE_PAGED == type) {
+                       mem_type = i40e_mem_pd;
+                       alloc_len = I40E_HMC_PAGED_BP_SIZE;
+               } else {
+                       mem_type = i40e_mem_bp_jumbo;
+                       alloc_len = direct_mode_sz;
+               }
+
+               /* allocate a 4K pd page or 2M backing page */
+               ret_code = i40e_allocate_dma_mem(hw, &mem, mem_type, alloc_len,
+                                                I40E_HMC_PD_BP_BUF_ALIGNMENT);
+               if (ret_code)
+                       goto exit;
+               dma_mem_alloc_done = true;
+               if (I40E_SD_TYPE_PAGED == type) {
+                       ret_code = i40e_allocate_virt_mem(hw,
+                                       &sd_entry->u.pd_table.pd_entry_virt_mem,
+                                       sizeof(struct i40e_hmc_pd_entry) * 512);
+                       if (ret_code)
+                               goto exit;
+                       sd_entry->u.pd_table.pd_entry =
+                               (struct i40e_hmc_pd_entry *)
+                               sd_entry->u.pd_table.pd_entry_virt_mem.va;
+                       memcpy(&sd_entry->u.pd_table.pd_page_addr, &mem,
+                              sizeof(struct i40e_dma_mem));
+               } else {
+                       memcpy(&sd_entry->u.bp.addr, &mem,
+                              sizeof(struct i40e_dma_mem));
+                       sd_entry->u.bp.sd_pd_index = sd_index;
+               }
+               /* initialize the sd entry */
+               hmc_info->sd_table.sd_entry[sd_index].entry_type = type;
+
+               /* increment the ref count */
+               I40E_INC_SD_REFCNT(&hmc_info->sd_table);
+       }
+       /* Increment backing page reference count */
+       if (I40E_SD_TYPE_DIRECT == sd_entry->entry_type)
+               I40E_INC_BP_REFCNT(&sd_entry->u.bp);
+exit:
+       /* on any failure after the DMA allocation, give the memory back */
+       if (ret_code)
+               if (dma_mem_alloc_done)
+                       i40e_free_dma_mem(hw, &mem);
+
+       return ret_code;
+}
+
+/**
+ * i40e_add_pd_table_entry - Adds page descriptor to the specified table
+ * @hw: pointer to our HW structure
+ * @hmc_info: pointer to the HMC configuration information structure
+ * @pd_index: which page descriptor index to manipulate
+ *
+ * This function:
+ *     1. Initializes the pd entry
+ *     2. Adds pd_entry in the pd_table
+ *     3. Mark the entry valid in i40e_hmc_pd_entry structure
+ *     4. Initializes the pd_entry's ref count to 1
+ * assumptions:
+ *     1. The memory for pd should be pinned down, physically contiguous and
+ *        aligned on 4K boundary and zeroed memory.
+ *     2. It should be 4K in size.
+ **/
+i40e_status i40e_add_pd_table_entry(struct i40e_hw *hw,
+                                             struct i40e_hmc_info *hmc_info,
+                                             u32 pd_index)
+{
+       i40e_status ret_code = 0;
+       struct i40e_hmc_pd_table *pd_table;
+       struct i40e_hmc_pd_entry *pd_entry;
+       struct i40e_dma_mem mem;
+       u32 sd_idx, rel_pd_idx;
+       u64 *pd_addr;
+       u64 page_desc;
+
+       if (pd_index / I40E_HMC_PD_CNT_IN_SD >= hmc_info->sd_table.sd_cnt) {
+               ret_code = I40E_ERR_INVALID_PAGE_DESC_INDEX;
+               hw_dbg(hw, "i40e_add_pd_table_entry: bad pd_index\n");
+               goto exit;
+       }
+
+       /* find corresponding sd */
+       sd_idx = (pd_index / I40E_HMC_PD_CNT_IN_SD);
+       /* NOTE(review): a non-PAGED SD silently returns success (ret_code
+        * is still 0 here) — confirm callers expect that rather than an
+        * I40E_ERR_INVALID_SD_TYPE error.
+        */
+       if (I40E_SD_TYPE_PAGED !=
+           hmc_info->sd_table.sd_entry[sd_idx].entry_type)
+               goto exit;
+
+       rel_pd_idx = (pd_index % I40E_HMC_PD_CNT_IN_SD);
+       pd_table = &hmc_info->sd_table.sd_entry[sd_idx].u.pd_table;
+       pd_entry = &pd_table->pd_entry[rel_pd_idx];
+       if (!pd_entry->valid) {
+               /* allocate a 4K backing page */
+               ret_code = i40e_allocate_dma_mem(hw, &mem, i40e_mem_bp,
+                                                I40E_HMC_PAGED_BP_SIZE,
+                                                I40E_HMC_PD_BP_BUF_ALIGNMENT);
+               if (ret_code)
+                       goto exit;
+
+               memcpy(&pd_entry->bp.addr, &mem, sizeof(struct i40e_dma_mem));
+               pd_entry->bp.sd_pd_index = pd_index;
+               pd_entry->bp.entry_type = I40E_SD_TYPE_PAGED;
+               /* Set page address and valid bit */
+               page_desc = mem.pa | 0x1;
+
+               pd_addr = (u64 *)pd_table->pd_page_addr.va;
+               pd_addr += rel_pd_idx;
+
+               /* Add the backing page physical address in the pd entry */
+               memcpy(pd_addr, &page_desc, sizeof(u64));
+
+               pd_entry->sd_index = sd_idx;
+               pd_entry->valid = true;
+               I40E_INC_PD_REFCNT(pd_table);
+       }
+       I40E_INC_BP_REFCNT(&pd_entry->bp);
+exit:
+       return ret_code;
+}
+
+/**
+ * i40e_remove_pd_bp - remove a backing page from a page descriptor
+ * @hw: pointer to our HW structure
+ * @hmc_info: pointer to the HMC configuration information structure
+ * @idx: the page index
+ * @is_pf: distinguishes a VF from a PF
+ *
+ * This function:
+ *     1. Marks the entry in pd table (for paged address mode) or in sd table
+ *        (for direct address mode) invalid.
+ *     2. Write to register PMPDINV to invalidate the backing page in FV cache
+ *     3. Decrement the ref count for the pd entry
+ * assumptions:
+ *     1. Caller can deallocate the memory used by backing storage after this
+ *        function returns.
+ **/
+i40e_status i40e_remove_pd_bp(struct i40e_hw *hw,
+                                       struct i40e_hmc_info *hmc_info,
+                                       u32 idx, bool is_pf)
+{
+       i40e_status ret_code = 0;
+       struct i40e_hmc_pd_entry *pd_entry;
+       struct i40e_hmc_pd_table *pd_table;
+       struct i40e_hmc_sd_entry *sd_entry;
+       u32 sd_idx, rel_pd_idx;
+       u64 *pd_addr;
+
+       /* calculate index */
+       sd_idx = idx / I40E_HMC_PD_CNT_IN_SD;
+       rel_pd_idx = idx % I40E_HMC_PD_CNT_IN_SD;
+       if (sd_idx >= hmc_info->sd_table.sd_cnt) {
+               ret_code = I40E_ERR_INVALID_PAGE_DESC_INDEX;
+               hw_dbg(hw, "i40e_remove_pd_bp: bad idx\n");
+               goto exit;
+       }
+       sd_entry = &hmc_info->sd_table.sd_entry[sd_idx];
+       if (I40E_SD_TYPE_PAGED != sd_entry->entry_type) {
+               ret_code = I40E_ERR_INVALID_SD_TYPE;
+               hw_dbg(hw, "i40e_remove_pd_bp: wrong sd_entry type\n");
+               goto exit;
+       }
+       /* get the entry and decrease its ref counter */
+       pd_table = &hmc_info->sd_table.sd_entry[sd_idx].u.pd_table;
+       pd_entry = &pd_table->pd_entry[rel_pd_idx];
+       I40E_DEC_BP_REFCNT(&pd_entry->bp);
+       if (pd_entry->bp.ref_cnt) /* backing page still referenced; keep it */
+               goto exit;
+
+       /* mark the entry invalid */
+       pd_entry->valid = false;
+       I40E_DEC_PD_REFCNT(pd_table);
+       pd_addr = (u64 *)pd_table->pd_page_addr.va; /* zero the 64-bit descriptor in the PD page */
+       pd_addr += rel_pd_idx;
+       memset(pd_addr, 0, sizeof(u64));
+       if (is_pf)
+               I40E_INVALIDATE_PF_HMC_PD(hw, sd_idx, idx);
+       else
+               I40E_INVALIDATE_VF_HMC_PD(hw, sd_idx, idx, hmc_info->hmc_fn_id);
+
+       /* free memory here */
+       ret_code = i40e_free_dma_mem(hw, &(pd_entry->bp.addr));
+       if (ret_code)
+               goto exit;
+       if (!pd_table->ref_cnt)
+               i40e_free_virt_mem(hw, &pd_table->pd_entry_virt_mem);
+exit:
+       return ret_code;
+}
+
+/**
+ * i40e_prep_remove_sd_bp - Prepares to remove a backing page from a sd entry
+ * @hmc_info: pointer to the HMC configuration information structure
+ * @idx: index of the sd entry holding the backing page
+ **/
+i40e_status i40e_prep_remove_sd_bp(struct i40e_hmc_info *hmc_info,
+                                            u32 idx)
+{
+       i40e_status ret_code = 0;
+       struct i40e_hmc_sd_entry *sd_entry;
+
+       /* get the entry and decrease its ref counter */
+       sd_entry = &hmc_info->sd_table.sd_entry[idx];
+       I40E_DEC_BP_REFCNT(&sd_entry->u.bp);
+       if (sd_entry->u.bp.ref_cnt) {
+               ret_code = I40E_ERR_NOT_READY; /* page still in use; not safe to remove */
+               goto exit;
+       }
+       I40E_DEC_SD_REFCNT(&hmc_info->sd_table);
+
+       /* mark the entry invalid */
+       sd_entry->valid = false;
+exit:
+       return ret_code;
+}
+
+/**
+ * i40e_remove_sd_bp_new - Removes a backing page from a segment descriptor
+ * @hw: pointer to our hw struct
+ * @hmc_info: pointer to the HMC configuration information structure
+ * @idx: index of the sd entry holding the backing page
+ * @is_pf: used to distinguish between VF and PF
+ **/
+i40e_status i40e_remove_sd_bp_new(struct i40e_hw *hw,
+                                           struct i40e_hmc_info *hmc_info,
+                                           u32 idx, bool is_pf)
+{
+       struct i40e_hmc_sd_entry *sd_entry;
+       i40e_status ret_code = 0;
+
+       /* get the entry and decrease its ref counter */
+       sd_entry = &hmc_info->sd_table.sd_entry[idx];
+       if (is_pf) {
+               I40E_CLEAR_PF_SD_ENTRY(hw, idx, I40E_SD_TYPE_DIRECT);
+       } else {
+               ret_code = I40E_NOT_SUPPORTED; /* only the PF may clear SD entries */
+               goto exit;
+       }
+       ret_code = i40e_free_dma_mem(hw, &(sd_entry->u.bp.addr));
+       if (ret_code)
+               goto exit;
+exit:
+       return ret_code;
+}
+
+/**
+ * i40e_prep_remove_pd_page - Prepares to remove a PD page from sd entry.
+ * @hmc_info: pointer to the HMC configuration information structure
+ * @idx: segment descriptor index to find the relevant page descriptor
+ **/
+i40e_status i40e_prep_remove_pd_page(struct i40e_hmc_info *hmc_info,
+                                              u32 idx)
+{
+       i40e_status ret_code = 0;
+       struct i40e_hmc_sd_entry *sd_entry;
+
+       sd_entry = &hmc_info->sd_table.sd_entry[idx];
+
+       if (sd_entry->u.pd_table.ref_cnt) {
+               ret_code = I40E_ERR_NOT_READY; /* pd entries in this table still in use */
+               goto exit;
+       }
+
+       /* mark the entry invalid */
+       sd_entry->valid = false;
+
+       I40E_DEC_SD_REFCNT(&hmc_info->sd_table);
+exit:
+       return ret_code;
+}
+
+/**
+ * i40e_remove_pd_page_new - Removes a PD page from sd entry.
+ * @hw: pointer to our hw struct
+ * @hmc_info: pointer to the HMC configuration information structure
+ * @idx: segment descriptor index to find the relevant page descriptor
+ * @is_pf: used to distinguish between VF and PF
+ **/
+i40e_status i40e_remove_pd_page_new(struct i40e_hw *hw,
+                                             struct i40e_hmc_info *hmc_info,
+                                             u32 idx, bool is_pf)
+{
+       i40e_status ret_code = 0;
+       struct i40e_hmc_sd_entry *sd_entry;
+
+       sd_entry = &hmc_info->sd_table.sd_entry[idx];
+       if (is_pf) {
+               I40E_CLEAR_PF_SD_ENTRY(hw, idx, I40E_SD_TYPE_PAGED);
+       } else {
+               ret_code = I40E_NOT_SUPPORTED; /* only the PF may clear SD entries */
+               goto exit;
+       }
+       /* free memory here */
+       ret_code = i40e_free_dma_mem(hw, &(sd_entry->u.pd_table.pd_page_addr));
+       if (ret_code)
+               goto exit;
+exit:
+       return ret_code;
+}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_hmc.h b/drivers/net/ethernet/intel/i40e/i40e_hmc.h
new file mode 100644 (file)
index 0000000..aacd42a
--- /dev/null
@@ -0,0 +1,245 @@
+/*******************************************************************************
+ *
+ * Intel Ethernet Controller XL710 Family Linux Driver
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Contact Information:
+ * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ *
+ ******************************************************************************/
+
+#ifndef _I40E_HMC_H_
+#define _I40E_HMC_H_
+
+#define I40E_HMC_MAX_BP_COUNT 512
+
+/* forward-declare the HW struct for the compiler */
+struct i40e_hw;
+
+#define I40E_HMC_INFO_SIGNATURE                0x484D5347 /* HMSG */
+#define I40E_HMC_PD_CNT_IN_SD          512
+#define I40E_HMC_DIRECT_BP_SIZE                0x200000 /* 2M */
+#define I40E_HMC_PAGED_BP_SIZE         4096
+#define I40E_HMC_PD_BP_BUF_ALIGNMENT   4096
+#define I40E_FIRST_VF_FPM_ID           16
+
+struct i40e_hmc_obj_info {
+       u64 base;       /* base addr in FPM */
+       u32 max_cnt;    /* max count available for this hmc func */
+       u32 cnt;        /* count of objects driver actually wants to create */
+       u64 size;       /* size in bytes of one object */
+};
+
+enum i40e_sd_entry_type {
+       I40E_SD_TYPE_INVALID = 0,
+       I40E_SD_TYPE_PAGED   = 1,
+       I40E_SD_TYPE_DIRECT  = 2
+};
+
+struct i40e_hmc_bp {
+       enum i40e_sd_entry_type entry_type;
+       struct i40e_dma_mem addr; /* populate to be used by hw */
+       u32 sd_pd_index; /* index of the sd/pd entry this bp backs */
+       u32 ref_cnt; /* number of users of this backing page */
+};
+
+struct i40e_hmc_pd_entry {
+       struct i40e_hmc_bp bp;
+       u32 sd_index;
+       bool valid; /* true once a backing page is attached */
+};
+
+struct i40e_hmc_pd_table {
+       struct i40e_dma_mem pd_page_addr; /* populate to be used by hw */
+       struct i40e_hmc_pd_entry  *pd_entry; /* [512] for sw book keeping */
+       struct i40e_virt_mem pd_entry_virt_mem; /* virt mem for pd_entry */
+
+       u32 ref_cnt; /* count of valid pd entries in this table */
+       u32 sd_index;
+};
+
+struct i40e_hmc_sd_entry {
+       enum i40e_sd_entry_type entry_type;
+       bool valid; /* true once backing storage is attached */
+
+       union {
+               struct i40e_hmc_pd_table pd_table;
+               struct i40e_hmc_bp bp;
+       } u;
+};
+
+struct i40e_hmc_sd_table {
+       struct i40e_virt_mem addr; /* used to track sd_entry allocations */
+       u32 sd_cnt;
+       u32 ref_cnt;
+       struct i40e_hmc_sd_entry *sd_entry; /* (sd_cnt*512) entries max */
+};
+
+struct i40e_hmc_info {
+       u32 signature; /* I40E_HMC_INFO_SIGNATURE when initialized */
+       /* equals to pci func num for PF and dynamically allocated for VFs */
+       u8 hmc_fn_id;
+       u16 first_sd_index; /* index of the first available SD */
+
+       /* hmc objects */
+       struct i40e_hmc_obj_info *hmc_obj;
+       struct i40e_virt_mem hmc_obj_virt_mem;
+       struct i40e_hmc_sd_table sd_table;
+};
+
+#define I40E_INC_SD_REFCNT(sd_table)   ((sd_table)->ref_cnt++)
+#define I40E_INC_PD_REFCNT(pd_table)   ((pd_table)->ref_cnt++)
+#define I40E_INC_BP_REFCNT(bp)         ((bp)->ref_cnt++)
+
+#define I40E_DEC_SD_REFCNT(sd_table)   ((sd_table)->ref_cnt--)
+#define I40E_DEC_PD_REFCNT(pd_table)   ((pd_table)->ref_cnt--)
+#define I40E_DEC_BP_REFCNT(bp)         ((bp)->ref_cnt--)
+
+/**
+ * I40E_SET_PF_SD_ENTRY - marks the sd entry as valid in the hardware
+ * @hw: pointer to our hw struct
+ * @pa: pointer to physical address
+ * @sd_index: segment descriptor index
+ * (no hmc_fn_id: the macro writes the PF's own PFHMC registers)
+ * @type: if sd entry is direct or paged
+ **/
+#define I40E_SET_PF_SD_ENTRY(hw, pa, sd_index, type)                   \
+{                                                                      \
+       u32 val1, val2, val3;                                           \
+       val1 = (u32)(upper_32_bits(pa));                                \
+       val2 = (u32)(pa) | (I40E_HMC_MAX_BP_COUNT <<                    \
+                I40E_PFHMC_SDDATALOW_PMSDBPCOUNT_SHIFT) |              \
+               ((((type) == I40E_SD_TYPE_PAGED) ? 0 : 1) <<            \
+               I40E_PFHMC_SDDATALOW_PMSDTYPE_SHIFT) |                  \
+               (1 << I40E_PFHMC_SDDATALOW_PMSDVALID_SHIFT);            \
+       val3 = (sd_index) | (1 << I40E_PFHMC_SDCMD_PMSDWR_SHIFT);       \
+       wr32((hw), I40E_PFHMC_SDDATAHIGH, val1);                        \
+       wr32((hw), I40E_PFHMC_SDDATALOW, val2);                         \
+       wr32((hw), I40E_PFHMC_SDCMD, val3);                             \
+}
+
+/**
+ * I40E_CLEAR_PF_SD_ENTRY - marks the sd entry as invalid in the hardware
+ * @hw: pointer to our hw struct
+ * @sd_index: segment descriptor index
+ * (no hmc_fn_id: the macro writes the PF's own PFHMC registers)
+ * @type: if sd entry is direct or paged
+ **/
+#define I40E_CLEAR_PF_SD_ENTRY(hw, sd_index, type)                     \
+{                                                                      \
+       u32 val2, val3;                                                 \
+       val2 = (I40E_HMC_MAX_BP_COUNT <<                                \
+               I40E_PFHMC_SDDATALOW_PMSDBPCOUNT_SHIFT) |               \
+               ((((type) == I40E_SD_TYPE_PAGED) ? 0 : 1) <<            \
+               I40E_PFHMC_SDDATALOW_PMSDTYPE_SHIFT);                   \
+       val3 = (sd_index) | (1 << I40E_PFHMC_SDCMD_PMSDWR_SHIFT);       \
+       wr32((hw), I40E_PFHMC_SDDATAHIGH, 0);                           \
+       wr32((hw), I40E_PFHMC_SDDATALOW, val2);                         \
+       wr32((hw), I40E_PFHMC_SDCMD, val3);                             \
+}
+
+/**
+ * I40E_INVALIDATE_PF_HMC_PD - Invalidates the pd cache in the hardware
+ * @hw: pointer to our hw struct
+ * @sd_idx: segment descriptor index
+ * @pd_idx: page descriptor index
+ * (no hmc_fn_id: invalidates via the PF's own PDINV register)
+ **/
+#define I40E_INVALIDATE_PF_HMC_PD(hw, sd_idx, pd_idx)                  \
+       wr32((hw), I40E_PFHMC_PDINV,                                    \
+           (((sd_idx) << I40E_PFHMC_PDINV_PMSDIDX_SHIFT) |             \
+            ((pd_idx) << I40E_PFHMC_PDINV_PMPDIDX_SHIFT)))
+
+#define I40E_INVALIDATE_VF_HMC_PD(hw, sd_idx, pd_idx, hmc_fn_id)          \
+       wr32((hw), I40E_GLHMC_VFPDINV((hmc_fn_id) - I40E_FIRST_VF_FPM_ID), \
+            (((sd_idx) << I40E_PFHMC_PDINV_PMSDIDX_SHIFT) |               \
+             ((pd_idx) << I40E_PFHMC_PDINV_PMPDIDX_SHIFT)))
+
+/**
+ * I40E_FIND_SD_INDEX_LIMIT - finds segment descriptor index limit
+ * @hmc_info: pointer to the HMC configuration information structure
+ * @type: type of HMC resources we're searching
+ * @index: starting index for the object
+ * @cnt: number of objects we're trying to create
+ * @sd_idx: pointer to return index of the segment descriptor in question
+ * @sd_limit: pointer to return one past the last sd index (exclusive limit)
+ *
+ * This function calculates the segment descriptor index and index limit
+ * for the resource defined by i40e_hmc_rsrc_type.
+ **/
+#define I40E_FIND_SD_INDEX_LIMIT(hmc_info, type, index, cnt, sd_idx, sd_limit)\
+{                                                                      \
+       u64 fpm_addr, fpm_limit;                                        \
+       fpm_addr = (hmc_info)->hmc_obj[(type)].base +                   \
+                  (hmc_info)->hmc_obj[(type)].size * (index);          \
+       fpm_limit = fpm_addr + (hmc_info)->hmc_obj[(type)].size * (cnt);\
+       *(sd_idx) = (u32)(fpm_addr / I40E_HMC_DIRECT_BP_SIZE);          \
+       *(sd_limit) = (u32)((fpm_limit - 1) / I40E_HMC_DIRECT_BP_SIZE); \
+       /* add one more to the limit to correct our range */            \
+       *(sd_limit) += 1;                                               \
+}
+
+/**
+ * I40E_FIND_PD_INDEX_LIMIT - finds page descriptor index limit
+ * @hmc_info: pointer to the HMC configuration information struct
+ * @type: HMC resource type we're examining
+ * @idx: starting index for the object
+ * @cnt: number of objects we're trying to create
+ * @pd_index: pointer to return page descriptor index
+ * @pd_limit: pointer to return one past the last pd index (exclusive limit)
+ *
+ * Calculates the page descriptor index and index limit for the resource
+ * defined by i40e_hmc_rsrc_type.
+ **/
+#define I40E_FIND_PD_INDEX_LIMIT(hmc_info, type, idx, cnt, pd_index, pd_limit)\
+{                                                                      \
+       u64 fpm_adr, fpm_limit;                                         \
+       fpm_adr = (hmc_info)->hmc_obj[(type)].base +                    \
+                 (hmc_info)->hmc_obj[(type)].size * (idx);             \
+       fpm_limit = fpm_adr + (hmc_info)->hmc_obj[(type)].size * (cnt); \
+       *(pd_index) = (u32)(fpm_adr / I40E_HMC_PAGED_BP_SIZE);          \
+       *(pd_limit) = (u32)((fpm_limit - 1) / I40E_HMC_PAGED_BP_SIZE);  \
+       /* add one more to the limit to correct our range */            \
+       *(pd_limit) += 1;                                               \
+}
+i40e_status i40e_add_sd_table_entry(struct i40e_hw *hw,
+                                             struct i40e_hmc_info *hmc_info,
+                                             u32 sd_index,
+                                             enum i40e_sd_entry_type type,
+                                             u64 direct_mode_sz);
+
+i40e_status i40e_add_pd_table_entry(struct i40e_hw *hw,
+                                             struct i40e_hmc_info *hmc_info,
+                                             u32 pd_index);
+i40e_status i40e_remove_pd_bp(struct i40e_hw *hw,
+                                       struct i40e_hmc_info *hmc_info,
+                                       u32 idx, bool is_pf);
+i40e_status i40e_prep_remove_sd_bp(struct i40e_hmc_info *hmc_info,
+                                            u32 idx);
+i40e_status i40e_remove_sd_bp_new(struct i40e_hw *hw,
+                                           struct i40e_hmc_info *hmc_info,
+                                           u32 idx, bool is_pf);
+i40e_status i40e_prep_remove_pd_page(struct i40e_hmc_info *hmc_info,
+                                              u32 idx);
+i40e_status i40e_remove_pd_page_new(struct i40e_hw *hw,
+                                             struct i40e_hmc_info *hmc_info,
+                                             u32 idx, bool is_pf);
+
+#endif /* _I40E_HMC_H_ */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c b/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.c
new file mode 100644 (file)
index 0000000..a695b91
--- /dev/null
@@ -0,0 +1,1006 @@
+/*******************************************************************************
+ *
+ * Intel Ethernet Controller XL710 Family Linux Driver
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Contact Information:
+ * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ *
+ ******************************************************************************/
+
+#include "i40e_osdep.h"
+#include "i40e_register.h"
+#include "i40e_type.h"
+#include "i40e_hmc.h"
+#include "i40e_lan_hmc.h"
+#include "i40e_prototype.h"
+
+/* lan specific interface functions */
+
+/**
+ * i40e_align_l2obj_base - aligns base object pointer to 512 bytes
+ * @offset: base address offset needing alignment
+ *
+ * Aligns the layer 2 function private memory so it's 512-byte aligned.
+ **/
+static u64 i40e_align_l2obj_base(u64 offset)
+{
+       u64 aligned_offset = offset;
+
+       if ((offset % I40E_HMC_L2OBJ_BASE_ALIGNMENT) > 0) /* round up, never down */
+               aligned_offset += (I40E_HMC_L2OBJ_BASE_ALIGNMENT -
+                                  (offset % I40E_HMC_L2OBJ_BASE_ALIGNMENT));
+
+       return aligned_offset;
+}
+
+/**
+ * i40e_calculate_l2fpm_size - calculates layer 2 FPM memory size
+ * @txq_num: number of Tx queues needing backing context
+ * @rxq_num: number of Rx queues needing backing context
+ * @fcoe_cntx_num: amount of FCoE stateful contexts needing backing context
+ * @fcoe_filt_num: number of FCoE filters needing backing context
+ *
+ * Calculates the maximum amount of memory for the function required, based
+ * on the number of resources it must provide context for.
+ **/
+static u64 i40e_calculate_l2fpm_size(u32 txq_num, u32 rxq_num,
+                             u32 fcoe_cntx_num, u32 fcoe_filt_num)
+{
+       u64 fpm_size = 0;
+
+       fpm_size = txq_num * I40E_HMC_OBJ_SIZE_TXQ;
+       fpm_size = i40e_align_l2obj_base(fpm_size);
+
+       fpm_size += (rxq_num * I40E_HMC_OBJ_SIZE_RXQ);
+       fpm_size = i40e_align_l2obj_base(fpm_size);
+
+       fpm_size += (fcoe_cntx_num * I40E_HMC_OBJ_SIZE_FCOE_CNTX);
+       fpm_size = i40e_align_l2obj_base(fpm_size);
+
+       fpm_size += (fcoe_filt_num * I40E_HMC_OBJ_SIZE_FCOE_FILT);
+       fpm_size = i40e_align_l2obj_base(fpm_size);
+
+       return fpm_size;
+}
+
+/**
+ * i40e_init_lan_hmc - initialize i40e_hmc_info struct
+ * @hw: pointer to the HW structure
+ * @txq_num: number of Tx queues needing backing context
+ * @rxq_num: number of Rx queues needing backing context
+ * @fcoe_cntx_num: amount of FCoE stateful contexts needing backing context
+ * @fcoe_filt_num: number of FCoE filters needing backing context
+ *
+ * This function will be called once per physical function initialization.
+ * It will fill out the i40e_hmc_obj_info structure for LAN objects based on
+ * the driver's provided input, as well as information from the HMC itself
+ * loaded from NVRAM.
+ *
+ * Assumptions:
+ *   - HMC Resource Profile has been selected before calling this function.
+ **/
+i40e_status i40e_init_lan_hmc(struct i40e_hw *hw, u32 txq_num,
+                                       u32 rxq_num, u32 fcoe_cntx_num,
+                                       u32 fcoe_filt_num)
+{
+       struct i40e_hmc_obj_info *obj, *full_obj;
+       i40e_status ret_code = 0;
+       u64 l2fpm_size;
+       u32 size_exp;
+
+       hw->hmc.signature = I40E_HMC_INFO_SIGNATURE;
+       hw->hmc.hmc_fn_id = hw->pf_id;
+
+       /* allocate memory for hmc_obj */
+       ret_code = i40e_allocate_virt_mem(hw, &hw->hmc.hmc_obj_virt_mem,
+                       sizeof(struct i40e_hmc_obj_info) * I40E_HMC_LAN_MAX);
+       if (ret_code)
+               goto init_lan_hmc_out;
+       hw->hmc.hmc_obj = (struct i40e_hmc_obj_info *)
+                         hw->hmc.hmc_obj_virt_mem.va;
+
+       /* The full object will be used to create the LAN HMC SD */
+       full_obj = &hw->hmc.hmc_obj[I40E_HMC_LAN_FULL];
+       full_obj->max_cnt = 0;
+       full_obj->cnt = 0;
+       full_obj->base = 0;
+       full_obj->size = 0;
+
+       /* Tx queue context information */
+       obj = &hw->hmc.hmc_obj[I40E_HMC_LAN_TX];
+       obj->max_cnt = rd32(hw, I40E_GLHMC_LANQMAX);
+       obj->cnt = txq_num;
+       obj->base = 0;
+       size_exp = rd32(hw, I40E_GLHMC_LANTXOBJSZ);
+       obj->size = (u64)1 << size_exp; /* hw reports object size as a power-of-two exponent */
+
+       /* validate values requested by driver don't exceed HMC capacity */
+       if (txq_num > obj->max_cnt) {
+               ret_code = I40E_ERR_INVALID_HMC_OBJ_COUNT;
+               hw_dbg(hw, "i40e_init_lan_hmc: Tx context: asks for 0x%x but max allowed is 0x%x, returns error %d\n",
+                         txq_num, obj->max_cnt, ret_code);
+               goto init_lan_hmc_out;
+       }
+
+       /* aggregate values into the full LAN object for later */
+       full_obj->max_cnt += obj->max_cnt;
+       full_obj->cnt += obj->cnt;
+
+       /* Rx queue context information */
+       obj = &hw->hmc.hmc_obj[I40E_HMC_LAN_RX];
+       obj->max_cnt = rd32(hw, I40E_GLHMC_LANQMAX);
+       obj->cnt = rxq_num;
+       obj->base = hw->hmc.hmc_obj[I40E_HMC_LAN_TX].base +
+                   (hw->hmc.hmc_obj[I40E_HMC_LAN_TX].cnt *
+                    hw->hmc.hmc_obj[I40E_HMC_LAN_TX].size);
+       obj->base = i40e_align_l2obj_base(obj->base);
+       size_exp = rd32(hw, I40E_GLHMC_LANRXOBJSZ);
+       obj->size = (u64)1 << size_exp;
+
+       /* validate values requested by driver don't exceed HMC capacity */
+       if (rxq_num > obj->max_cnt) {
+               ret_code = I40E_ERR_INVALID_HMC_OBJ_COUNT;
+               hw_dbg(hw, "i40e_init_lan_hmc: Rx context: asks for 0x%x but max allowed is 0x%x, returns error %d\n",
+                         rxq_num, obj->max_cnt, ret_code);
+               goto init_lan_hmc_out;
+       }
+
+       /* aggregate values into the full LAN object for later */
+       full_obj->max_cnt += obj->max_cnt;
+       full_obj->cnt += obj->cnt;
+
+       /* FCoE context information */
+       obj = &hw->hmc.hmc_obj[I40E_HMC_FCOE_CTX];
+       obj->max_cnt = rd32(hw, I40E_GLHMC_FCOEMAX);
+       obj->cnt = fcoe_cntx_num;
+       obj->base = hw->hmc.hmc_obj[I40E_HMC_LAN_RX].base +
+                   (hw->hmc.hmc_obj[I40E_HMC_LAN_RX].cnt *
+                    hw->hmc.hmc_obj[I40E_HMC_LAN_RX].size);
+       obj->base = i40e_align_l2obj_base(obj->base);
+       size_exp = rd32(hw, I40E_GLHMC_FCOEDDPOBJSZ);
+       obj->size = (u64)1 << size_exp;
+
+       /* validate values requested by driver don't exceed HMC capacity */
+       if (fcoe_cntx_num > obj->max_cnt) {
+               ret_code = I40E_ERR_INVALID_HMC_OBJ_COUNT;
+               hw_dbg(hw, "i40e_init_lan_hmc: FCoE context: asks for 0x%x but max allowed is 0x%x, returns error %d\n",
+                         fcoe_cntx_num, obj->max_cnt, ret_code);
+               goto init_lan_hmc_out;
+       }
+
+       /* aggregate values into the full LAN object for later */
+       full_obj->max_cnt += obj->max_cnt;
+       full_obj->cnt += obj->cnt;
+
+       /* FCoE filter information */
+       obj = &hw->hmc.hmc_obj[I40E_HMC_FCOE_FILT];
+       obj->max_cnt = rd32(hw, I40E_GLHMC_FCOEFMAX);
+       obj->cnt = fcoe_filt_num;
+       obj->base = hw->hmc.hmc_obj[I40E_HMC_FCOE_CTX].base +
+                   (hw->hmc.hmc_obj[I40E_HMC_FCOE_CTX].cnt *
+                    hw->hmc.hmc_obj[I40E_HMC_FCOE_CTX].size);
+       obj->base = i40e_align_l2obj_base(obj->base);
+       size_exp = rd32(hw, I40E_GLHMC_FCOEFOBJSZ);
+       obj->size = (u64)1 << size_exp;
+
+       /* validate values requested by driver don't exceed HMC capacity */
+       if (fcoe_filt_num > obj->max_cnt) {
+               ret_code = I40E_ERR_INVALID_HMC_OBJ_COUNT;
+               hw_dbg(hw, "i40e_init_lan_hmc: FCoE filter: asks for 0x%x but max allowed is 0x%x, returns error %d\n",
+                         fcoe_filt_num, obj->max_cnt, ret_code);
+               goto init_lan_hmc_out;
+       }
+
+       /* aggregate values into the full LAN object for later */
+       full_obj->max_cnt += obj->max_cnt;
+       full_obj->cnt += obj->cnt;
+
+       hw->hmc.first_sd_index = 0;
+       hw->hmc.sd_table.ref_cnt = 0;
+       l2fpm_size = i40e_calculate_l2fpm_size(txq_num, rxq_num, fcoe_cntx_num,
+                                              fcoe_filt_num);
+       if (NULL == hw->hmc.sd_table.sd_entry) { /* allocate sd_table only on first init */
+               hw->hmc.sd_table.sd_cnt = (u32)
+                                  (l2fpm_size + I40E_HMC_DIRECT_BP_SIZE - 1) /
+                                  I40E_HMC_DIRECT_BP_SIZE;
+
+               /* allocate the sd_entry members in the sd_table */
+               ret_code = i40e_allocate_virt_mem(hw, &hw->hmc.sd_table.addr,
+                                         (sizeof(struct i40e_hmc_sd_entry) *
+                                         hw->hmc.sd_table.sd_cnt));
+               if (ret_code)
+                       goto init_lan_hmc_out;
+               hw->hmc.sd_table.sd_entry =
+                       (struct i40e_hmc_sd_entry *)hw->hmc.sd_table.addr.va;
+       }
+       /* store in the LAN full object for later */
+       full_obj->size = l2fpm_size;
+
+init_lan_hmc_out:
+       return ret_code;
+}
+
+/**
+ * i40e_remove_pd_page - Remove a page from the page descriptor table
+ * @hw: pointer to the HW structure
+ * @hmc_info: pointer to the HMC configuration information structure
+ * @idx: segment descriptor index to find the relevant page descriptor
+ *
+ * This function:
+ *     1. Marks the entry in pd table (for paged address mode) invalid
+ *     2. write to register PMPDINV to invalidate the backing page in FV cache
+ *     3. Decrement the ref count for pd_entry
+ * assumptions:
+ *     1. caller can deallocate the memory used by pd after this function
+ *        returns.
+ **/
+static i40e_status i40e_remove_pd_page(struct i40e_hw *hw,
+                                                struct i40e_hmc_info *hmc_info,
+                                                u32 idx)
+{
+       i40e_status ret_code = 0;
+
+       if (!i40e_prep_remove_pd_page(hmc_info, idx)) /* 0 == safe to remove */
+               ret_code = i40e_remove_pd_page_new(hw, hmc_info, idx, true);
+
+       return ret_code;
+}
+
+/**
+ * i40e_remove_sd_bp - remove a backing page from a segment descriptor
+ * @hw: pointer to our HW structure
+ * @hmc_info: pointer to the HMC configuration information structure
+ * @idx: the page index
+ *
+ * This function:
+ *     1. Marks the entry in sd table (for direct address mode) invalid
+ *     2. write to register PMSDCMD, PMSDDATALOW(PMSDDATALOW.PMSDVALID set
+ *        to 0) and PMSDDATAHIGH to invalidate the sd page
+ *     3. Decrement the ref count for the sd_entry
+ * assumptions:
+ *     1. caller can deallocate the memory used by backing storage after this
+ *        function returns.
+ **/
+static i40e_status i40e_remove_sd_bp(struct i40e_hw *hw,
+                                              struct i40e_hmc_info *hmc_info,
+                                              u32 idx)
+{
+       i40e_status ret_code = 0;
+
+       if (!i40e_prep_remove_sd_bp(hmc_info, idx)) /* 0 == safe to remove */
+               ret_code = i40e_remove_sd_bp_new(hw, hmc_info, idx, true);
+
+       return ret_code;
+}
+
+/**
+ * i40e_create_lan_hmc_object - allocate backing store for hmc objects
+ * @hw: pointer to the HW structure
+ * @info: pointer to i40e_hmc_create_obj_info struct
+ *
+ * This will allocate memory for PDs and backing pages and populate
+ * the sd and pd entries.
+ **/
+static i40e_status i40e_create_lan_hmc_object(struct i40e_hw *hw,
+                               struct i40e_hmc_lan_create_obj_info *info)
+{
+       i40e_status ret_code = 0;
+       struct i40e_hmc_sd_entry *sd_entry;
+       u32 pd_idx1 = 0, pd_lmt1 = 0;
+       u32 pd_idx = 0, pd_lmt = 0;
+       bool pd_error = false;
+       u32 sd_idx, sd_lmt;
+       u64 sd_size;
+       u32 i, j;
+
+       /* validate the caller-supplied object description before touching
+        * any of the SD/PD tables
+        */
+       if (NULL == info) {
+               ret_code = I40E_ERR_BAD_PTR;
+               hw_dbg(hw, "i40e_create_lan_hmc_object: bad info ptr\n");
+               goto exit;
+       }
+       if (NULL == info->hmc_info) {
+               ret_code = I40E_ERR_BAD_PTR;
+               hw_dbg(hw, "i40e_create_lan_hmc_object: bad hmc_info ptr\n");
+               goto exit;
+       }
+       if (I40E_HMC_INFO_SIGNATURE != info->hmc_info->signature) {
+               ret_code = I40E_ERR_BAD_PTR;
+               hw_dbg(hw, "i40e_create_lan_hmc_object: bad signature\n");
+               goto exit;
+       }
+
+       /* the requested index range must lie within the object count
+        * registered for this resource type
+        */
+       if (info->start_idx >= info->hmc_info->hmc_obj[info->rsrc_type].cnt) {
+               ret_code = I40E_ERR_INVALID_HMC_OBJ_INDEX;
+               hw_dbg(hw, "i40e_create_lan_hmc_object: returns error %d\n",
+                         ret_code);
+               goto exit;
+       }
+       if ((info->start_idx + info->count) >
+           info->hmc_info->hmc_obj[info->rsrc_type].cnt) {
+               ret_code = I40E_ERR_INVALID_HMC_OBJ_COUNT;
+               hw_dbg(hw, "i40e_create_lan_hmc_object: returns error %d\n",
+                         ret_code);
+               goto exit;
+       }
+
+       /* find sd index and limit */
+       I40E_FIND_SD_INDEX_LIMIT(info->hmc_info, info->rsrc_type,
+                                info->start_idx, info->count,
+                                &sd_idx, &sd_lmt);
+       if (sd_idx >= info->hmc_info->sd_table.sd_cnt ||
+           sd_lmt > info->hmc_info->sd_table.sd_cnt) {
+                       ret_code = I40E_ERR_INVALID_SD_INDEX;
+                       goto exit;
+       }
+       /* find pd index */
+       I40E_FIND_PD_INDEX_LIMIT(info->hmc_info, info->rsrc_type,
+                                info->start_idx, info->count, &pd_idx,
+                                &pd_lmt);
+
+       /* This is to cover for cases where you may not want to have an SD with
+        * the full 2M memory but something smaller. By not filling out any
+        * size, the function will default the SD size to be 2M.
+        */
+       if (info->direct_mode_sz == 0)
+               sd_size = I40E_HMC_DIRECT_BP_SIZE;
+       else
+               sd_size = info->direct_mode_sz;
+
+       /* check if all the sds are valid. If not, allocate a page and
+        * initialize it.
+        */
+       for (j = sd_idx; j < sd_lmt; j++) {
+               /* update the sd table entry */
+               ret_code = i40e_add_sd_table_entry(hw, info->hmc_info, j,
+                                                  info->entry_type,
+                                                  sd_size);
+               if (ret_code)
+                       goto exit_sd_error;
+               sd_entry = &info->hmc_info->sd_table.sd_entry[j];
+               if (I40E_SD_TYPE_PAGED == sd_entry->entry_type) {
+                       /* check if all the pds in this sd are valid. If not,
+                        * allocate a page and initialize it.
+                        */
+
+                       /* find pd_idx and pd_lmt in this sd */
+                       pd_idx1 = max(pd_idx, (j * I40E_HMC_MAX_BP_COUNT));
+                       pd_lmt1 = min(pd_lmt,
+                                     ((j + 1) * I40E_HMC_MAX_BP_COUNT));
+                       for (i = pd_idx1; i < pd_lmt1; i++) {
+                               /* update the pd table entry */
+                               ret_code = i40e_add_pd_table_entry(hw,
+                                                               info->hmc_info,
+                                                               i);
+                               if (ret_code) {
+                                       pd_error = true;
+                                       break;
+                               }
+                       }
+                       if (pd_error) {
+                               /* remove the backing pages from pd_idx1 to i */
+                               while (i && (i > pd_idx1)) {
+                                       i40e_remove_pd_bp(hw, info->hmc_info,
+                                                         (i - 1), true);
+                                       i--;
+                               }
+                       }
+               }
+               /* record a newly valid SD entry for the PF, using the
+                * backing address that matches the entry type
+                */
+               if (!sd_entry->valid) {
+                       sd_entry->valid = true;
+                       switch (sd_entry->entry_type) {
+                       case I40E_SD_TYPE_PAGED:
+                               I40E_SET_PF_SD_ENTRY(hw,
+                                       sd_entry->u.pd_table.pd_page_addr.pa,
+                                       j, sd_entry->entry_type);
+                               break;
+                       case I40E_SD_TYPE_DIRECT:
+                               I40E_SET_PF_SD_ENTRY(hw, sd_entry->u.bp.addr.pa,
+                                                    j, sd_entry->entry_type);
+                               break;
+                       default:
+                               ret_code = I40E_ERR_INVALID_SD_TYPE;
+                               goto exit;
+                               break;
+                       }
+               }
+       }
+       goto exit;
+
+exit_sd_error:
+       /* cleanup for sd entries from j to sd_idx */
+       while (j && (j > sd_idx)) {
+               sd_entry = &info->hmc_info->sd_table.sd_entry[j - 1];
+               switch (sd_entry->entry_type) {
+               case I40E_SD_TYPE_PAGED:
+                       /* free every backing page covered by this SD,
+                        * then the PD page itself
+                        */
+                       pd_idx1 = max(pd_idx,
+                                     ((j - 1) * I40E_HMC_MAX_BP_COUNT));
+                       pd_lmt1 = min(pd_lmt, (j * I40E_HMC_MAX_BP_COUNT));
+                       for (i = pd_idx1; i < pd_lmt1; i++) {
+                               i40e_remove_pd_bp(
+                                       hw,
+                                       info->hmc_info,
+                                       i,
+                                       true);
+                       }
+                       i40e_remove_pd_page(hw, info->hmc_info, (j - 1));
+                       break;
+               case I40E_SD_TYPE_DIRECT:
+                       i40e_remove_sd_bp(hw, info->hmc_info, (j - 1));
+                       break;
+               default:
+                       ret_code = I40E_ERR_INVALID_SD_TYPE;
+                       break;
+               }
+               j--;
+       }
+exit:
+       return ret_code;
+}
+
+/**
+ * i40e_configure_lan_hmc - prepare the HMC backing store
+ * @hw: pointer to the hw structure
+ * @model: the model for the layout of the SD/PD tables
+ *
+ * - This function will be called once per physical function initialization.
+ * - This function will be called after i40e_init_lan_hmc() and before
+ *   any LAN/FCoE HMC objects can be created.
+ *
+ * Returns 0 on success, or an i40e_status error code on failure.
+ **/
+i40e_status i40e_configure_lan_hmc(struct i40e_hw *hw,
+                                            enum i40e_hmc_model model)
+{
+       struct i40e_hmc_lan_create_obj_info info;
+       i40e_status ret_code = 0;
+       u8 hmc_fn_id = hw->hmc.hmc_fn_id;
+       struct i40e_hmc_obj_info *obj;
+
+       /* Initialize part of the create object info struct */
+       info.hmc_info = &hw->hmc;
+       info.rsrc_type = I40E_HMC_LAN_FULL;
+       info.start_idx = 0;
+       info.direct_mode_sz = hw->hmc.hmc_obj[I40E_HMC_LAN_FULL].size;
+
+       /* Build the SD entry for the LAN objects */
+       switch (model) {
+       case I40E_HMC_MODEL_DIRECT_PREFERRED:
+       case I40E_HMC_MODEL_DIRECT_ONLY:
+               info.entry_type = I40E_SD_TYPE_DIRECT;
+               /* Make one big object, a single SD */
+               info.count = 1;
+               ret_code = i40e_create_lan_hmc_object(hw, &info);
+               /* direct-preferred may fall back to paged on failure */
+               if ((ret_code) &&
+                   (model == I40E_HMC_MODEL_DIRECT_PREFERRED))
+                       goto try_type_paged;
+               else if (ret_code)
+                       goto configure_lan_hmc_out;
+               /* else clause falls through the break */
+               break;
+       case I40E_HMC_MODEL_PAGED_ONLY:
+try_type_paged:
+               info.entry_type = I40E_SD_TYPE_PAGED;
+               /* Make one big object in the PD table */
+               info.count = 1;
+               ret_code = i40e_create_lan_hmc_object(hw, &info);
+               if (ret_code)
+                       goto configure_lan_hmc_out;
+               break;
+       default:
+               /* unsupported type: log the model that was requested,
+                * not the (just-assigned) error code
+                */
+               ret_code = I40E_ERR_INVALID_SD_TYPE;
+               hw_dbg(hw, "i40e_configure_lan_hmc: Unknown SD type: %d\n",
+                         model);
+               goto configure_lan_hmc_out;
+       }
+
+       /* Configure and program the FPM registers so objects can be created.
+        * Base addresses are programmed in 512-byte units per the
+        * I40E_GLHMC_*BASE register layout.
+        */
+
+       /* Tx contexts */
+       obj = &hw->hmc.hmc_obj[I40E_HMC_LAN_TX];
+       wr32(hw, I40E_GLHMC_LANTXBASE(hmc_fn_id),
+            (u32)((obj->base & I40E_GLHMC_LANTXBASE_FPMLANTXBASE_MASK) / 512));
+       wr32(hw, I40E_GLHMC_LANTXCNT(hmc_fn_id), obj->cnt);
+
+       /* Rx contexts */
+       obj = &hw->hmc.hmc_obj[I40E_HMC_LAN_RX];
+       wr32(hw, I40E_GLHMC_LANRXBASE(hmc_fn_id),
+            (u32)((obj->base & I40E_GLHMC_LANRXBASE_FPMLANRXBASE_MASK) / 512));
+       wr32(hw, I40E_GLHMC_LANRXCNT(hmc_fn_id), obj->cnt);
+
+       /* FCoE contexts */
+       obj = &hw->hmc.hmc_obj[I40E_HMC_FCOE_CTX];
+       wr32(hw, I40E_GLHMC_FCOEDDPBASE(hmc_fn_id),
+        (u32)((obj->base & I40E_GLHMC_FCOEDDPBASE_FPMFCOEDDPBASE_MASK) / 512));
+       wr32(hw, I40E_GLHMC_FCOEDDPCNT(hmc_fn_id), obj->cnt);
+
+       /* FCoE filters */
+       obj = &hw->hmc.hmc_obj[I40E_HMC_FCOE_FILT];
+       wr32(hw, I40E_GLHMC_FCOEFBASE(hmc_fn_id),
+            (u32)((obj->base & I40E_GLHMC_FCOEFBASE_FPMFCOEFBASE_MASK) / 512));
+       wr32(hw, I40E_GLHMC_FCOEFCNT(hmc_fn_id), obj->cnt);
+
+configure_lan_hmc_out:
+       return ret_code;
+}
+
+/**
+ * i40e_delete_lan_hmc_object - remove hmc objects
+ * @hw: pointer to the HW structure
+ * @info: pointer to i40e_hmc_lan_delete_obj_info struct
+ *
+ * This will de-populate the SDs and PDs.  It frees
+ * the memory for PDS and backing storage.  After this function is returned,
+ * caller should deallocate memory allocated previously for
+ * book-keeping information about PDs and backing storage.
+ **/
+static i40e_status i40e_delete_lan_hmc_object(struct i40e_hw *hw,
+                               struct i40e_hmc_lan_delete_obj_info *info)
+{
+       i40e_status ret_code = 0;
+       struct i40e_hmc_pd_table *pd_table;
+       u32 pd_idx, pd_lmt, rel_pd_idx;
+       u32 sd_idx, sd_lmt;
+       u32 i, j;
+
+       /* validate the caller-supplied delete description */
+       if (NULL == info) {
+               ret_code = I40E_ERR_BAD_PTR;
+               hw_dbg(hw, "i40e_delete_hmc_object: bad info ptr\n");
+               goto exit;
+       }
+       if (NULL == info->hmc_info) {
+               ret_code = I40E_ERR_BAD_PTR;
+               hw_dbg(hw, "i40e_delete_hmc_object: bad info->hmc_info ptr\n");
+               goto exit;
+       }
+       if (I40E_HMC_INFO_SIGNATURE != info->hmc_info->signature) {
+               ret_code = I40E_ERR_BAD_PTR;
+               hw_dbg(hw, "i40e_delete_hmc_object: bad hmc_info->signature\n");
+               goto exit;
+       }
+
+       if (NULL == info->hmc_info->sd_table.sd_entry) {
+               ret_code = I40E_ERR_BAD_PTR;
+               hw_dbg(hw, "i40e_delete_hmc_object: bad sd_entry\n");
+               goto exit;
+       }
+
+       if (NULL == info->hmc_info->hmc_obj) {
+               ret_code = I40E_ERR_BAD_PTR;
+               hw_dbg(hw, "i40e_delete_hmc_object: bad hmc_info->hmc_obj\n");
+               goto exit;
+       }
+       if (info->start_idx >= info->hmc_info->hmc_obj[info->rsrc_type].cnt) {
+               ret_code = I40E_ERR_INVALID_HMC_OBJ_INDEX;
+               hw_dbg(hw, "i40e_delete_hmc_object: returns error %d\n",
+                         ret_code);
+               goto exit;
+       }
+
+       if ((info->start_idx + info->count) >
+           info->hmc_info->hmc_obj[info->rsrc_type].cnt) {
+               ret_code = I40E_ERR_INVALID_HMC_OBJ_COUNT;
+               hw_dbg(hw, "i40e_delete_hmc_object: returns error %d\n",
+                         ret_code);
+               goto exit;
+       }
+
+       /* first pass: free the backing pages behind every valid PD entry
+        * of the paged SDs covering the requested object range
+        */
+       I40E_FIND_PD_INDEX_LIMIT(info->hmc_info, info->rsrc_type,
+                                info->start_idx, info->count, &pd_idx,
+                                &pd_lmt);
+
+       for (j = pd_idx; j < pd_lmt; j++) {
+               sd_idx = j / I40E_HMC_PD_CNT_IN_SD;
+
+               /* direct SDs have no PD entries to clean up */
+               if (I40E_SD_TYPE_PAGED !=
+                   info->hmc_info->sd_table.sd_entry[sd_idx].entry_type)
+                       continue;
+
+               rel_pd_idx = j % I40E_HMC_PD_CNT_IN_SD;
+
+               pd_table =
+                       &info->hmc_info->sd_table.sd_entry[sd_idx].u.pd_table;
+               if (pd_table->pd_entry[rel_pd_idx].valid) {
+                       ret_code = i40e_remove_pd_bp(hw, info->hmc_info,
+                                                    j, true);
+                       if (ret_code)
+                               goto exit;
+               }
+       }
+
+       /* find sd index and limit */
+       I40E_FIND_SD_INDEX_LIMIT(info->hmc_info, info->rsrc_type,
+                                info->start_idx, info->count,
+                                &sd_idx, &sd_lmt);
+       if (sd_idx >= info->hmc_info->sd_table.sd_cnt ||
+           sd_lmt > info->hmc_info->sd_table.sd_cnt) {
+               ret_code = I40E_ERR_INVALID_SD_INDEX;
+               goto exit;
+       }
+
+       /* second pass: tear down the SD entries themselves */
+       for (i = sd_idx; i < sd_lmt; i++) {
+               if (!info->hmc_info->sd_table.sd_entry[i].valid)
+                       continue;
+               switch (info->hmc_info->sd_table.sd_entry[i].entry_type) {
+               case I40E_SD_TYPE_DIRECT:
+                       ret_code = i40e_remove_sd_bp(hw, info->hmc_info, i);
+                       if (ret_code)
+                               goto exit;
+                       break;
+               case I40E_SD_TYPE_PAGED:
+                       ret_code = i40e_remove_pd_page(hw, info->hmc_info, i);
+                       if (ret_code)
+                               goto exit;
+                       break;
+               default:
+                       break;
+               }
+       }
+exit:
+       return ret_code;
+}
+
+/**
+ * i40e_shutdown_lan_hmc - Remove HMC backing store, free allocated memory
+ * @hw: pointer to the hw structure
+ *
+ * This must be called by drivers as they are shutting down and being
+ * removed from the OS.
+ *
+ * Returns the status of the object deletion; the SD table and hmc_obj
+ * memory are freed regardless of that status.
+ **/
+i40e_status i40e_shutdown_lan_hmc(struct i40e_hw *hw)
+{
+       struct i40e_hmc_lan_delete_obj_info info;
+       i40e_status ret_code;
+
+       /* describe the single full-LAN object set up at configure time */
+       info.hmc_info = &hw->hmc;
+       info.rsrc_type = I40E_HMC_LAN_FULL;
+       info.start_idx = 0;
+       info.count = 1;
+
+       /* delete the object */
+       ret_code = i40e_delete_lan_hmc_object(hw, &info);
+
+       /* free the SD table entry for LAN */
+       i40e_free_virt_mem(hw, &hw->hmc.sd_table.addr);
+       hw->hmc.sd_table.sd_cnt = 0;
+       hw->hmc.sd_table.sd_entry = NULL;
+
+       /* free memory used for hmc_obj */
+       i40e_free_virt_mem(hw, &hw->hmc.hmc_obj_virt_mem);
+       hw->hmc.hmc_obj = NULL;
+
+       return ret_code;
+}
+
+/* expands to the offset/size pair of a struct member; used to build
+ * the context element tables below
+ */
+#define I40E_HMC_STORE(_struct, _ele)          \
+       offsetof(struct _struct, _ele),         \
+       FIELD_SIZEOF(struct _struct, _ele)
+
+/* describes how one struct member maps into an HMC context image */
+struct i40e_context_ele {
+       u16 offset;     /* byte offset of the member in the source struct */
+       u16 size_of;    /* size in bytes of the member in the source struct */
+       u16 width;      /* width in bits of the field in the context image */
+       u16 lsb;        /* bit position of the field's LSB in the image */
+};
+
+/* LAN Tx Queue Context
+ * Each entry maps an i40e_hmc_obj_txq member to its bit position in the
+ * Tx queue context image; LSB values are offset by multiples of 128 per
+ * context line.
+ */
+static struct i40e_context_ele i40e_hmc_txq_ce_info[] = {
+                                            /* Field      Width    LSB */
+       {I40E_HMC_STORE(i40e_hmc_obj_txq, head),           13,      0 },
+       {I40E_HMC_STORE(i40e_hmc_obj_txq, new_context),     1,     30 },
+       {I40E_HMC_STORE(i40e_hmc_obj_txq, base),           57,     32 },
+       {I40E_HMC_STORE(i40e_hmc_obj_txq, fc_ena),          1,     89 },
+       {I40E_HMC_STORE(i40e_hmc_obj_txq, timesync_ena),    1,     90 },
+       {I40E_HMC_STORE(i40e_hmc_obj_txq, fd_ena),          1,     91 },
+       {I40E_HMC_STORE(i40e_hmc_obj_txq, alt_vlan_ena),    1,     92 },
+       {I40E_HMC_STORE(i40e_hmc_obj_txq, cpuid),           8,     96 },
+/* line 1 */
+       {I40E_HMC_STORE(i40e_hmc_obj_txq, thead_wb),       13,  0 + 128 },
+       {I40E_HMC_STORE(i40e_hmc_obj_txq, head_wb_ena),     1, 32 + 128 },
+       {I40E_HMC_STORE(i40e_hmc_obj_txq, qlen),           13, 33 + 128 },
+       {I40E_HMC_STORE(i40e_hmc_obj_txq, tphrdesc_ena),    1, 46 + 128 },
+       {I40E_HMC_STORE(i40e_hmc_obj_txq, tphrpacket_ena),  1, 47 + 128 },
+       {I40E_HMC_STORE(i40e_hmc_obj_txq, tphwdesc_ena),    1, 48 + 128 },
+       {I40E_HMC_STORE(i40e_hmc_obj_txq, head_wb_addr),   64, 64 + 128 },
+/* line 7 */
+       {I40E_HMC_STORE(i40e_hmc_obj_txq, crc),            32,  0 + (7 * 128) },
+       {I40E_HMC_STORE(i40e_hmc_obj_txq, rdylist),        10, 84 + (7 * 128) },
+       {I40E_HMC_STORE(i40e_hmc_obj_txq, rdylist_act),     1, 94 + (7 * 128) },
+       { 0 }   /* terminator: width == 0 ends the table */
+};
+
+/* LAN Rx Queue Context
+ * Each entry maps an i40e_hmc_obj_rxq member to its bit position in the
+ * Rx queue context image.
+ */
+static struct i40e_context_ele i40e_hmc_rxq_ce_info[] = {
+                                        /* Field      Width    LSB */
+       { I40E_HMC_STORE(i40e_hmc_obj_rxq, head),        13,    0   },
+       { I40E_HMC_STORE(i40e_hmc_obj_rxq, cpuid),        8,    13  },
+       { I40E_HMC_STORE(i40e_hmc_obj_rxq, base),        57,    32  },
+       { I40E_HMC_STORE(i40e_hmc_obj_rxq, qlen),        13,    89  },
+       { I40E_HMC_STORE(i40e_hmc_obj_rxq, dbuff),        7,    102 },
+       { I40E_HMC_STORE(i40e_hmc_obj_rxq, hbuff),        5,    109 },
+       { I40E_HMC_STORE(i40e_hmc_obj_rxq, dtype),        2,    114 },
+       { I40E_HMC_STORE(i40e_hmc_obj_rxq, dsize),        1,    116 },
+       { I40E_HMC_STORE(i40e_hmc_obj_rxq, crcstrip),     1,    117 },
+       { I40E_HMC_STORE(i40e_hmc_obj_rxq, fc_ena),       1,    118 },
+       { I40E_HMC_STORE(i40e_hmc_obj_rxq, l2tsel),       1,    119 },
+       { I40E_HMC_STORE(i40e_hmc_obj_rxq, hsplit_0),     4,    120 },
+       { I40E_HMC_STORE(i40e_hmc_obj_rxq, hsplit_1),     2,    124 },
+       { I40E_HMC_STORE(i40e_hmc_obj_rxq, showiv),       1,    127 },
+       { I40E_HMC_STORE(i40e_hmc_obj_rxq, rxmax),       14,    174 },
+       { I40E_HMC_STORE(i40e_hmc_obj_rxq, tphrdesc_ena), 1,    193 },
+       { I40E_HMC_STORE(i40e_hmc_obj_rxq, tphwdesc_ena), 1,    194 },
+       { I40E_HMC_STORE(i40e_hmc_obj_rxq, tphdata_ena),  1,    195 },
+       { I40E_HMC_STORE(i40e_hmc_obj_rxq, tphhead_ena),  1,    196 },
+       { I40E_HMC_STORE(i40e_hmc_obj_rxq, lrxqthresh),   3,    198 },
+       { 0 }   /* terminator: width == 0 ends the table */
+};
+
+/**
+ * i40e_clear_hmc_context - zero out the HMC context bits
+ * @hw:       the hardware struct
+ * @context_bytes: pointer to the context bit array (DMA memory)
+ * @hmc_type: the type of HMC resource
+ *
+ * Wipes the whole context image for the given resource type.
+ **/
+static i40e_status i40e_clear_hmc_context(struct i40e_hw *hw,
+                                       u8 *context_bytes,
+                                       enum i40e_hmc_lan_rsrc_type hmc_type)
+{
+       u32 obj_size = (u32)hw->hmc.hmc_obj[hmc_type].size;
+
+       /* zero every byte of the object's context image */
+       memset(context_bytes, 0, obj_size);
+
+       return 0;
+}
+
+/**
+ * i40e_set_hmc_context - replace HMC context bits
+ * @context_bytes: pointer to the context bit array that is written
+ * @ce_info:  a description of the struct holding the source values
+ * @dest:     pointer to the source struct whose members are read
+ *            (note: read from, not written, despite the name)
+ **/
+static i40e_status i40e_set_hmc_context(u8 *context_bytes,
+                                       struct i40e_context_ele *ce_info,
+                                       u8 *dest)
+{
+       u16 shift_width;
+       u64 bitfield;
+       u8 hi_byte;
+       u8 hi_mask;
+       u64 t_bits;
+       u64 mask;
+       u8 *p;
+       int f;
+
+       /* a zero-width entry terminates the ce_info table */
+       for (f = 0; ce_info[f].width != 0; f++) {
+               /* clear out the field */
+               bitfield = 0;
+
+               /* copy from the next struct field, converting the value
+                * to little-endian on the way in
+                */
+               p = dest + ce_info[f].offset;
+               switch (ce_info[f].size_of) {
+               case 1:
+                       bitfield = *p;
+                       break;
+               case 2:
+                       bitfield = cpu_to_le16(*(u16 *)p);
+                       break;
+               case 4:
+                       bitfield = cpu_to_le32(*(u32 *)p);
+                       break;
+               case 8:
+                       bitfield = cpu_to_le64(*(u64 *)p);
+                       break;
+               }
+
+               /* prepare the bits and mask */
+               shift_width = ce_info[f].lsb % 8;
+               mask = ((u64)1 << ce_info[f].width) - 1;
+
+               /* save upper bytes for special case; these would be
+                * shifted out of the u64 below if width + shift > 64
+                */
+               hi_mask = (u8)((mask >> 56) & 0xff);
+               hi_byte = (u8)((bitfield >> 56) & 0xff);
+
+               /* shift to correct alignment */
+               mask <<= shift_width;
+               bitfield <<= shift_width;
+
+               /* get the current bits from the target bit string
+                * (read-modify-write of 8 bytes at the field's byte offset)
+                */
+               p = context_bytes + (ce_info[f].lsb / 8);
+               memcpy(&t_bits, p, sizeof(u64));
+
+               t_bits &= ~mask;          /* get the bits not changing */
+               t_bits |= bitfield;       /* add in the new bits */
+
+               /* put it all back */
+               memcpy(p, &t_bits, sizeof(u64));
+
+               /* deal with the special case if needed
+                * example: 62 bit field that starts in bit 5 of first byte
+                *          will overlap 3 bits into byte 9
+                */
+               if ((shift_width + ce_info[f].width) > 64) {
+                       u8 byte;
+
+                       /* keep only the bits that spilled past the u64 */
+                       hi_mask >>= (8 - shift_width);
+                       hi_byte >>= (8 - shift_width);
+                       byte = p[8] & ~hi_mask;  /* get the bits not changing */
+                       byte |= hi_byte;         /* add in the new bits */
+                       p[8] = byte;             /* put it back */
+               }
+       }
+
+       return 0;
+}
+
+/**
+ * i40e_hmc_get_object_va - retrieves an object's virtual address
+ * @hmc_info: pointer to i40e_hmc_info struct
+ * @object_base: pointer to u8 pointer that receives the va
+ * @rsrc_type: the hmc resource type
+ * @obj_idx: hmc object index
+ *
+ * This function retrieves the object's virtual address from the object
+ * base pointer.  This function is used for LAN Queue contexts.
+ *
+ * Returns 0 and writes *object_base on success, or an I40E_ERR_* code
+ * on bad arguments.
+ **/
+static
+i40e_status i40e_hmc_get_object_va(struct i40e_hmc_info *hmc_info,
+                                       u8 **object_base,
+                                       enum i40e_hmc_lan_rsrc_type rsrc_type,
+                                       u32 obj_idx)
+{
+       u32 obj_offset_in_sd, obj_offset_in_pd;
+       i40e_status ret_code = 0;
+       struct i40e_hmc_sd_entry *sd_entry;
+       struct i40e_hmc_pd_entry *pd_entry;
+       u32 pd_idx, pd_lmt, rel_pd_idx;
+       u64 obj_offset_in_fpm;
+       u32 sd_idx, sd_lmt;
+
+       /* NOTE(review): hw_dbg() below references 'hw' though there is no
+        * such local or parameter; presumably the macro discards its first
+        * argument -- confirm against i40e_osdep.h.
+        */
+       if (NULL == hmc_info) {
+               ret_code = I40E_ERR_BAD_PTR;
+               hw_dbg(hw, "i40e_hmc_get_object_va: bad hmc_info ptr\n");
+               goto exit;
+       }
+       if (NULL == hmc_info->hmc_obj) {
+               ret_code = I40E_ERR_BAD_PTR;
+               hw_dbg(hw, "i40e_hmc_get_object_va: bad hmc_info->hmc_obj ptr\n");
+               goto exit;
+       }
+       if (NULL == object_base) {
+               ret_code = I40E_ERR_BAD_PTR;
+               hw_dbg(hw, "i40e_hmc_get_object_va: bad object_base ptr\n");
+               goto exit;
+       }
+       if (I40E_HMC_INFO_SIGNATURE != hmc_info->signature) {
+               ret_code = I40E_ERR_BAD_PTR;
+               hw_dbg(hw, "i40e_hmc_get_object_va: bad hmc_info->signature\n");
+               goto exit;
+       }
+       if (obj_idx >= hmc_info->hmc_obj[rsrc_type].cnt) {
+               /* assign the error before logging it so the message shows
+                * the real code instead of the stale 0
+                */
+               ret_code = I40E_ERR_INVALID_HMC_OBJ_INDEX;
+               hw_dbg(hw, "i40e_hmc_get_object_va: returns error %d\n",
+                         ret_code);
+               goto exit;
+       }
+       /* find sd index and limit */
+       I40E_FIND_SD_INDEX_LIMIT(hmc_info, rsrc_type, obj_idx, 1,
+                                &sd_idx, &sd_lmt);
+
+       sd_entry = &hmc_info->sd_table.sd_entry[sd_idx];
+       obj_offset_in_fpm = hmc_info->hmc_obj[rsrc_type].base +
+                           hmc_info->hmc_obj[rsrc_type].size * obj_idx;
+
+       /* resolve the FPM offset through either the paged or the direct
+        * backing pages
+        */
+       if (I40E_SD_TYPE_PAGED == sd_entry->entry_type) {
+               I40E_FIND_PD_INDEX_LIMIT(hmc_info, rsrc_type, obj_idx, 1,
+                                        &pd_idx, &pd_lmt);
+               rel_pd_idx = pd_idx % I40E_HMC_PD_CNT_IN_SD;
+               pd_entry = &sd_entry->u.pd_table.pd_entry[rel_pd_idx];
+               obj_offset_in_pd = (u32)(obj_offset_in_fpm %
+                                        I40E_HMC_PAGED_BP_SIZE);
+               *object_base = (u8 *)pd_entry->bp.addr.va + obj_offset_in_pd;
+       } else {
+               obj_offset_in_sd = (u32)(obj_offset_in_fpm %
+                                        I40E_HMC_DIRECT_BP_SIZE);
+               *object_base = (u8 *)sd_entry->u.bp.addr.va + obj_offset_in_sd;
+       }
+exit:
+       return ret_code;
+}
+
+/**
+ * i40e_clear_lan_tx_queue_context - clear the HMC context for the queue
+ * @hw:    the hardware struct
+ * @queue: the queue we care about
+ *
+ * Zeroes the Tx queue's HMC context image.
+ **/
+i40e_status i40e_clear_lan_tx_queue_context(struct i40e_hw *hw,
+                                                     u16 queue)
+{
+       u8 *context;
+       i40e_status status;
+
+       /* locate the queue's context bytes in the backing store */
+       status = i40e_hmc_get_object_va(&hw->hmc, &context,
+                                       I40E_HMC_LAN_TX, queue);
+       if (status < 0)
+               return status;
+
+       return i40e_clear_hmc_context(hw, context, I40E_HMC_LAN_TX);
+}
+
+/**
+ * i40e_set_lan_tx_queue_context - set the HMC context for the queue
+ * @hw:    the hardware struct
+ * @queue: the queue we care about
+ * @s:     the struct to be filled
+ *
+ * Writes the values from @s into the Tx queue's HMC context image.
+ **/
+i40e_status i40e_set_lan_tx_queue_context(struct i40e_hw *hw,
+                                                   u16 queue,
+                                                   struct i40e_hmc_obj_txq *s)
+{
+       u8 *context;
+       i40e_status status;
+
+       /* locate the queue's context bytes in the backing store */
+       status = i40e_hmc_get_object_va(&hw->hmc, &context,
+                                       I40E_HMC_LAN_TX, queue);
+       if (status < 0)
+               return status;
+
+       return i40e_set_hmc_context(context, i40e_hmc_txq_ce_info, (u8 *)s);
+}
+
+/**
+ * i40e_clear_lan_rx_queue_context - clear the HMC context for the queue
+ * @hw:    the hardware struct
+ * @queue: the queue we care about
+ *
+ * Zeroes the Rx queue's HMC context image.
+ **/
+i40e_status i40e_clear_lan_rx_queue_context(struct i40e_hw *hw,
+                                                     u16 queue)
+{
+       u8 *context;
+       i40e_status status;
+
+       /* locate the queue's context bytes in the backing store */
+       status = i40e_hmc_get_object_va(&hw->hmc, &context,
+                                       I40E_HMC_LAN_RX, queue);
+       if (status < 0)
+               return status;
+
+       return i40e_clear_hmc_context(hw, context, I40E_HMC_LAN_RX);
+}
+
+/**
+ * i40e_set_lan_rx_queue_context - set the HMC context for the queue
+ * @hw:    the hardware struct
+ * @queue: the queue we care about
+ * @s:     the struct to be filled
+ *
+ * Writes the values from @s into the Rx queue's HMC context image.
+ **/
+i40e_status i40e_set_lan_rx_queue_context(struct i40e_hw *hw,
+                                                   u16 queue,
+                                                   struct i40e_hmc_obj_rxq *s)
+{
+       u8 *context;
+       i40e_status status;
+
+       /* locate the queue's context bytes in the backing store */
+       status = i40e_hmc_get_object_va(&hw->hmc, &context,
+                                       I40E_HMC_LAN_RX, queue);
+       if (status < 0)
+               return status;
+
+       return i40e_set_hmc_context(context, i40e_hmc_rxq_ce_info, (u8 *)s);
+}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.h b/drivers/net/ethernet/intel/i40e/i40e_lan_hmc.h
new file mode 100644 (file)
index 0000000..00ff350
--- /dev/null
@@ -0,0 +1,169 @@
+/*******************************************************************************
+ *
+ * Intel Ethernet Controller XL710 Family Linux Driver
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Contact Information:
+ * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ *
+ ******************************************************************************/
+
+#ifndef _I40E_LAN_HMC_H_
+#define _I40E_LAN_HMC_H_
+
+/* forward-declare the HW struct for the compiler */
+struct i40e_hw;
+
+/* HMC element context information */
+
+/* Rx queue context data
+ *
+ * The bit width and position of each field in the hardware context image
+ * are defined by the i40e_hmc_rxq_ce_info table in i40e_lan_hmc.c; this
+ * struct only carries the values.
+ */
+struct i40e_hmc_obj_rxq {
+       u16 head;
+       u8  cpuid;
+       u64 base;
+       u16 qlen;
+/* buffer sizes appear to be programmed in units of
+ * (1 << I40E_RXQ_CTX_[DH]BUFF_SHIFT) bytes -- confirm against datasheet
+ */
+#define I40E_RXQ_CTX_DBUFF_SHIFT 7
+       u8  dbuff;
+#define I40E_RXQ_CTX_HBUFF_SHIFT 6
+       u8  hbuff;
+       u8  dtype;
+       u8  dsize;
+       u8  crcstrip;
+       u8  fc_ena;
+       u8  l2tsel;
+       u8  hsplit_0;
+       u8  hsplit_1;
+       u8  showiv;
+       u16 rxmax;
+       u8  tphrdesc_ena;
+       u8  tphwdesc_ena;
+       u8  tphdata_ena;
+       u8  tphhead_ena;
+       u8  lrxqthresh;
+};
+
+/* Tx queue context data
+ *
+ * The bit width and position of each field in the hardware context image
+ * are defined by the i40e_hmc_txq_ce_info table in i40e_lan_hmc.c; this
+ * struct only carries the values.
+ */
+struct i40e_hmc_obj_txq {
+       u16 head;
+       u8  new_context;
+       u64 base;
+       u8  fc_ena;
+       u8  timesync_ena;
+       u8  fd_ena;
+       u8  alt_vlan_ena;
+       u16 thead_wb;
+       u16 cpuid;
+       u8  head_wb_ena;
+       u16 qlen;
+       u8  tphrdesc_ena;
+       u8  tphrpacket_ena;
+       u8  tphwdesc_ena;
+       u64 head_wb_addr;
+       u32 crc;
+       u16 rdylist;
+       u8  rdylist_act;
+};
+
+/* for hsplit_0 field of Rx HMC context
+ * (power-of-two values; they look like OR-able flags -- confirm)
+ */
+enum i40e_hmc_obj_rx_hsplit_0 {
+       I40E_HMC_OBJ_RX_HSPLIT_0_NO_SPLIT      = 0,
+       I40E_HMC_OBJ_RX_HSPLIT_0_SPLIT_L2      = 1,
+       I40E_HMC_OBJ_RX_HSPLIT_0_SPLIT_IP      = 2,
+       I40E_HMC_OBJ_RX_HSPLIT_0_SPLIT_TCP_UDP = 4,
+       I40E_HMC_OBJ_RX_HSPLIT_0_SPLIT_SCTP    = 8,
+};
+
+/* fcoe_cntx and fcoe_filt are for debugging purpose only */
+struct i40e_hmc_obj_fcoe_cntx {
+       u32 rsv[32];
+};
+
+struct i40e_hmc_obj_fcoe_filt {
+       u32 rsv[8];
+};
+
+/* Context sizes for LAN objects
+ * (the encoded value is log2 of the object size in bytes:
+ *  0x3 -> 8 B ... 0x9 -> 512 B)
+ */
+enum i40e_hmc_lan_object_size {
+       I40E_HMC_LAN_OBJ_SZ_8   = 0x3,
+       I40E_HMC_LAN_OBJ_SZ_16  = 0x4,
+       I40E_HMC_LAN_OBJ_SZ_32  = 0x5,
+       I40E_HMC_LAN_OBJ_SZ_64  = 0x6,
+       I40E_HMC_LAN_OBJ_SZ_128 = 0x7,
+       I40E_HMC_LAN_OBJ_SZ_256 = 0x8,
+       I40E_HMC_LAN_OBJ_SZ_512 = 0x9,
+};
+
+#define I40E_HMC_L2OBJ_BASE_ALIGNMENT 512
+#define I40E_HMC_OBJ_SIZE_TXQ         128
+#define I40E_HMC_OBJ_SIZE_RXQ         32
+#define I40E_HMC_OBJ_SIZE_FCOE_CNTX   128
+#define I40E_HMC_OBJ_SIZE_FCOE_FILT   32
+
+/* HMC object resource types; these values index hw->hmc.hmc_obj[].
+ * I40E_HMC_LAN_MAX is the number of types, not a type itself.
+ */
+enum i40e_hmc_lan_rsrc_type {
+       I40E_HMC_LAN_FULL  = 0,
+       I40E_HMC_LAN_TX    = 1,
+       I40E_HMC_LAN_RX    = 2,
+       I40E_HMC_FCOE_CTX  = 3,
+       I40E_HMC_FCOE_FILT = 4,
+       I40E_HMC_LAN_MAX   = 5
+};
+
+/* SD/PD layout models accepted by i40e_configure_lan_hmc();
+ * DIRECT_PREFERRED falls back to the paged layout if direct setup fails
+ */
+enum i40e_hmc_model {
+       I40E_HMC_MODEL_DIRECT_PREFERRED = 0,
+       I40E_HMC_MODEL_DIRECT_ONLY      = 1,
+       I40E_HMC_MODEL_PAGED_ONLY       = 2,
+       I40E_HMC_MODEL_UNKNOWN,
+};
+
+/* parameters for i40e_create_lan_hmc_object() */
+struct i40e_hmc_lan_create_obj_info {
+       struct i40e_hmc_info *hmc_info;
+       u32 rsrc_type;          /* enum i40e_hmc_lan_rsrc_type value */
+       u32 start_idx;          /* first object index to create */
+       u32 count;              /* number of objects to create */
+       enum i40e_sd_entry_type entry_type;     /* direct or paged SDs */
+       u64 direct_mode_sz;     /* SD size override; 0 selects the 2M default */
+};
+
+/* parameters for i40e_delete_lan_hmc_object() */
+struct i40e_hmc_lan_delete_obj_info {
+       struct i40e_hmc_info *hmc_info;
+       u32 rsrc_type;          /* enum i40e_hmc_lan_rsrc_type value */
+       u32 start_idx;          /* first object index to delete */
+       u32 count;              /* number of objects to delete */
+};
+
+i40e_status i40e_init_lan_hmc(struct i40e_hw *hw, u32 txq_num,
+                                       u32 rxq_num, u32 fcoe_cntx_num,
+                                       u32 fcoe_filt_num);
+i40e_status i40e_configure_lan_hmc(struct i40e_hw *hw,
+                                            enum i40e_hmc_model model);
+i40e_status i40e_shutdown_lan_hmc(struct i40e_hw *hw);
+
+i40e_status i40e_clear_lan_tx_queue_context(struct i40e_hw *hw,
+                                                     u16 queue);
+i40e_status i40e_set_lan_tx_queue_context(struct i40e_hw *hw,
+                                                   u16 queue,
+                                                   struct i40e_hmc_obj_txq *s);
+i40e_status i40e_clear_lan_rx_queue_context(struct i40e_hw *hw,
+                                                     u16 queue);
+i40e_status i40e_set_lan_rx_queue_context(struct i40e_hw *hw,
+                                                   u16 queue,
+                                                   struct i40e_hmc_obj_rxq *s);
+
+#endif /* _I40E_LAN_HMC_H_ */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
new file mode 100644 (file)
index 0000000..601d482
--- /dev/null
@@ -0,0 +1,7375 @@
+/*******************************************************************************
+ *
+ * Intel Ethernet Controller XL710 Family Linux Driver
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Contact Information:
+ * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ *
+ ******************************************************************************/
+
+/* Local includes */
+#include "i40e.h"
+
+const char i40e_driver_name[] = "i40e";
+static const char i40e_driver_string[] =
+                       "Intel(R) Ethernet Connection XL710 Network Driver";
+
+/* "-k" suffix conventionally marks the in-kernel build of Intel drivers
+ * (vs an out-of-tree release) — appended to DRV_VERSION below.
+ */
+#define DRV_KERN "-k"
+
+#define DRV_VERSION_MAJOR 0
+#define DRV_VERSION_MINOR 3
+#define DRV_VERSION_BUILD 9
+#define DRV_VERSION __stringify(DRV_VERSION_MAJOR) "." \
+            __stringify(DRV_VERSION_MINOR) "." \
+            __stringify(DRV_VERSION_BUILD)    DRV_KERN
+const char i40e_driver_version_str[] = DRV_VERSION;
+static const char i40e_copyright[] = "Copyright (c) 2013 Intel Corporation.";
+
+/* a bit of forward declarations */
+static void i40e_vsi_reinit_locked(struct i40e_vsi *vsi);
+static void i40e_handle_reset_warning(struct i40e_pf *pf);
+static int i40e_add_vsi(struct i40e_vsi *vsi);
+static int i40e_add_veb(struct i40e_veb *veb, struct i40e_vsi *vsi);
+static int i40e_setup_pf_switch(struct i40e_pf *pf);
+static int i40e_setup_misc_vector(struct i40e_pf *pf);
+static void i40e_determine_queue_usage(struct i40e_pf *pf);
+static int i40e_setup_pf_filter_control(struct i40e_pf *pf);
+
+/* i40e_pci_tbl - PCI Device ID Table
+ *
+ * Last entry must be all 0s
+ *
+ * { Vendor ID, Device ID, SubVendor ID, SubDevice ID,
+ *   Class, Class Mask, private data (not used) }
+ */
+static const struct pci_device_id i40e_pci_tbl[] = {
+       {PCI_VDEVICE(INTEL, I40E_SFP_XL710_DEVICE_ID), 0},
+       {PCI_VDEVICE(INTEL, I40E_SFP_X710_DEVICE_ID), 0},
+       {PCI_VDEVICE(INTEL, I40E_QEMU_DEVICE_ID), 0},
+       {PCI_VDEVICE(INTEL, I40E_KX_A_DEVICE_ID), 0},
+       {PCI_VDEVICE(INTEL, I40E_KX_B_DEVICE_ID), 0},
+       {PCI_VDEVICE(INTEL, I40E_KX_C_DEVICE_ID), 0},
+       {PCI_VDEVICE(INTEL, I40E_KX_D_DEVICE_ID), 0},
+       {PCI_VDEVICE(INTEL, I40E_QSFP_A_DEVICE_ID), 0},
+       {PCI_VDEVICE(INTEL, I40E_QSFP_B_DEVICE_ID), 0},
+       {PCI_VDEVICE(INTEL, I40E_QSFP_C_DEVICE_ID), 0},
+       /* required last entry */
+       {0, }
+};
+MODULE_DEVICE_TABLE(pci, i40e_pci_tbl);
+
+/* Maximum number of VFs the device can expose — NOTE(review): not used in
+ * the visible portion of this file; presumably consumed by the SR-IOV code.
+ */
+#define I40E_MAX_VF_COUNT 128
+/* -1 means "leave the default message level" — standard netdev convention */
+static int debug = -1;
+module_param(debug, int, 0);
+MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
+
+MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
+MODULE_DESCRIPTION("Intel(R) Ethernet Connection XL710 Network Driver");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(DRV_VERSION);
+
+/**
+ * i40e_allocate_dma_mem_d - OS specific memory alloc for shared code
+ * @hw:   pointer to the HW structure
+ * @mem:  ptr to mem struct to fill out
+ * @size: size of memory requested
+ * @alignment: what to align the allocation to
+ *
+ * Returns 0 on success with @mem->va/@mem->pa/@mem->size filled in,
+ * -ENOMEM if the coherent DMA allocation fails.
+ **/
+int i40e_allocate_dma_mem_d(struct i40e_hw *hw, struct i40e_dma_mem *mem,
+                           u64 size, u32 alignment)
+{
+       struct i40e_pf *pf = (struct i40e_pf *)hw->back;
+
+       /* round the size up so the returned buffer meets the alignment */
+       mem->size = ALIGN(size, alignment);
+       mem->va = dma_zalloc_coherent(&pf->pdev->dev, mem->size,
+                                     &mem->pa, GFP_KERNEL);
+       if (mem->va)
+               return 0;
+
+       return -ENOMEM;
+}
+
+/**
+ * i40e_free_dma_mem_d - OS specific memory free for shared code
+ * @hw:   pointer to the HW structure
+ * @mem:  ptr to mem struct to free
+ *
+ * Always returns 0; @mem is scrubbed so a double free is detectable.
+ **/
+int i40e_free_dma_mem_d(struct i40e_hw *hw, struct i40e_dma_mem *mem)
+{
+       struct i40e_pf *pf = (struct i40e_pf *)hw->back;
+
+       dma_free_coherent(&pf->pdev->dev, mem->size, mem->va, mem->pa);
+       mem->va = NULL;
+       mem->pa = 0;
+       mem->size = 0;
+
+       return 0;
+}
+
+/**
+ * i40e_allocate_virt_mem_d - OS specific memory alloc for shared code
+ * @hw:   pointer to the HW structure
+ * @mem:  ptr to mem struct to fill out
+ * @size: size of memory requested
+ *
+ * Returns 0 on success with a zeroed buffer in @mem->va, -ENOMEM on
+ * allocation failure.
+ **/
+int i40e_allocate_virt_mem_d(struct i40e_hw *hw, struct i40e_virt_mem *mem,
+                            u32 size)
+{
+       mem->size = size;
+       mem->va = kzalloc(size, GFP_KERNEL);
+
+       if (mem->va)
+               return 0;
+
+       return -ENOMEM;
+}
+
+/**
+ * i40e_free_virt_mem_d - OS specific memory free for shared code
+ * @hw:   pointer to the HW structure
+ * @mem:  ptr to mem struct to free
+ *
+ * Always returns 0; @mem->va is reset to NULL so a double free is harmless.
+ **/
+int i40e_free_virt_mem_d(struct i40e_hw *hw, struct i40e_virt_mem *mem)
+{
+       /* it's ok to kfree a NULL pointer */
+       kfree(mem->va);
+       mem->va = NULL;
+       mem->size = 0;
+
+       return 0;
+}
+
+/**
+ * i40e_get_lump - find a lump of free generic resource
+ * @pf: board private structure
+ * @pile: the pile of resource to search
+ * @needed: the number of items needed
+ * @id: an owner id to stick on the items assigned
+ *
+ * Returns the base item index of the lump, or negative for error
+ * (-EINVAL on bad parameters, -ENOMEM when no suitable run exists).
+ *
+ * The search_hint trick and lack of advanced fit-finding only work
+ * because we're highly likely to have all the same size lump requests.
+ * Linear search time and any fragmentation should be minimal.
+ **/
+static int i40e_get_lump(struct i40e_pf *pf, struct i40e_lump_tracking *pile,
+                        u16 needed, u16 id)
+{
+       int ret = -ENOMEM;
+       int i = 0;
+       int j = 0;
+
+       if (!pile || needed == 0 || id >= I40E_PILE_VALID_BIT) {
+               dev_info(&pf->pdev->dev,
+                        "param err: pile=%p needed=%d id=0x%04x\n",
+                        pile, needed, id);
+               return -EINVAL;
+       }
+
+       /* start the linear search with an imperfect hint */
+       i = pile->search_hint;
+retry:
+       while (i < pile->num_entries && ret < 0) {
+               /* skip already allocated entries */
+               if (pile->list[i] & I40E_PILE_VALID_BIT) {
+                       i++;
+                       continue;
+               }
+
+               /* do we have enough in this lump? */
+               for (j = 0; (j < needed) && ((i+j) < pile->num_entries); j++) {
+                       if (pile->list[i+j] & I40E_PILE_VALID_BIT)
+                               break;
+               }
+
+               if (j == needed) {
+                       /* there was enough, so assign it to the requestor */
+                       for (j = 0; j < needed; j++)
+                               pile->list[i+j] = id | I40E_PILE_VALID_BIT;
+                       ret = i;
+                       pile->search_hint = i + j;
+               } else {
+                       /* not enough, so skip over it and continue looking */
+                       i += j;
+               }
+       }
+
+       /* the hint may have skipped free entries earlier in the pile
+        * (i40e_put_lump only pulls it back for some frees), so do one
+        * rescan from the very beginning before giving up
+        */
+       if (ret < 0 && pile->search_hint) {
+               pile->search_hint = 0;
+               i = 0;
+               goto retry;
+       }
+
+       return ret;
+}
+
+/**
+ * i40e_put_lump - return a lump of generic resource
+ * @pile: the pile of resource to search
+ * @index: the base item index
+ * @id: the owner id of the items assigned
+ *
+ * Returns the count of items in the lump, or -EINVAL on a bad pile/index.
+ * Only consecutive entries still tagged with @id are released, so a
+ * partial or stale index frees nothing (count 0) rather than corrupting
+ * another owner's entries.
+ **/
+static int i40e_put_lump(struct i40e_lump_tracking *pile, u16 index, u16 id)
+{
+       int valid_id = (id | I40E_PILE_VALID_BIT);
+       int count = 0;
+       int i;
+
+       if (!pile || index >= pile->num_entries)
+               return -EINVAL;
+
+       for (i = index;
+            i < pile->num_entries && pile->list[i] == valid_id;
+            i++) {
+               pile->list[i] = 0;
+               count++;
+       }
+
+       /* pull the allocation hint back so the freed run is found again */
+       if (count && index < pile->search_hint)
+               pile->search_hint = index;
+
+       return count;
+}
+
+/**
+ * i40e_service_event_schedule - Schedule the service task to wake up
+ * @pf: board private structure
+ *
+ * If not already scheduled, this puts the task into the work queue
+ **/
+static void i40e_service_event_schedule(struct i40e_pf *pf)
+{
+       /* suppress scheduling while the PF is down or mid-reset; the
+        * test_and_set_bit makes the "already scheduled" check atomic
+        */
+       if (!test_bit(__I40E_DOWN, &pf->state) &&
+           !test_bit(__I40E_RESET_RECOVERY_PENDING, &pf->state) &&
+           !test_and_set_bit(__I40E_SERVICE_SCHED, &pf->state))
+               schedule_work(&pf->service_task);
+}
+
+/**
+ * i40e_tx_timeout - Respond to a Tx Hang
+ * @netdev: network interface device structure
+ *
+ * If any port has noticed a Tx timeout, it is likely that the whole
+ * device is munged, not just the one netdev port, so go for the full
+ * reset.
+ *
+ * Recovery escalates one level per timeout (VSI reinit -> PF reset ->
+ * core reset -> global reset -> give up and bring the VSI down); the
+ * level drops back to 0 after 20 seconds without a new timeout.
+ **/
+static void i40e_tx_timeout(struct net_device *netdev)
+{
+       struct i40e_netdev_priv *np = netdev_priv(netdev);
+       struct i40e_vsi *vsi = np->vsi;
+       struct i40e_pf *pf = vsi->back;
+
+       pf->tx_timeout_count++;
+
+       if (time_after(jiffies, (pf->tx_timeout_last_recovery + HZ*20)))
+               pf->tx_timeout_recovery_level = 0;
+       pf->tx_timeout_last_recovery = jiffies;
+       netdev_info(netdev, "tx_timeout recovery level %d\n",
+                   pf->tx_timeout_recovery_level);
+
+       switch (pf->tx_timeout_recovery_level) {
+       case 0:
+               /* disable and re-enable queues for the VSI */
+               if (in_interrupt()) {
+                       /* NOTE(review): the reinit presumably sleeps/takes
+                        * locks, so defer it to the service task here —
+                        * confirm against i40e_vsi_reinit_locked()
+                        */
+                       set_bit(__I40E_REINIT_REQUESTED, &pf->state);
+                       set_bit(__I40E_REINIT_REQUESTED, &vsi->state);
+               } else {
+                       i40e_vsi_reinit_locked(vsi);
+               }
+               break;
+       case 1:
+               set_bit(__I40E_PF_RESET_REQUESTED, &pf->state);
+               break;
+       case 2:
+               set_bit(__I40E_CORE_RESET_REQUESTED, &pf->state);
+               break;
+       case 3:
+               set_bit(__I40E_GLOBAL_RESET_REQUESTED, &pf->state);
+               break;
+       default:
+               netdev_err(netdev, "tx_timeout recovery unsuccessful\n");
+               i40e_down(vsi);
+               break;
+       }
+       /* the service task performs any reset requested above */
+       i40e_service_event_schedule(pf);
+       pf->tx_timeout_recovery_level++;
+}
+
+/**
+ * i40e_release_rx_desc - Store the new tail and head values
+ * @rx_ring: ring to bump
+ * @val: new head index
+ *
+ * Records @val as next_to_use and writes it to the ring's tail register
+ * so the hardware sees the newly prepared descriptors.
+ **/
+static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val)
+{
+       rx_ring->next_to_use = val;
+
+       /* Force memory writes to complete before letting h/w
+        * know there are new descriptors to fetch.  (Only
+        * applicable for weak-ordered memory model archs,
+        * such as IA-64).
+        */
+       wmb();
+       writel(val, rx_ring->tail);
+}
+
+/**
+ * i40e_get_vsi_stats_struct - Get System Network Statistics
+ * @vsi: the VSI we care about
+ *
+ * Returns the address of the device statistics structure.
+ * The statistics are actually updated from the service task.
+ **/
+struct rtnl_link_stats64 *i40e_get_vsi_stats_struct(struct i40e_vsi *vsi)
+{
+       /* single accessor so callers never cache a stale pointer */
+       return &vsi->net_stats;
+}
+
+/**
+ * i40e_get_netdev_stats_struct - Get statistics for netdev interface
+ * @netdev: network interface device structure
+ * @storage: caller-provided struct to copy the cached stats into
+ *
+ * Returns the address of the device statistics structure.
+ * The statistics are actually updated from the service task.
+ **/
+static struct rtnl_link_stats64 *i40e_get_netdev_stats_struct(
+                                            struct net_device *netdev,
+                                            struct rtnl_link_stats64 *storage)
+{
+       struct i40e_netdev_priv *np = netdev_priv(netdev);
+       struct i40e_vsi *vsi = np->vsi;
+
+       /* struct copy of the VSI's cached counters; no hardware access here */
+       *storage = *i40e_get_vsi_stats_struct(vsi);
+
+       return storage;
+}
+
+/**
+ * i40e_vsi_reset_stats - Resets all stats of the given vsi
+ * @vsi: the VSI to have its stats reset (NULL is tolerated and ignored)
+ *
+ * Zeroes the netdev stats, the eth stats, both offset snapshots, and the
+ * per-queue ring counters, then clears stat_offsets_loaded so the next
+ * hardware read re-captures baseline offsets.
+ **/
+void i40e_vsi_reset_stats(struct i40e_vsi *vsi)
+{
+       struct rtnl_link_stats64 *ns;
+       int i;
+
+       if (!vsi)
+               return;
+
+       ns = i40e_get_vsi_stats_struct(vsi);
+       memset(ns, 0, sizeof(*ns));
+       memset(&vsi->net_stats_offsets, 0, sizeof(vsi->net_stats_offsets));
+       memset(&vsi->eth_stats, 0, sizeof(vsi->eth_stats));
+       memset(&vsi->eth_stats_offsets, 0, sizeof(vsi->eth_stats_offsets));
+       /* rings may not be allocated yet during early probe */
+       if (vsi->rx_rings) {
+               for (i = 0; i < vsi->num_queue_pairs; i++) {
+                       memset(&vsi->rx_rings[i].rx_stats, 0,
+                              sizeof(vsi->rx_rings[i].rx_stats));
+                       memset(&vsi->tx_rings[i].tx_stats, 0,
+                              sizeof(vsi->tx_rings[i].tx_stats));
+               }
+       }
+       vsi->stat_offsets_loaded = false;
+}
+
+/**
+ * i40e_pf_reset_stats - Reset all of the stats for the given pf
+ * @pf: the PF to be reset
+ *
+ * Clearing stat_offsets_loaded forces the next stats pass to re-capture
+ * baseline offsets from the hardware counters.
+ **/
+void i40e_pf_reset_stats(struct i40e_pf *pf)
+{
+       memset(&pf->stats, 0, sizeof(pf->stats));
+       memset(&pf->stats_offsets, 0, sizeof(pf->stats_offsets));
+       pf->stat_offsets_loaded = false;
+}
+
+/**
+ * i40e_stat_update48 - read and update a 48 bit stat from the chip
+ * @hw: ptr to the hardware info
+ * @hireg: the high 32 bit reg to read
+ * @loreg: the low 32 bit reg to read
+ * @offset_loaded: has the initial offset been loaded yet
+ * @offset: ptr to current offset value
+ * @stat: ptr to the stat
+ *
+ * Since the device stats are not reset at PFReset, they likely will not
+ * be zeroed when the driver starts.  We'll save the first values read
+ * and use them as offsets to be subtracted from the raw values in order
+ * to report stats that count from zero.  In the process, we also manage
+ * the potential roll-over.
+ **/
+static void i40e_stat_update48(struct i40e_hw *hw, u32 hireg, u32 loreg,
+                              bool offset_loaded, u64 *offset, u64 *stat)
+{
+       u64 new_data;
+
+       if (hw->device_id == I40E_QEMU_DEVICE_ID) {
+               /* the emulated device lacks a 64-bit register read, so
+                * assemble the 48-bit value from two 32-bit reads
+                */
+               new_data = rd32(hw, loreg);
+               new_data |= ((u64)(rd32(hw, hireg) & 0xFFFF)) << 32;
+       } else {
+               new_data = rd64(hw, loreg);
+       }
+       if (!offset_loaded)
+               *offset = new_data;
+       if (likely(new_data >= *offset))
+               *stat = new_data - *offset;
+       else
+               /* counter wrapped past 2^48; add back the modulus */
+               *stat = (new_data + ((u64)1 << 48)) - *offset;
+       *stat &= 0xFFFFFFFFFFFFULL;
+}
+
+/**
+ * i40e_stat_update32 - read and update a 32 bit stat from the chip
+ * @hw: ptr to the hardware info
+ * @reg: the hw reg to read
+ * @offset_loaded: has the initial offset been loaded yet
+ * @offset: ptr to current offset value
+ * @stat: ptr to the stat
+ *
+ * 32-bit analogue of i40e_stat_update48(): subtracts the first-read
+ * baseline and handles a single 2^32 roll-over.
+ **/
+static void i40e_stat_update32(struct i40e_hw *hw, u32 reg,
+                              bool offset_loaded, u64 *offset, u64 *stat)
+{
+       u32 new_data;
+
+       new_data = rd32(hw, reg);
+       if (!offset_loaded)
+               *offset = new_data;
+       if (likely(new_data >= *offset))
+               *stat = (u32)(new_data - *offset);
+       else
+               /* counter wrapped past 2^32; add back the modulus */
+               *stat = (u32)((new_data + ((u64)1 << 32)) - *offset);
+}
+
+/**
+ * i40e_update_eth_stats - Update VSI-specific ethernet statistics counters.
+ * @vsi: the VSI to be updated
+ *
+ * Pulls the per-VSI GLV_* counters (indexed by the VSI's stat counter
+ * slot) into vsi->eth_stats; the first pass only records baseline
+ * offsets (stat_offsets_loaded is set at the end).
+ **/
+void i40e_update_eth_stats(struct i40e_vsi *vsi)
+{
+       int stat_idx = le16_to_cpu(vsi->info.stat_counter_idx);
+       struct i40e_pf *pf = vsi->back;
+       struct i40e_hw *hw = &pf->hw;
+       struct i40e_eth_stats *oes;
+       struct i40e_eth_stats *es;     /* device's eth stats */
+
+       es = &vsi->eth_stats;
+       oes = &vsi->eth_stats_offsets;
+
+       /* Gather up the stats that the hw collects */
+       i40e_stat_update32(hw, I40E_GLV_TEPC(stat_idx),
+                          vsi->stat_offsets_loaded,
+                          &oes->tx_errors, &es->tx_errors);
+       i40e_stat_update32(hw, I40E_GLV_RDPC(stat_idx),
+                          vsi->stat_offsets_loaded,
+                          &oes->rx_discards, &es->rx_discards);
+
+       /* Rx byte/unicast/multicast/broadcast counters (48-bit pairs) */
+       i40e_stat_update48(hw, I40E_GLV_GORCH(stat_idx),
+                          I40E_GLV_GORCL(stat_idx),
+                          vsi->stat_offsets_loaded,
+                          &oes->rx_bytes, &es->rx_bytes);
+       i40e_stat_update48(hw, I40E_GLV_UPRCH(stat_idx),
+                          I40E_GLV_UPRCL(stat_idx),
+                          vsi->stat_offsets_loaded,
+                          &oes->rx_unicast, &es->rx_unicast);
+       i40e_stat_update48(hw, I40E_GLV_MPRCH(stat_idx),
+                          I40E_GLV_MPRCL(stat_idx),
+                          vsi->stat_offsets_loaded,
+                          &oes->rx_multicast, &es->rx_multicast);
+       i40e_stat_update48(hw, I40E_GLV_BPRCH(stat_idx),
+                          I40E_GLV_BPRCL(stat_idx),
+                          vsi->stat_offsets_loaded,
+                          &oes->rx_broadcast, &es->rx_broadcast);
+
+       /* matching Tx counters */
+       i40e_stat_update48(hw, I40E_GLV_GOTCH(stat_idx),
+                          I40E_GLV_GOTCL(stat_idx),
+                          vsi->stat_offsets_loaded,
+                          &oes->tx_bytes, &es->tx_bytes);
+       i40e_stat_update48(hw, I40E_GLV_UPTCH(stat_idx),
+                          I40E_GLV_UPTCL(stat_idx),
+                          vsi->stat_offsets_loaded,
+                          &oes->tx_unicast, &es->tx_unicast);
+       i40e_stat_update48(hw, I40E_GLV_MPTCH(stat_idx),
+                          I40E_GLV_MPTCL(stat_idx),
+                          vsi->stat_offsets_loaded,
+                          &oes->tx_multicast, &es->tx_multicast);
+       i40e_stat_update48(hw, I40E_GLV_BPTCH(stat_idx),
+                          I40E_GLV_BPTCL(stat_idx),
+                          vsi->stat_offsets_loaded,
+                          &oes->tx_broadcast, &es->tx_broadcast);
+       vsi->stat_offsets_loaded = true;
+}
+
+/**
+ * i40e_update_veb_stats - Update Switch component statistics
+ * @veb: the VEB being updated
+ *
+ * Same offset-baseline scheme as i40e_update_eth_stats(), but reads the
+ * per-switch-component GLSW_* counters indexed by the VEB's stats slot.
+ **/
+static void i40e_update_veb_stats(struct i40e_veb *veb)
+{
+       struct i40e_pf *pf = veb->pf;
+       struct i40e_hw *hw = &pf->hw;
+       struct i40e_eth_stats *oes;
+       struct i40e_eth_stats *es;     /* device's eth stats */
+       int idx = 0;
+
+       idx = veb->stats_idx;
+       es = &veb->stats;
+       oes = &veb->stats_offsets;
+
+       /* Gather up the stats that the hw collects */
+       i40e_stat_update32(hw, I40E_GLSW_TDPC(idx),
+                          veb->stat_offsets_loaded,
+                          &oes->tx_discards, &es->tx_discards);
+       i40e_stat_update32(hw, I40E_GLSW_RUPP(idx),
+                          veb->stat_offsets_loaded,
+                          &oes->rx_unknown_protocol, &es->rx_unknown_protocol);
+
+       /* Rx counters (48-bit high/low register pairs) */
+       i40e_stat_update48(hw, I40E_GLSW_GORCH(idx), I40E_GLSW_GORCL(idx),
+                          veb->stat_offsets_loaded,
+                          &oes->rx_bytes, &es->rx_bytes);
+       i40e_stat_update48(hw, I40E_GLSW_UPRCH(idx), I40E_GLSW_UPRCL(idx),
+                          veb->stat_offsets_loaded,
+                          &oes->rx_unicast, &es->rx_unicast);
+       i40e_stat_update48(hw, I40E_GLSW_MPRCH(idx), I40E_GLSW_MPRCL(idx),
+                          veb->stat_offsets_loaded,
+                          &oes->rx_multicast, &es->rx_multicast);
+       i40e_stat_update48(hw, I40E_GLSW_BPRCH(idx), I40E_GLSW_BPRCL(idx),
+                          veb->stat_offsets_loaded,
+                          &oes->rx_broadcast, &es->rx_broadcast);
+
+       /* matching Tx counters */
+       i40e_stat_update48(hw, I40E_GLSW_GOTCH(idx), I40E_GLSW_GOTCL(idx),
+                          veb->stat_offsets_loaded,
+                          &oes->tx_bytes, &es->tx_bytes);
+       i40e_stat_update48(hw, I40E_GLSW_UPTCH(idx), I40E_GLSW_UPTCL(idx),
+                          veb->stat_offsets_loaded,
+                          &oes->tx_unicast, &es->tx_unicast);
+       i40e_stat_update48(hw, I40E_GLSW_MPTCH(idx), I40E_GLSW_MPTCL(idx),
+                          veb->stat_offsets_loaded,
+                          &oes->tx_multicast, &es->tx_multicast);
+       i40e_stat_update48(hw, I40E_GLSW_BPTCH(idx), I40E_GLSW_BPTCL(idx),
+                          veb->stat_offsets_loaded,
+                          &oes->tx_broadcast, &es->tx_broadcast);
+       veb->stat_offsets_loaded = true;
+}
+
+/**
+ * i40e_update_link_xoff_rx - Update XOFF received in link flow control mode
+ * @pf: the corresponding PF
+ *
+ * Update the Rx XOFF counter (PAUSE frames) in link flow control mode.
+ * When new XOFFs arrived, the link partner legitimately paused our Tx,
+ * so the Tx-hang detector is disarmed on every ring to avoid a false
+ * hang report.
+ **/
+static void i40e_update_link_xoff_rx(struct i40e_pf *pf)
+{
+       struct i40e_hw_port_stats *osd = &pf->stats_offsets;
+       struct i40e_hw_port_stats *nsd = &pf->stats;
+       struct i40e_hw *hw = &pf->hw;
+       u64 xoff = 0;
+       u16 i, v;
+
+       /* only meaningful when link-level flow control can pause our Tx */
+       if ((hw->fc.current_mode != I40E_FC_FULL) &&
+           (hw->fc.current_mode != I40E_FC_RX_PAUSE))
+               return;
+
+       xoff = nsd->link_xoff_rx;
+       i40e_stat_update32(hw, I40E_GLPRT_LXOFFRXC(hw->port),
+                          pf->stat_offsets_loaded,
+                          &osd->link_xoff_rx, &nsd->link_xoff_rx);
+
+       /* No new LFC xoff rx */
+       if (!(nsd->link_xoff_rx - xoff))
+               return;
+
+       /* Clear the __I40E_HANG_CHECK_ARMED bit for all Tx rings */
+       for (v = 0; v < pf->hw.func_caps.num_vsis; v++) {
+               struct i40e_vsi *vsi = pf->vsi[v];
+
+               if (!vsi)
+                       continue;
+
+               for (i = 0; i < vsi->num_queue_pairs; i++) {
+                       struct i40e_ring *ring = &vsi->tx_rings[i];
+                       clear_bit(__I40E_HANG_CHECK_ARMED, &ring->state);
+               }
+       }
+}
+
+/**
+ * i40e_update_prio_xoff_rx - Update XOFF received in PFC mode
+ * @pf: the corresponding PF
+ *
+ * Update the Rx XOFF counter (PAUSE frames) in PFC mode.  Unlike link
+ * flow control, priority flow control pauses per traffic class, so the
+ * Tx-hang detector is only disarmed on rings whose TC saw new XOFFs.
+ * Falls back to i40e_update_link_xoff_rx() when DCB/PFC is not active.
+ **/
+static void i40e_update_prio_xoff_rx(struct i40e_pf *pf)
+{
+       struct i40e_hw_port_stats *osd = &pf->stats_offsets;
+       struct i40e_hw_port_stats *nsd = &pf->stats;
+       bool xoff[I40E_MAX_TRAFFIC_CLASS] = {false};
+       struct i40e_dcbx_config *dcb_cfg;
+       struct i40e_hw *hw = &pf->hw;
+       u16 i, v;
+       u8 tc;
+
+       dcb_cfg = &hw->local_dcbx_config;
+
+       /* See if DCB enabled with PFC TC */
+       if (!(pf->flags & I40E_FLAG_DCB_ENABLED) ||
+           !(dcb_cfg->pfc.pfcenable)) {
+               i40e_update_link_xoff_rx(pf);
+               return;
+       }
+
+       for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) {
+               u64 prio_xoff = nsd->priority_xoff_rx[i];
+               i40e_stat_update32(hw, I40E_GLPRT_PXOFFRXC(hw->port, i),
+                                  pf->stat_offsets_loaded,
+                                  &osd->priority_xoff_rx[i],
+                                  &nsd->priority_xoff_rx[i]);
+
+               /* No new PFC xoff rx */
+               if (!(nsd->priority_xoff_rx[i] - prio_xoff))
+                       continue;
+               /* Get the TC for given priority */
+               tc = dcb_cfg->etscfg.prioritytable[i];
+               xoff[tc] = true;
+       }
+
+       /* Clear the __I40E_HANG_CHECK_ARMED bit for Tx rings */
+       for (v = 0; v < pf->hw.func_caps.num_vsis; v++) {
+               struct i40e_vsi *vsi = pf->vsi[v];
+
+               if (!vsi)
+                       continue;
+
+               for (i = 0; i < vsi->num_queue_pairs; i++) {
+                       struct i40e_ring *ring = &vsi->tx_rings[i];
+
+                       /* disarm only rings on a paused traffic class */
+                       tc = ring->dcb_tc;
+                       if (xoff[tc])
+                               clear_bit(__I40E_HANG_CHECK_ARMED,
+                                         &ring->state);
+               }
+       }
+}
+
+/**
+ * i40e_update_stats - Update the board statistics counters.
+ * @vsi: the VSI to be updated
+ *
+ * There are a few instances where we store the same stat in a
+ * couple of different structs.  This is partly because we have
+ * the netdev stats that need to be filled out, which is slightly
+ * different from the "eth_stats" defined by the chip and used in
+ * VF communications.  We sort it all out here in a central place.
+ **/
+void i40e_update_stats(struct i40e_vsi *vsi)
+{
+       struct i40e_pf *pf = vsi->back;
+       struct i40e_hw *hw = &pf->hw;
+       struct rtnl_link_stats64 *ons;
+       struct rtnl_link_stats64 *ns;   /* netdev stats */
+       struct i40e_eth_stats *oes;
+       struct i40e_eth_stats *es;     /* device's eth stats */
+       u32 tx_restart, tx_busy;
+       u32 rx_page, rx_buf;
+       u64 rx_p, rx_b;
+       u64 tx_p, tx_b;
+       int i;
+       u16 q;
+
+       if (test_bit(__I40E_DOWN, &vsi->state) ||
+           test_bit(__I40E_CONFIG_BUSY, &pf->state))
+               return;
+
+       ns = i40e_get_vsi_stats_struct(vsi);
+       ons = &vsi->net_stats_offsets;
+       es = &vsi->eth_stats;
+       oes = &vsi->eth_stats_offsets;
+
+       /* Gather up the netdev and vsi stats that the driver collects
+        * on the fly during packet processing
+        */
+       rx_b = rx_p = 0;
+       tx_b = tx_p = 0;
+       tx_restart = tx_busy = 0;
+       rx_page = 0;
+       rx_buf = 0;
+       for (q = 0; q < vsi->num_queue_pairs; q++) {
+               struct i40e_ring *p;
+
+               p = &vsi->rx_rings[q];
+               rx_b += p->rx_stats.bytes;
+               rx_p += p->rx_stats.packets;
+               rx_buf += p->rx_stats.alloc_rx_buff_failed;
+               rx_page += p->rx_stats.alloc_rx_page_failed;
+
+               p = &vsi->tx_rings[q];
+               tx_b += p->tx_stats.bytes;
+               tx_p += p->tx_stats.packets;
+               tx_restart += p->tx_stats.restart_queue;
+               tx_busy += p->tx_stats.tx_busy;
+       }
+       vsi->tx_restart = tx_restart;
+       vsi->tx_busy = tx_busy;
+       vsi->rx_page_failed = rx_page;
+       vsi->rx_buf_failed = rx_buf;
+
+       ns->rx_packets = rx_p;
+       ns->rx_bytes = rx_b;
+       ns->tx_packets = tx_p;
+       ns->tx_bytes = tx_b;
+
+       i40e_update_eth_stats(vsi);
+       /* update netdev stats from eth stats */
+       ons->rx_errors = oes->rx_errors;
+       ns->rx_errors = es->rx_errors;
+       ons->tx_errors = oes->tx_errors;
+       ns->tx_errors = es->tx_errors;
+       ons->multicast = oes->rx_multicast;
+       ns->multicast = es->rx_multicast;
+       ons->tx_dropped = oes->tx_discards;
+       ns->tx_dropped = es->tx_discards;
+
+       /* Get the port data only if this is the main PF VSI */
+       if (vsi == pf->vsi[pf->lan_vsi]) {
+               struct i40e_hw_port_stats *nsd = &pf->stats;
+               struct i40e_hw_port_stats *osd = &pf->stats_offsets;
+
+               i40e_stat_update48(hw, I40E_GLPRT_GORCH(hw->port),
+                                  I40E_GLPRT_GORCL(hw->port),
+                                  pf->stat_offsets_loaded,
+                                  &osd->eth.rx_bytes, &nsd->eth.rx_bytes);
+               i40e_stat_update48(hw, I40E_GLPRT_GOTCH(hw->port),
+                                  I40E_GLPRT_GOTCL(hw->port),
+                                  pf->stat_offsets_loaded,
+                                  &osd->eth.tx_bytes, &nsd->eth.tx_bytes);
+               i40e_stat_update32(hw, I40E_GLPRT_RDPC(hw->port),
+                                  pf->stat_offsets_loaded,
+                                  &osd->eth.rx_discards,
+                                  &nsd->eth.rx_discards);
+               i40e_stat_update32(hw, I40E_GLPRT_TDPC(hw->port),
+                                  pf->stat_offsets_loaded,
+                                  &osd->eth.tx_discards,
+                                  &nsd->eth.tx_discards);
+               i40e_stat_update48(hw, I40E_GLPRT_MPRCH(hw->port),
+                                  I40E_GLPRT_MPRCL(hw->port),
+                                  pf->stat_offsets_loaded,
+                                  &osd->eth.rx_multicast,
+                                  &nsd->eth.rx_multicast);
+
+               i40e_stat_update32(hw, I40E_GLPRT_TDOLD(hw->port),
+                                  pf->stat_offsets_loaded,
+                                  &osd->tx_dropped_link_down,
+                                  &nsd->tx_dropped_link_down);
+
+               i40e_stat_update32(hw, I40E_GLPRT_CRCERRS(hw->port),
+                                  pf->stat_offsets_loaded,
+                                  &osd->crc_errors, &nsd->crc_errors);
+               ns->rx_crc_errors = nsd->crc_errors;
+
+               i40e_stat_update32(hw, I40E_GLPRT_ILLERRC(hw->port),
+                                  pf->stat_offsets_loaded,
+                                  &osd->illegal_bytes, &nsd->illegal_bytes);
+               ns->rx_errors = nsd->crc_errors
+                               + nsd->illegal_bytes;
+
+               i40e_stat_update32(hw, I40E_GLPRT_MLFC(hw->port),
+                                  pf->stat_offsets_loaded,
+                                  &osd->mac_local_faults,
+                                  &nsd->mac_local_faults);
+               i40e_stat_update32(hw, I40E_GLPRT_MRFC(hw->port),
+                                  pf->stat_offsets_loaded,
+                                  &osd->mac_remote_faults,
+                                  &nsd->mac_remote_faults);
+
+               i40e_stat_update32(hw, I40E_GLPRT_RLEC(hw->port),
+                                  pf->stat_offsets_loaded,
+                                  &osd->rx_length_errors,
+                                  &nsd->rx_length_errors);
+               ns->rx_length_errors = nsd->rx_length_errors;
+
+               i40e_stat_update32(hw, I40E_GLPRT_LXONRXC(hw->port),
+                                  pf->stat_offsets_loaded,
+                                  &osd->link_xon_rx, &nsd->link_xon_rx);
+               i40e_stat_update32(hw, I40E_GLPRT_LXONTXC(hw->port),
+                                  pf->stat_offsets_loaded,
+                                  &osd->link_xon_tx, &nsd->link_xon_tx);
+               i40e_update_prio_xoff_rx(pf);  /* handles I40E_GLPRT_LXOFFRXC */
+               i40e_stat_update32(hw, I40E_GLPRT_LXOFFTXC(hw->port),
+                                  pf->stat_offsets_loaded,
+                                  &osd->link_xoff_tx, &nsd->link_xoff_tx);
+
+               for (i = 0; i < 8; i++) {
+                       i40e_stat_update32(hw, I40E_GLPRT_PXONRXC(hw->port, i),
+                                          pf->stat_offsets_loaded,
+                                          &osd->priority_xon_rx[i],
+                                          &nsd->priority_xon_rx[i]);
+                       i40e_stat_update32(hw, I40E_GLPRT_PXONTXC(hw->port, i),
+                                          pf->stat_offsets_loaded,
+                                          &osd->priority_xon_tx[i],
+                                          &nsd->priority_xon_tx[i]);
+                       i40e_stat_update32(hw, I40E_GLPRT_PXOFFTXC(hw->port, i),
+                                          pf->stat_offsets_loaded,
+                                          &osd->priority_xoff_tx[i],
+                                          &nsd->priority_xoff_tx[i]);
+                       i40e_stat_update32(hw,
+                                          I40E_GLPRT_RXON2OFFCNT(hw->port, i),
+                                          pf->stat_offsets_loaded,
+                                          &osd->priority_xon_2_xoff[i],
+                                          &nsd->priority_xon_2_xoff[i]);
+               }
+
+               i40e_stat_update48(hw, I40E_GLPRT_PRC64H(hw->port),
+                                  I40E_GLPRT_PRC64L(hw->port),
+                                  pf->stat_offsets_loaded,
+                                  &osd->rx_size_64, &nsd->rx_size_64);
+               i40e_stat_update48(hw, I40E_GLPRT_PRC127H(hw->port),
+                                  I40E_GLPRT_PRC127L(hw->port),
+                                  pf->stat_offsets_loaded,
+                                  &osd->rx_size_127, &nsd->rx_size_127);
+               i40e_stat_update48(hw, I40E_GLPRT_PRC255H(hw->port),
+                                  I40E_GLPRT_PRC255L(hw->port),
+                                  pf->stat_offsets_loaded,
+                                  &osd->rx_size_255, &nsd->rx_size_255);
+               i40e_stat_update48(hw, I40E_GLPRT_PRC511H(hw->port),
+                                  I40E_GLPRT_PRC511L(hw->port),
+                                  pf->stat_offsets_loaded,
+                                  &osd->rx_size_511, &nsd->rx_size_511);
+               i40e_stat_update48(hw, I40E_GLPRT_PRC1023H(hw->port),
+                                  I40E_GLPRT_PRC1023L(hw->port),
+                                  pf->stat_offsets_loaded,
+                                  &osd->rx_size_1023, &nsd->rx_size_1023);
+               i40e_stat_update48(hw, I40E_GLPRT_PRC1522H(hw->port),
+                                  I40E_GLPRT_PRC1522L(hw->port),
+                                  pf->stat_offsets_loaded,
+                                  &osd->rx_size_1522, &nsd->rx_size_1522);
+               i40e_stat_update48(hw, I40E_GLPRT_PRC9522H(hw->port),
+                                  I40E_GLPRT_PRC9522L(hw->port),
+                                  pf->stat_offsets_loaded,
+                                  &osd->rx_size_big, &nsd->rx_size_big);
+
+               i40e_stat_update48(hw, I40E_GLPRT_PTC64H(hw->port),
+                                  I40E_GLPRT_PTC64L(hw->port),
+                                  pf->stat_offsets_loaded,
+                                  &osd->tx_size_64, &nsd->tx_size_64);
+               i40e_stat_update48(hw, I40E_GLPRT_PTC127H(hw->port),
+                                  I40E_GLPRT_PTC127L(hw->port),
+                                  pf->stat_offsets_loaded,
+                                  &osd->tx_size_127, &nsd->tx_size_127);
+               i40e_stat_update48(hw, I40E_GLPRT_PTC255H(hw->port),
+                                  I40E_GLPRT_PTC255L(hw->port),
+                                  pf->stat_offsets_loaded,
+                                  &osd->tx_size_255, &nsd->tx_size_255);
+               i40e_stat_update48(hw, I40E_GLPRT_PTC511H(hw->port),
+                                  I40E_GLPRT_PTC511L(hw->port),
+                                  pf->stat_offsets_loaded,
+                                  &osd->tx_size_511, &nsd->tx_size_511);
+               i40e_stat_update48(hw, I40E_GLPRT_PTC1023H(hw->port),
+                                  I40E_GLPRT_PTC1023L(hw->port),
+                                  pf->stat_offsets_loaded,
+                                  &osd->tx_size_1023, &nsd->tx_size_1023);
+               i40e_stat_update48(hw, I40E_GLPRT_PTC1522H(hw->port),
+                                  I40E_GLPRT_PTC1522L(hw->port),
+                                  pf->stat_offsets_loaded,
+                                  &osd->tx_size_1522, &nsd->tx_size_1522);
+               i40e_stat_update48(hw, I40E_GLPRT_PTC9522H(hw->port),
+                                  I40E_GLPRT_PTC9522L(hw->port),
+                                  pf->stat_offsets_loaded,
+                                  &osd->tx_size_big, &nsd->tx_size_big);
+
+               i40e_stat_update32(hw, I40E_GLPRT_RUC(hw->port),
+                                  pf->stat_offsets_loaded,
+                                  &osd->rx_undersize, &nsd->rx_undersize);
+               i40e_stat_update32(hw, I40E_GLPRT_RFC(hw->port),
+                                  pf->stat_offsets_loaded,
+                                  &osd->rx_fragments, &nsd->rx_fragments);
+               i40e_stat_update32(hw, I40E_GLPRT_ROC(hw->port),
+                                  pf->stat_offsets_loaded,
+                                  &osd->rx_oversize, &nsd->rx_oversize);
+               i40e_stat_update32(hw, I40E_GLPRT_RJC(hw->port),
+                                  pf->stat_offsets_loaded,
+                                  &osd->rx_jabber, &nsd->rx_jabber);
+       }
+
+       pf->stat_offsets_loaded = true;
+}
+
+/**
+ * i40e_find_filter - Search VSI filter list for specific mac/vlan filter
+ * @vsi: the VSI to be searched
+ * @macaddr: the MAC address
+ * @vlan: the vlan
+ * @is_vf: make sure its a vf filter, else doesn't matter
+ * @is_netdev: make sure its a netdev filter, else doesn't matter
+ *
+ * Returns ptr to the filter object or NULL
+ **/
+static struct i40e_mac_filter *i40e_find_filter(struct i40e_vsi *vsi,
+                                               u8 *macaddr, s16 vlan,
+                                               bool is_vf, bool is_netdev)
+{
+       struct i40e_mac_filter *cur;
+
+       if (!vsi || !macaddr)
+               return NULL;
+
+       /* first entry matching mac, vlan and the requested flags wins */
+       list_for_each_entry(cur, &vsi->mac_filter_list, list) {
+               if (!ether_addr_equal(macaddr, cur->macaddr))
+                       continue;
+               if (cur->vlan != vlan)
+                       continue;
+               if (is_vf && !cur->is_vf)
+                       continue;
+               if (is_netdev && !cur->is_netdev)
+                       continue;
+               return cur;
+       }
+
+       return NULL;
+}
+
+/**
+ * i40e_find_mac - Find a mac addr in the macvlan filters list
+ * @vsi: the VSI to be searched
+ * @macaddr: the MAC address we are searching for
+ * @is_vf: make sure its a vf filter, else doesn't matter
+ * @is_netdev: make sure its a netdev filter, else doesn't matter
+ *
+ * Returns the first filter with the provided MAC address or NULL if
+ * MAC address was not found
+ **/
+struct i40e_mac_filter *i40e_find_mac(struct i40e_vsi *vsi, u8 *macaddr,
+                                     bool is_vf, bool is_netdev)
+{
+       struct i40e_mac_filter *cur;
+
+       if (!vsi || !macaddr)
+               return NULL;
+
+       /* like i40e_find_filter() but the vlan id is ignored */
+       list_for_each_entry(cur, &vsi->mac_filter_list, list) {
+               if (!ether_addr_equal(macaddr, cur->macaddr))
+                       continue;
+               if (is_vf && !cur->is_vf)
+                       continue;
+               if (is_netdev && !cur->is_netdev)
+                       continue;
+               return cur;
+       }
+
+       return NULL;
+}
+
+/**
+ * i40e_is_vsi_in_vlan - Check if VSI is in vlan mode
+ * @vsi: the VSI to be searched
+ *
+ * Returns true if VSI is in vlan mode or false otherwise
+ **/
+bool i40e_is_vsi_in_vlan(struct i40e_vsi *vsi)
+{
+       struct i40e_mac_filter *filter;
+       bool in_vlan = false;
+
+       /* Only -1 on every filter denotes not in vlan mode, so the whole
+        * list has to be scanned before we can say "no".
+        */
+       list_for_each_entry(filter, &vsi->mac_filter_list, list) {
+               if (filter->vlan >= 0) {
+                       in_vlan = true;
+                       break;
+               }
+       }
+
+       return in_vlan;
+}
+
+/**
+ * i40e_put_mac_in_vlan - Make macvlan filters from macaddrs and vlans
+ * @vsi: the VSI to be searched
+ * @macaddr: the mac address to be filtered
+ * @is_vf: true if it is a vf
+ * @is_netdev: true if it is a netdev
+ *
+ * Goes through all the macvlan filters and adds a
+ * macvlan filter for each unique vlan that already exists
+ *
+ * Returns first filter found on success, else NULL
+ **/
+struct i40e_mac_filter *i40e_put_mac_in_vlan(struct i40e_vsi *vsi, u8 *macaddr,
+                                            bool is_vf, bool is_netdev)
+{
+       struct i40e_mac_filter *f;
+
+       /* Walk the existing filters and make sure a (macaddr, vlan) pair
+        * exists for every vlan already present on the VSI.  Note that
+        * i40e_add_filter() inserts new entries at the list head (via
+        * list_add), so this in-loop mutation does not revisit the
+        * entries it creates.
+        */
+       list_for_each_entry(f, &vsi->mac_filter_list, list) {
+               if (!i40e_find_filter(vsi, macaddr, f->vlan,
+                                     is_vf, is_netdev)) {
+                       /* NULL here means allocation failure; earlier
+                        * additions from this loop are left in place
+                        */
+                       if (!i40e_add_filter(vsi, macaddr, f->vlan,
+                                               is_vf, is_netdev))
+                               return NULL;
+               }
+       }
+
+       return list_first_entry_or_null(&vsi->mac_filter_list,
+                                       struct i40e_mac_filter, list);
+}
+
+/**
+ * i40e_add_filter - Add a mac/vlan filter to the VSI
+ * @vsi: the VSI to be searched
+ * @macaddr: the MAC address
+ * @vlan: the vlan
+ * @is_vf: make sure its a vf filter, else doesn't matter
+ * @is_netdev: make sure its a netdev filter, else doesn't matter
+ *
+ * Filters are reference counted: re-adding an existing filter bumps its
+ * counter (once per distinct user class: vf, netdev, or plain caller).
+ *
+ * Returns ptr to the filter object or NULL when no memory available.
+ **/
+struct i40e_mac_filter *i40e_add_filter(struct i40e_vsi *vsi,
+                                       u8 *macaddr, s16 vlan,
+                                       bool is_vf, bool is_netdev)
+{
+       struct i40e_mac_filter *f;
+
+       if (!vsi || !macaddr)
+               return NULL;
+
+       /* reuse an existing entry if one matches exactly */
+       f = i40e_find_filter(vsi, macaddr, vlan, is_vf, is_netdev);
+       if (!f) {
+               /* GFP_ATOMIC: callers may be in atomic context */
+               f = kzalloc(sizeof(*f), GFP_ATOMIC);
+               if (!f)
+                       goto add_filter_out;
+
+               memcpy(f->macaddr, macaddr, ETH_ALEN);
+               f->vlan = vlan;
+               f->changed = true;
+
+               INIT_LIST_HEAD(&f->list);
+               list_add(&f->list, &vsi->mac_filter_list);
+       }
+
+       /* increment counter and add a new flag if needed */
+       if (is_vf) {
+               if (!f->is_vf) {
+                       f->is_vf = true;
+                       f->counter++;
+               }
+       } else if (is_netdev) {
+               if (!f->is_netdev) {
+                       f->is_netdev = true;
+                       f->counter++;
+               }
+       } else {
+               /* each plain (non-vf, non-netdev) user takes a reference */
+               f->counter++;
+       }
+
+       /* changed tells sync_filters_subtask to
+        * push the filter down to the firmware
+        */
+       if (f->changed) {
+               vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED;
+               vsi->back->flags |= I40E_FLAG_FILTER_SYNC;
+       }
+
+add_filter_out:
+       return f;
+}
+
+/**
+ * i40e_del_filter - Remove a mac/vlan filter from the VSI
+ * @vsi: the VSI to be searched
+ * @macaddr: the MAC address
+ * @vlan: the vlan
+ * @is_vf: make sure it's a vf filter, else doesn't matter
+ * @is_netdev: make sure it's a netdev filter, else doesn't matter
+ *
+ * Drops one reference on the matching filter; the entry itself is only
+ * marked for firmware removal (counter == 0), the list node is freed
+ * later by i40e_sync_vsi_filters().
+ **/
+void i40e_del_filter(struct i40e_vsi *vsi,
+                    u8 *macaddr, s16 vlan,
+                    bool is_vf, bool is_netdev)
+{
+       struct i40e_mac_filter *f;
+
+       if (!vsi || !macaddr)
+               return;
+
+       /* no such filter, or no references left to drop */
+       f = i40e_find_filter(vsi, macaddr, vlan, is_vf, is_netdev);
+       if (!f || f->counter == 0)
+               return;
+
+       if (is_vf) {
+               if (f->is_vf) {
+                       f->is_vf = false;
+                       f->counter--;
+               }
+       } else if (is_netdev) {
+               if (f->is_netdev) {
+                       f->is_netdev = false;
+                       f->counter--;
+               }
+       } else {
+               /* make sure we don't remove a filter in use by vf or netdev */
+               int min_f = 0;
+               min_f += (f->is_vf ? 1 : 0);
+               min_f += (f->is_netdev ? 1 : 0);
+
+               /* min_f references are owned by the vf/netdev flags above,
+                * so a plain caller may only drop the count down to min_f
+                */
+               if (f->counter > min_f)
+                       f->counter--;
+       }
+
+       /* counter == 0 tells sync_filters_subtask to
+        * remove the filter from the firmware's list
+        */
+       if (f->counter == 0) {
+               f->changed = true;
+               vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED;
+               vsi->back->flags |= I40E_FLAG_FILTER_SYNC;
+       }
+}
+
+/**
+ * i40e_set_mac - NDO callback to set mac address
+ * @netdev: network interface device structure
+ * @p: pointer to an address structure
+ *
+ * Returns 0 on success, negative on failure
+ **/
+static int i40e_set_mac(struct net_device *netdev, void *p)
+{
+       struct i40e_netdev_priv *np = netdev_priv(netdev);
+       struct i40e_vsi *vsi = np->vsi;
+       struct sockaddr *addr = p;
+       struct i40e_mac_filter *f;
+
+       if (!is_valid_ether_addr(addr->sa_data))
+               return -EADDRNOTAVAIL;
+
+       netdev_info(netdev, "set mac address=%pM\n", addr->sa_data);
+
+       /* nothing to do if the address is unchanged */
+       if (ether_addr_equal(netdev->dev_addr, addr->sa_data))
+               return 0;
+
+       /* the main VSI additionally writes the new LAA to the hardware
+        * via the admin queue before updating the software filters
+        */
+       if (vsi->type == I40E_VSI_MAIN) {
+               i40e_status ret;
+               ret = i40e_aq_mac_address_write(&vsi->back->hw,
+                                               I40E_AQC_WRITE_TYPE_LAA_ONLY,
+                                               addr->sa_data, NULL);
+               if (ret) {
+                       netdev_info(netdev,
+                                   "Addr change for Main VSI failed: %d\n",
+                                   ret);
+                       return -EADDRNOTAVAIL;
+               }
+
+               memcpy(vsi->back->hw.mac.addr, addr->sa_data, netdev->addr_len);
+       }
+
+       /* In order to be sure to not drop any packets, add the new address
+        * then delete the old one.
+        */
+       f = i40e_add_filter(vsi, addr->sa_data, I40E_VLAN_ANY, false, false);
+       if (!f)
+               return -ENOMEM;
+
+       /* push the add, then the delete, through the firmware separately */
+       i40e_sync_vsi_filters(vsi);
+       i40e_del_filter(vsi, netdev->dev_addr, I40E_VLAN_ANY, false, false);
+       i40e_sync_vsi_filters(vsi);
+
+       memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
+
+       return 0;
+}
+
+/**
+ * i40e_vsi_setup_queue_map - Setup a VSI queue map based on enabled_tc
+ * @vsi: the VSI being setup
+ * @ctxt: VSI context structure
+ * @enabled_tc: Enabled TCs bitmap
+ * @is_add: True if called before Add VSI
+ *
+ * Setup VSI queue mapping for enabled traffic classes.
+ **/
+static void i40e_vsi_setup_queue_map(struct i40e_vsi *vsi,
+                                    struct i40e_vsi_context *ctxt,
+                                    u8 enabled_tc,
+                                    bool is_add)
+{
+       struct i40e_pf *pf = vsi->back;
+       u16 sections = 0;
+       u8 netdev_tc = 0;
+       u16 numtc = 0;
+       u16 qcount;
+       u8 offset;
+       u16 qmap;
+       int i;
+
+       sections = I40E_AQ_VSI_PROP_QUEUE_MAP_VALID;
+       offset = 0;
+
+       if (enabled_tc && (vsi->back->flags & I40E_FLAG_DCB_ENABLED)) {
+               /* Find numtc from enabled TC bitmap */
+               for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+                       if (enabled_tc & (1 << i)) /* TC is enabled */
+                               numtc++;
+               }
+               if (!numtc) {
+                       dev_warn(&pf->pdev->dev, "DCB is enabled but no TC enabled, forcing TC0\n");
+                       numtc = 1;
+               }
+       } else {
+               /* At least TC0 is enabled in case of non-DCB case */
+               numtc = 1;
+       }
+
+       vsi->tc_config.numtc = numtc;
+       /* an empty bitmap degenerates to TC0 only */
+       vsi->tc_config.enabled_tc = enabled_tc ? enabled_tc : 1;
+
+       /* Setup queue offset/count for all TCs for given VSI */
+       for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+               /* See if the given TC is enabled for the given VSI */
+               if (vsi->tc_config.enabled_tc & (1 << i)) { /* TC is enabled */
+                       int pow, num_qps;
+
+                       vsi->tc_config.tc_info[i].qoffset = offset;
+                       switch (vsi->type) {
+                       case I40E_VSI_MAIN:
+                               /* TC0 gets the RSS queues, later TCs the
+                                * per-TC queue allowance
+                                */
+                               if (i == 0)
+                                       qcount = pf->rss_size;
+                               else
+                                       qcount = pf->num_tc_qps;
+                               vsi->tc_config.tc_info[i].qcount = qcount;
+                               break;
+                       case I40E_VSI_FDIR:
+                       case I40E_VSI_SRIOV:
+                       case I40E_VSI_VMDQ2:
+                       default:
+                               /* these VSI types support a single TC only */
+                               qcount = vsi->alloc_queue_pairs;
+                               vsi->tc_config.tc_info[i].qcount = qcount;
+                               WARN_ON(i != 0);
+                               break;
+                       }
+
+                       /* find the power-of-2 of the number of queue pairs */
+                       num_qps = vsi->tc_config.tc_info[i].qcount;
+                       pow = 0;
+                       while (num_qps &&
+                             ((1 << pow) < vsi->tc_config.tc_info[i].qcount)) {
+                               pow++;
+                               num_qps >>= 1;
+                       }
+                       /* pow is now ceil(log2(qcount)) */
+
+                       vsi->tc_config.tc_info[i].netdev_tc = netdev_tc++;
+                       /* qmap packs the TC's first queue and its log2
+                        * queue count into one AQ field
+                        */
+                       qmap =
+                           (offset << I40E_AQ_VSI_TC_QUE_OFFSET_SHIFT) |
+                           (pow << I40E_AQ_VSI_TC_QUE_NUMBER_SHIFT);
+
+                       offset += vsi->tc_config.tc_info[i].qcount;
+               } else {
+                       /* TC is not enabled so set the offset to
+                        * default queue and allocate one queue
+                        * for the given TC.
+                        */
+                       vsi->tc_config.tc_info[i].qoffset = 0;
+                       vsi->tc_config.tc_info[i].qcount = 1;
+                       vsi->tc_config.tc_info[i].netdev_tc = 0;
+
+                       qmap = 0;
+               }
+               ctxt->info.tc_mapping[i] = cpu_to_le16(qmap);
+       }
+
+       /* Set actual Tx/Rx queue pairs */
+       vsi->num_queue_pairs = offset;
+
+       /* Scheduler section valid can only be set for ADD VSI */
+       if (is_add) {
+               sections |= I40E_AQ_VSI_PROP_SCHED_VALID;
+
+               ctxt->info.up_enable_bits = enabled_tc;
+       }
+       /* SRIOV VSIs get an explicit per-queue map; everyone else uses a
+        * contiguous range starting at base_queue
+        */
+       if (vsi->type == I40E_VSI_SRIOV) {
+               ctxt->info.mapping_flags |=
+                                    cpu_to_le16(I40E_AQ_VSI_QUE_MAP_NONCONTIG);
+               for (i = 0; i < vsi->num_queue_pairs; i++)
+                       ctxt->info.queue_mapping[i] =
+                                              cpu_to_le16(vsi->base_queue + i);
+       } else {
+               ctxt->info.mapping_flags |=
+                                       cpu_to_le16(I40E_AQ_VSI_QUE_MAP_CONTIG);
+               ctxt->info.queue_mapping[0] = cpu_to_le16(vsi->base_queue);
+       }
+       ctxt->info.valid_sections |= cpu_to_le16(sections);
+}
+
+/**
+ * i40e_set_rx_mode - NDO callback to set the netdev filters
+ * @netdev: network interface device structure
+ *
+ * Reconciles the driver's mac_filter_list with the netdev's unicast and
+ * multicast address lists: adds filters for new addresses, drops the
+ * netdev reference on filters whose address is no longer listed.
+ **/
+static void i40e_set_rx_mode(struct net_device *netdev)
+{
+       struct i40e_netdev_priv *np = netdev_priv(netdev);
+       struct i40e_mac_filter *f, *ftmp;
+       struct i40e_vsi *vsi = np->vsi;
+       struct netdev_hw_addr *uca;
+       struct netdev_hw_addr *mca;
+       struct netdev_hw_addr *ha;
+
+       /* add addr if not already in the filter list */
+       netdev_for_each_uc_addr(uca, netdev) {
+               if (!i40e_find_mac(vsi, uca->addr, false, true)) {
+                       /* in vlan mode, replicate the filter per vlan */
+                       if (i40e_is_vsi_in_vlan(vsi))
+                               i40e_put_mac_in_vlan(vsi, uca->addr,
+                                                    false, true);
+                       else
+                               i40e_add_filter(vsi, uca->addr, I40E_VLAN_ANY,
+                                               false, true);
+               }
+       }
+
+       /* same treatment for the multicast list */
+       netdev_for_each_mc_addr(mca, netdev) {
+               if (!i40e_find_mac(vsi, mca->addr, false, true)) {
+                       if (i40e_is_vsi_in_vlan(vsi))
+                               i40e_put_mac_in_vlan(vsi, mca->addr,
+                                                    false, true);
+                       else
+                               i40e_add_filter(vsi, mca->addr, I40E_VLAN_ANY,
+                                               false, true);
+               }
+       }
+
+       /* remove filter if not in netdev list */
+       list_for_each_entry_safe(f, ftmp, &vsi->mac_filter_list, list) {
+               bool found = false;
+
+               /* only netdev-owned filters are subject to removal here */
+               if (!f->is_netdev)
+                       continue;
+
+               if (is_multicast_ether_addr(f->macaddr)) {
+                       netdev_for_each_mc_addr(mca, netdev) {
+                               if (ether_addr_equal(mca->addr, f->macaddr)) {
+                                       found = true;
+                                       break;
+                               }
+                       }
+               } else {
+                       netdev_for_each_uc_addr(uca, netdev) {
+                               if (ether_addr_equal(uca->addr, f->macaddr)) {
+                                       found = true;
+                                       break;
+                               }
+                       }
+
+                       /* also keep the device's own addresses (dev_addrs),
+                        * which are not part of the uc list
+                        */
+                       for_each_dev_addr(netdev, ha) {
+                               if (ether_addr_equal(ha->addr, f->macaddr)) {
+                                       found = true;
+                                       break;
+                               }
+                       }
+               }
+               if (!found)
+                       i40e_del_filter(
+                          vsi, f->macaddr, I40E_VLAN_ANY, false, true);
+       }
+
+       /* check for other flag changes */
+       if (vsi->current_netdev_flags != vsi->netdev->flags) {
+               vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED;
+               vsi->back->flags |= I40E_FLAG_FILTER_SYNC;
+       }
+}
+
+/**
+ * i40e_sync_vsi_filters - Update the VSI filter list to the HW
+ * @vsi: ptr to the VSI
+ *
+ * Push any outstanding VSI filter changes through the AdminQ: first
+ * delete the filters whose refcount dropped to zero, then add the
+ * changed ones, then apply any promiscuous-mode flag changes.
+ *
+ * Returns 0 or error value
+ **/
+int i40e_sync_vsi_filters(struct i40e_vsi *vsi)
+{
+       struct i40e_mac_filter *f, *ftmp;
+       bool promisc_forced_on = false;
+       bool add_happened = false;
+       int filter_list_len = 0;
+       u32 changed_flags = 0;
+       i40e_status ret = 0;
+       struct i40e_pf *pf;
+       int num_add = 0;
+       int num_del = 0;
+       u16 cmd_flags;
+
+       /* empty array typed pointers, kcalloc later */
+       struct i40e_aqc_add_macvlan_element_data *add_list;
+       struct i40e_aqc_remove_macvlan_element_data *del_list;
+
+       /* serialize against other filter-sync callers on this VSI */
+       while (test_and_set_bit(__I40E_CONFIG_BUSY, &vsi->state))
+               usleep_range(1000, 2000);
+       pf = vsi->back;
+
+       if (vsi->netdev) {
+               changed_flags = vsi->current_netdev_flags ^ vsi->netdev->flags;
+               vsi->current_netdev_flags = vsi->netdev->flags;
+       }
+
+       if (vsi->flags & I40E_VSI_FLAG_FILTER_CHANGED) {
+               vsi->flags &= ~I40E_VSI_FLAG_FILTER_CHANGED;
+
+               filter_list_len = pf->hw.aq.asq_buf_size /
+                           sizeof(struct i40e_aqc_remove_macvlan_element_data);
+               del_list = kcalloc(filter_list_len,
+                           sizeof(struct i40e_aqc_remove_macvlan_element_data),
+                           GFP_KERNEL);
+               if (!del_list) {
+                       /* must not leave __I40E_CONFIG_BUSY set on the
+                        * error path or all future callers deadlock
+                        */
+                       clear_bit(__I40E_CONFIG_BUSY, &vsi->state);
+                       return -ENOMEM;
+               }
+
+               list_for_each_entry_safe(f, ftmp, &vsi->mac_filter_list, list) {
+                       if (!f->changed)
+                               continue;
+
+                       /* only zero-refcount filters are to be deleted */
+                       if (f->counter != 0)
+                               continue;
+                       f->changed = false;
+                       cmd_flags = 0;
+
+                       /* add to delete list */
+                       memcpy(del_list[num_del].mac_addr,
+                              f->macaddr, ETH_ALEN);
+                       del_list[num_del].vlan_tag =
+                               cpu_to_le16((u16)(f->vlan ==
+                                           I40E_VLAN_ANY ? 0 : f->vlan));
+
+                       /* vlan0 as wild card to allow packets from all vlans */
+                       if (f->vlan == I40E_VLAN_ANY ||
+                           (vsi->netdev && !(vsi->netdev->features &
+                                             NETIF_F_HW_VLAN_CTAG_FILTER)))
+                               cmd_flags |= I40E_AQC_MACVLAN_DEL_IGNORE_VLAN;
+                       cmd_flags |= I40E_AQC_MACVLAN_DEL_PERFECT_MATCH;
+                       del_list[num_del].flags = cmd_flags;
+                       num_del++;
+
+                       /* unlink from filter list */
+                       list_del(&f->list);
+                       kfree(f);
+
+                       /* flush a full buffer */
+                       if (num_del == filter_list_len) {
+                               ret = i40e_aq_remove_macvlan(&pf->hw,
+                                           vsi->seid, del_list, num_del,
+                                           NULL);
+                               num_del = 0;
+                               /* clear the whole buffer for reuse, not
+                                * just the first element
+                                */
+                               memset(del_list, 0, filter_list_len *
+                                      sizeof(*del_list));
+
+                               if (ret)
+                                       dev_info(&pf->pdev->dev,
+                                                "ignoring delete macvlan error, err %d, aq_err %d while flushing a full buffer\n",
+                                                ret,
+                                                pf->hw.aq.asq_last_status);
+                       }
+               }
+               if (num_del) {
+                       ret = i40e_aq_remove_macvlan(&pf->hw, vsi->seid,
+                                                    del_list, num_del, NULL);
+                       num_del = 0;
+
+                       if (ret)
+                               dev_info(&pf->pdev->dev,
+                                        "ignoring delete macvlan error, err %d, aq_err %d\n",
+                                        ret, pf->hw.aq.asq_last_status);
+               }
+
+               kfree(del_list);
+               del_list = NULL;
+
+               /* do all the adds now */
+               filter_list_len = pf->hw.aq.asq_buf_size /
+                             sizeof(struct i40e_aqc_add_macvlan_element_data);
+               add_list = kcalloc(filter_list_len,
+                              sizeof(struct i40e_aqc_add_macvlan_element_data),
+                              GFP_KERNEL);
+               if (!add_list) {
+                       /* as above: release the busy bit before bailing */
+                       clear_bit(__I40E_CONFIG_BUSY, &vsi->state);
+                       return -ENOMEM;
+               }
+
+               list_for_each_entry_safe(f, ftmp, &vsi->mac_filter_list, list) {
+                       if (!f->changed)
+                               continue;
+
+                       if (f->counter == 0)
+                               continue;
+                       f->changed = false;
+                       add_happened = true;
+                       cmd_flags = 0;
+
+                       /* add to add array */
+                       memcpy(add_list[num_add].mac_addr,
+                              f->macaddr, ETH_ALEN);
+                       add_list[num_add].vlan_tag =
+                               cpu_to_le16(
+                                (u16)(f->vlan == I40E_VLAN_ANY ? 0 : f->vlan));
+                       add_list[num_add].queue_number = 0;
+
+                       cmd_flags |= I40E_AQC_MACVLAN_ADD_PERFECT_MATCH;
+
+                       /* vlan0 as wild card to allow packets from all vlans */
+                       if (f->vlan == I40E_VLAN_ANY || (vsi->netdev &&
+                           !(vsi->netdev->features &
+                                                NETIF_F_HW_VLAN_CTAG_FILTER)))
+                               cmd_flags |= I40E_AQC_MACVLAN_ADD_IGNORE_VLAN;
+                       add_list[num_add].flags = cpu_to_le16(cmd_flags);
+                       num_add++;
+
+                       /* flush a full buffer */
+                       if (num_add == filter_list_len) {
+                               ret = i40e_aq_add_macvlan(&pf->hw,
+                                                         vsi->seid,
+                                                         add_list,
+                                                         num_add,
+                                                         NULL);
+                               num_add = 0;
+
+                               if (ret)
+                                       break;
+                               /* clear the whole buffer for reuse, not
+                                * just the first element
+                                */
+                               memset(add_list, 0, filter_list_len *
+                                      sizeof(*add_list));
+                       }
+               }
+               if (num_add) {
+                       ret = i40e_aq_add_macvlan(&pf->hw, vsi->seid,
+                                                 add_list, num_add, NULL);
+                       num_add = 0;
+               }
+               kfree(add_list);
+               add_list = NULL;
+
+               if (add_happened && (!ret)) {
+                       /* do nothing */;
+               } else if (add_happened && (ret)) {
+                       dev_info(&pf->pdev->dev,
+                                "add filter failed, err %d, aq_err %d\n",
+                                ret, pf->hw.aq.asq_last_status);
+                       /* hw filter table is full: fall back to promiscuous
+                        * so traffic is not silently dropped
+                        */
+                       if ((pf->hw.aq.asq_last_status == I40E_AQ_RC_ENOSPC) &&
+                           !test_bit(__I40E_FILTER_OVERFLOW_PROMISC,
+                                     &vsi->state)) {
+                               promisc_forced_on = true;
+                               set_bit(__I40E_FILTER_OVERFLOW_PROMISC,
+                                       &vsi->state);
+                               dev_info(&pf->pdev->dev, "promiscuous mode forced on\n");
+                       }
+               }
+       }
+
+       /* check for changes in promiscuous modes */
+       if (changed_flags & IFF_ALLMULTI) {
+               bool cur_multipromisc;
+               cur_multipromisc = !!(vsi->current_netdev_flags & IFF_ALLMULTI);
+               ret = i40e_aq_set_vsi_multicast_promiscuous(&vsi->back->hw,
+                                                           vsi->seid,
+                                                           cur_multipromisc,
+                                                           NULL);
+               if (ret)
+                       dev_info(&pf->pdev->dev,
+                                "set multi promisc failed, err %d, aq_err %d\n",
+                                ret, pf->hw.aq.asq_last_status);
+       }
+       if ((changed_flags & IFF_PROMISC) || promisc_forced_on) {
+               bool cur_promisc;
+               cur_promisc = (!!(vsi->current_netdev_flags & IFF_PROMISC) ||
+                              test_bit(__I40E_FILTER_OVERFLOW_PROMISC,
+                                       &vsi->state));
+               ret = i40e_aq_set_vsi_unicast_promiscuous(&vsi->back->hw,
+                                                         vsi->seid,
+                                                         cur_promisc,
+                                                         NULL);
+               if (ret)
+                       dev_info(&pf->pdev->dev,
+                                "set uni promisc failed, err %d, aq_err %d\n",
+                                ret, pf->hw.aq.asq_last_status);
+       }
+
+       clear_bit(__I40E_CONFIG_BUSY, &vsi->state);
+       return 0;
+}
+
+/**
+ * i40e_sync_filters_subtask - Sync the VSI filter list with HW
+ * @pf: board private structure
+ *
+ * Pushes pending filter changes for every VSI that has flagged them.
+ **/
+static void i40e_sync_filters_subtask(struct i40e_pf *pf)
+{
+       int v;
+
+       if (!pf)
+               return;
+       if (!(pf->flags & I40E_FLAG_FILTER_SYNC))
+               return;
+       pf->flags &= ~I40E_FLAG_FILTER_SYNC;
+
+       for (v = 0; v < pf->hw.func_caps.num_vsis; v++) {
+               struct i40e_vsi *vsi = pf->vsi[v];
+
+               if (vsi && (vsi->flags & I40E_VSI_FLAG_FILTER_CHANGED))
+                       i40e_sync_vsi_filters(vsi);
+       }
+}
+
+/**
+ * i40e_change_mtu - NDO callback to change the Maximum Transfer Unit
+ * @netdev: network interface device structure
+ * @new_mtu: new value for maximum frame size
+ *
+ * Returns 0 on success, negative on failure
+ **/
+static int i40e_change_mtu(struct net_device *netdev, int new_mtu)
+{
+       struct i40e_netdev_priv *np = netdev_priv(netdev);
+       struct i40e_vsi *vsi = np->vsi;
+       int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
+
+       /* reject MTU below 68 (problematic on some kernels) or frames
+        * larger than the device's receive buffer
+        */
+       if (new_mtu < 68 || max_frame > I40E_MAX_RXBUFFER)
+               return -EINVAL;
+
+       netdev_info(netdev, "changing MTU from %d to %d\n",
+                   netdev->mtu, new_mtu);
+       netdev->mtu = new_mtu;
+
+       /* a running interface needs a reinit to apply the new size */
+       if (netif_running(netdev))
+               i40e_vsi_reinit_locked(vsi);
+
+       return 0;
+}
+
+/**
+ * i40e_vlan_stripping_enable - Turn on vlan stripping for the VSI
+ * @vsi: the vsi being adjusted
+ *
+ * Updates the cached VSI context to strip vlan tags in both
+ * directions and pushes the change to the firmware via the admin
+ * queue.  Failures are logged but not returned to the caller.
+ **/
+void i40e_vlan_stripping_enable(struct i40e_vsi *vsi)
+{
+       struct i40e_vsi_context ctxt;
+       i40e_status ret;
+
+       /* skip the AQ round-trip if the VLAN section is already valid
+        * and the port-vlan mode bits are cleared
+        */
+       if ((vsi->info.valid_sections &
+            cpu_to_le16(I40E_AQ_VSI_PROP_VLAN_VALID)) &&
+           ((vsi->info.port_vlan_flags & I40E_AQ_VSI_PVLAN_MODE_MASK) == 0))
+               return;  /* already enabled */
+
+       vsi->info.valid_sections = cpu_to_le16(I40E_AQ_VSI_PROP_VLAN_VALID);
+       vsi->info.port_vlan_flags = I40E_AQ_VSI_PVLAN_MODE_ALL |
+                                   I40E_AQ_VSI_PVLAN_EMOD_STR_BOTH;
+
+       /* send the updated VSI context to the firmware */
+       ctxt.seid = vsi->seid;
+       memcpy(&ctxt.info, &vsi->info, sizeof(vsi->info));
+       ret = i40e_aq_update_vsi_params(&vsi->back->hw, &ctxt, NULL);
+       if (ret) {
+               dev_info(&vsi->back->pdev->dev,
+                        "%s: update vsi failed, aq_err=%d\n",
+                        __func__, vsi->back->hw.aq.asq_last_status);
+       }
+}
+
+/**
+ * i40e_vlan_stripping_disable - Turn off vlan stripping for the VSI
+ * @vsi: the vsi being adjusted
+ *
+ * Updates the cached VSI context so vlan tags are left in place
+ * (EMOD_NOTHING) and pushes the change to the firmware via the
+ * admin queue.  Failures are logged but not returned to the caller.
+ **/
+void i40e_vlan_stripping_disable(struct i40e_vsi *vsi)
+{
+       struct i40e_vsi_context ctxt;
+       i40e_status ret;
+
+       /* skip the AQ round-trip if the VLAN section is already valid
+        * and the emulation-mode bits are fully set
+        */
+       if ((vsi->info.valid_sections &
+            cpu_to_le16(I40E_AQ_VSI_PROP_VLAN_VALID)) &&
+           ((vsi->info.port_vlan_flags & I40E_AQ_VSI_PVLAN_EMOD_MASK) ==
+            I40E_AQ_VSI_PVLAN_EMOD_MASK))
+               return;  /* already disabled */
+
+       vsi->info.valid_sections = cpu_to_le16(I40E_AQ_VSI_PROP_VLAN_VALID);
+       vsi->info.port_vlan_flags = I40E_AQ_VSI_PVLAN_MODE_ALL |
+                                   I40E_AQ_VSI_PVLAN_EMOD_NOTHING;
+
+       /* send the updated VSI context to the firmware */
+       ctxt.seid = vsi->seid;
+       memcpy(&ctxt.info, &vsi->info, sizeof(vsi->info));
+       ret = i40e_aq_update_vsi_params(&vsi->back->hw, &ctxt, NULL);
+       if (ret) {
+               dev_info(&vsi->back->pdev->dev,
+                        "%s: update vsi failed, aq_err=%d\n",
+                        __func__, vsi->back->hw.aq.asq_last_status);
+       }
+}
+
+/**
+ * i40e_vlan_rx_register - Setup or shutdown vlan offload
+ * @netdev: network interface to be adjusted
+ * @features: netdev features to test if VLAN offload is enabled or not
+ *
+ * Maps the CTAG_RX feature bit to the VSI vlan-stripping setting.
+ **/
+static void i40e_vlan_rx_register(struct net_device *netdev, u32 features)
+{
+       struct i40e_netdev_priv *np = netdev_priv(netdev);
+
+       if (!(features & NETIF_F_HW_VLAN_CTAG_RX)) {
+               i40e_vlan_stripping_disable(np->vsi);
+               return;
+       }
+
+       i40e_vlan_stripping_enable(np->vsi);
+}
+
+/**
+ * i40e_vsi_add_vlan - Add vsi membership for given vlan
+ * @vsi: the vsi being configured
+ * @vid: vlan id to be added (0 = untagged only , -1 = any)
+ *
+ * Adds a filter for @vid on the netdev's own MAC (if there is a
+ * netdev) and on every MAC already in the VSI filter list, then
+ * syncs the list to hardware.  Returns 0 on success, -ENOMEM when a
+ * filter could not be allocated, or the sync error code.
+ **/
+int i40e_vsi_add_vlan(struct i40e_vsi *vsi, s16 vid)
+{
+       struct i40e_mac_filter *f, *add_f;
+       bool is_netdev, is_vf;
+       int ret;
+
+       is_vf = (vsi->type == I40E_VSI_SRIOV);
+       is_netdev = !!(vsi->netdev);
+
+       /* filter for the netdev's own MAC address first */
+       if (is_netdev) {
+               add_f = i40e_add_filter(vsi, vsi->netdev->dev_addr, vid,
+                                       is_vf, is_netdev);
+               if (!add_f) {
+                       dev_info(&vsi->back->pdev->dev,
+                                "Could not add vlan filter %d for %pM\n",
+                                vid, vsi->netdev->dev_addr);
+                       return -ENOMEM;
+               }
+       }
+
+       /* then pair this vlan with every MAC already in the list */
+       list_for_each_entry(f, &vsi->mac_filter_list, list) {
+               add_f = i40e_add_filter(vsi, f->macaddr, vid, is_vf, is_netdev);
+               if (!add_f) {
+                       dev_info(&vsi->back->pdev->dev,
+                                "Could not add vlan filter %d for %pM\n",
+                                vid, f->macaddr);
+                       return -ENOMEM;
+               }
+       }
+
+       ret = i40e_sync_vsi_filters(vsi);
+       if (ret) {
+               dev_info(&vsi->back->pdev->dev,
+                        "Could not sync filters for vid %d\n", vid);
+               return ret;
+       }
+
+       /* Now if we add a vlan tag, make sure to check if it is the first
+        * tag (i.e. a "tag" -1 does exist) and if so replace the -1 "tag"
+        * with 0, so we now accept untagged and specified tagged traffic
+        * (and not any taged and untagged)
+        */
+       if (vid > 0) {
+               /* swap the netdev MAC's "any vlan" filter for vlan 0 */
+               if (is_netdev && i40e_find_filter(vsi, vsi->netdev->dev_addr,
+                                                 I40E_VLAN_ANY,
+                                                 is_vf, is_netdev)) {
+                       i40e_del_filter(vsi, vsi->netdev->dev_addr,
+                                       I40E_VLAN_ANY, is_vf, is_netdev);
+                       add_f = i40e_add_filter(vsi, vsi->netdev->dev_addr, 0,
+                                               is_vf, is_netdev);
+                       if (!add_f) {
+                               dev_info(&vsi->back->pdev->dev,
+                                        "Could not add filter 0 for %pM\n",
+                                        vsi->netdev->dev_addr);
+                               return -ENOMEM;
+                       }
+               }
+
+               /* do the same swap for every other MAC in the list */
+               list_for_each_entry(f, &vsi->mac_filter_list, list) {
+                       if (i40e_find_filter(vsi, f->macaddr, I40E_VLAN_ANY,
+                                            is_vf, is_netdev)) {
+                               i40e_del_filter(vsi, f->macaddr, I40E_VLAN_ANY,
+                                               is_vf, is_netdev);
+                               add_f = i40e_add_filter(vsi, f->macaddr,
+                                                       0, is_vf, is_netdev);
+                               if (!add_f) {
+                                       dev_info(&vsi->back->pdev->dev,
+                                                "Could not add filter 0 for %pM\n",
+                                                f->macaddr);
+                                       return -ENOMEM;
+                               }
+                       }
+               }
+               ret = i40e_sync_vsi_filters(vsi);
+       }
+
+       return ret;
+}
+
+/**
+ * i40e_vsi_kill_vlan - Remove vsi membership for given vlan
+ * @vsi: the vsi being configured
+ * @vid: vlan id to be removed (0 = untagged only , -1 = any)
+ *
+ * Removes the @vid filter from the netdev MAC and every MAC in the
+ * VSI filter list, syncs, and when no tagged filters remain swaps
+ * the vlan-0 filters back to "any vlan" (-1).  Returns 0 on success,
+ * -ENOMEM on filter allocation failure, or the sync error code.
+ **/
+int i40e_vsi_kill_vlan(struct i40e_vsi *vsi, s16 vid)
+{
+       struct net_device *netdev = vsi->netdev;
+       struct i40e_mac_filter *f, *add_f;
+       bool is_vf, is_netdev;
+       int filter_count = 0;
+       int ret;
+
+       is_vf = (vsi->type == I40E_VSI_SRIOV);
+       is_netdev = !!(netdev);
+
+       if (is_netdev)
+               i40e_del_filter(vsi, netdev->dev_addr, vid, is_vf, is_netdev);
+
+       list_for_each_entry(f, &vsi->mac_filter_list, list)
+               i40e_del_filter(vsi, f->macaddr, vid, is_vf, is_netdev);
+
+       ret = i40e_sync_vsi_filters(vsi);
+       if (ret) {
+               dev_info(&vsi->back->pdev->dev, "Could not sync filters\n");
+               return ret;
+       }
+
+       /* go through all the filters for this VSI and if there is only
+        * vid == 0 it means there are no other filters, so vid 0 must
+        * be replaced with -1. This signifies that we should from now
+        * on accept any traffic (with any tag present, or untagged)
+        */
+       list_for_each_entry(f, &vsi->mac_filter_list, list) {
+               if (is_netdev) {
+                       /* NOTE(review): a tagged filter on the netdev MAC is
+                        * counted here AND again by the unconditional test
+                        * below — confirm the double count is intended
+                        */
+                       if (f->vlan &&
+                           ether_addr_equal(netdev->dev_addr, f->macaddr))
+                               filter_count++;
+               }
+
+               if (f->vlan)
+                       filter_count++;
+       }
+
+       /* no tagged filters left: restore the netdev MAC to "any vlan" */
+       if (!filter_count && is_netdev) {
+               i40e_del_filter(vsi, netdev->dev_addr, 0, is_vf, is_netdev);
+               f = i40e_add_filter(vsi, netdev->dev_addr, I40E_VLAN_ANY,
+                                   is_vf, is_netdev);
+               if (!f) {
+                       dev_info(&vsi->back->pdev->dev,
+                                "Could not add filter %d for %pM\n",
+                                I40E_VLAN_ANY, netdev->dev_addr);
+                       return -ENOMEM;
+               }
+       }
+
+       /* and restore every other MAC in the list to "any vlan" too */
+       if (!filter_count) {
+               list_for_each_entry(f, &vsi->mac_filter_list, list) {
+                       i40e_del_filter(vsi, f->macaddr, 0, is_vf, is_netdev);
+                       add_f = i40e_add_filter(vsi, f->macaddr, I40E_VLAN_ANY,
+                                           is_vf, is_netdev);
+                       if (!add_f) {
+                               dev_info(&vsi->back->pdev->dev,
+                                        "Could not add filter %d for %pM\n",
+                                        I40E_VLAN_ANY, f->macaddr);
+                               return -ENOMEM;
+                       }
+               }
+       }
+
+       return i40e_sync_vsi_filters(vsi);
+}
+
+/**
+ * i40e_vlan_rx_add_vid - Add a vlan id filter to HW offload
+ * @netdev: network interface to be adjusted
+ * @proto: vlan protocol (unused)
+ * @vid: vlan id to be added
+ *
+ * Always returns 0: an out-of-range vid is silently accepted, and a
+ * filter-add failure is not reported to the stack (the vid bit is
+ * simply left clear so i40e_restore_vlan() won't re-add it).
+ **/
+static int i40e_vlan_rx_add_vid(struct net_device *netdev,
+                               __always_unused __be16 proto, u16 vid)
+{
+       struct i40e_netdev_priv *np = netdev_priv(netdev);
+       struct i40e_vsi *vsi = np->vsi;
+       int ret;
+
+       if (vid > 4095)
+               return 0;
+
+       netdev_info(vsi->netdev, "adding %pM vid=%d\n",
+                   netdev->dev_addr, vid);
+       /* If the network stack called us with vid = 0, we should
+        * indicate to i40e_vsi_add_vlan() that we want to receive
+        * any traffic (i.e. with any vlan tag, or untagged)
+        */
+       ret = i40e_vsi_add_vlan(vsi, vid ? vid : I40E_VLAN_ANY);
+
+       /* only remember the vid for restore if the add succeeded */
+       if (!ret) {
+               if (vid < VLAN_N_VID)
+                       set_bit(vid, vsi->active_vlans);
+       }
+
+       return 0;
+}
+
+/**
+ * i40e_vlan_rx_kill_vid - Remove a vlan id filter from HW offload
+ * @netdev: network interface to be adjusted
+ * @proto: vlan protocol (unused)
+ * @vid: vlan id to be removed
+ *
+ * Always returns 0; removal failures are only logged.
+ **/
+static int i40e_vlan_rx_kill_vid(struct net_device *netdev,
+                                __always_unused __be16 proto, u16 vid)
+{
+       struct i40e_netdev_priv *np = netdev_priv(netdev);
+       struct i40e_vsi *vsi = np->vsi;
+
+       netdev_info(vsi->netdev, "removing %pM vid=%d\n",
+                   netdev->dev_addr, vid);
+       /* return code is ignored as there is nothing a user
+        * can do about failure to remove and a log message was
+        * already printed from another function
+        */
+       i40e_vsi_kill_vlan(vsi, vid);
+
+       /* forget the vid so i40e_restore_vlan() won't re-add it */
+       clear_bit(vid, vsi->active_vlans);
+       return 0;
+}
+
+/**
+ * i40e_restore_vlan - Reinstate vlans when vsi/netdev comes back up
+ * @vsi: the vsi being brought back up
+ *
+ * Re-applies the vlan stripping setting implied by the netdev
+ * features and re-adds every vlan id recorded in active_vlans.
+ **/
+static void i40e_restore_vlan(struct i40e_vsi *vsi)
+{
+       struct net_device *netdev = vsi->netdev;
+       u16 vid;
+
+       if (!netdev)
+               return;
+
+       i40e_vlan_rx_register(netdev, netdev->features);
+
+       for_each_set_bit(vid, vsi->active_vlans, VLAN_N_VID)
+               i40e_vlan_rx_add_vid(netdev, htons(ETH_P_8021Q), vid);
+}
+
+/**
+ * i40e_vsi_add_pvid - Add pvid for the VSI
+ * @vsi: the vsi being adjusted
+ * @vid: the vlan id to set as a PVID
+ *
+ * Programs @vid as the port vlan (insert-PVID, untagged mode) in the
+ * cached VSI context and pushes it to the firmware via the admin
+ * queue.  Returns the AQ status (0 on success).
+ **/
+i40e_status i40e_vsi_add_pvid(struct i40e_vsi *vsi, u16 vid)
+{
+       struct i40e_vsi_context ctxt;
+       i40e_status ret;
+
+       vsi->info.valid_sections = cpu_to_le16(I40E_AQ_VSI_PROP_VLAN_VALID);
+       vsi->info.pvid = cpu_to_le16(vid);
+       vsi->info.port_vlan_flags |= I40E_AQ_VSI_PVLAN_INSERT_PVID;
+       vsi->info.port_vlan_flags |= I40E_AQ_VSI_PVLAN_MODE_UNTAGGED;
+
+       /* send the updated VSI context to the firmware */
+       ctxt.seid = vsi->seid;
+       memcpy(&ctxt.info, &vsi->info, sizeof(vsi->info));
+       ret = i40e_aq_update_vsi_params(&vsi->back->hw, &ctxt, NULL);
+       if (ret) {
+               dev_info(&vsi->back->pdev->dev,
+                        "%s: update vsi failed, aq_err=%d\n",
+                        __func__, vsi->back->hw.aq.asq_last_status);
+       }
+
+       return ret;
+}
+
+/**
+ * i40e_vsi_remove_pvid - Remove the pvid from the VSI
+ * @vsi: the vsi being adjusted
+ *
+ * Just use the vlan_rx_register() service to put it back to normal
+ **/
+void i40e_vsi_remove_pvid(struct i40e_vsi *vsi)
+{
+       vsi->info.pvid = 0;
+       /* reprogram stripping from the current netdev feature bits */
+       i40e_vlan_rx_register(vsi->netdev, vsi->netdev->features);
+}
+
+/**
+ * i40e_vsi_setup_tx_resources - Allocate VSI Tx queue resources
+ * @vsi: ptr to the VSI
+ *
+ * If this function returns with an error, then it's possible one or
+ * more of the rings is populated (while the rest are not).  It is the
+ * callers duty to clean those orphaned rings.
+ *
+ * Return 0 on success, negative on failure
+ **/
+static int i40e_vsi_setup_tx_resources(struct i40e_vsi *vsi)
+{
+       int err = 0;
+       int i;
+
+       for (i = 0; i < vsi->num_queue_pairs; i++) {
+               err = i40e_setup_tx_descriptors(&vsi->tx_rings[i]);
+               if (err)
+                       break;
+       }
+
+       return err;
+}
+
+/**
+ * i40e_vsi_free_tx_resources - Free Tx resources for VSI queues
+ * @vsi: ptr to the VSI
+ *
+ * Free VSI's transmit software resources
+ **/
+static void i40e_vsi_free_tx_resources(struct i40e_vsi *vsi)
+{
+       int i;
+
+       for (i = 0; i < vsi->num_queue_pairs; i++) {
+               struct i40e_ring *ring = &vsi->tx_rings[i];
+
+               /* only rings that were actually allocated have desc set */
+               if (ring->desc)
+                       i40e_free_tx_resources(ring);
+       }
+}
+
+/**
+ * i40e_vsi_setup_rx_resources - Allocate VSI queues Rx resources
+ * @vsi: ptr to the VSI
+ *
+ * If this function returns with an error, then it's possible one or
+ * more of the rings is populated (while the rest are not).  It is the
+ * callers duty to clean those orphaned rings.
+ *
+ * Return 0 on success, negative on failure
+ **/
+static int i40e_vsi_setup_rx_resources(struct i40e_vsi *vsi)
+{
+       int err = 0;
+       int i;
+
+       for (i = 0; i < vsi->num_queue_pairs; i++) {
+               err = i40e_setup_rx_descriptors(&vsi->rx_rings[i]);
+               if (err)
+                       break;
+       }
+
+       return err;
+}
+
+/**
+ * i40e_vsi_free_rx_resources - Free Rx Resources for VSI queues
+ * @vsi: ptr to the VSI
+ *
+ * Free all receive software resources
+ **/
+static void i40e_vsi_free_rx_resources(struct i40e_vsi *vsi)
+{
+       int i;
+
+       for (i = 0; i < vsi->num_queue_pairs; i++) {
+               struct i40e_ring *ring = &vsi->rx_rings[i];
+
+               /* only rings that were actually allocated have desc set */
+               if (ring->desc)
+                       i40e_free_rx_resources(ring);
+       }
+}
+
+/**
+ * i40e_configure_tx_ring - Configure a transmit ring context and rest
+ * @ring: The Tx ring to configure
+ *
+ * Configure the Tx descriptor ring in the HMC context.
+ *
+ * Returns 0 on success, -ENOMEM when the HMC context could not be
+ * cleared or set.
+ **/
+static int i40e_configure_tx_ring(struct i40e_ring *ring)
+{
+       struct i40e_vsi *vsi = ring->vsi;
+       u16 pf_q = vsi->base_queue + ring->queue_index;
+       struct i40e_hw *hw = &vsi->back->hw;
+       struct i40e_hmc_obj_txq tx_ctx;
+       i40e_status err = 0;
+       u32 qtx_ctl = 0;
+
+       /* some ATR related tx ring init */
+       if (vsi->back->flags & I40E_FLAG_FDIR_ATR_ENABLED) {
+               ring->atr_sample_rate = vsi->back->atr_sample_rate;
+               ring->atr_count = 0;
+       } else {
+               ring->atr_sample_rate = 0;
+       }
+
+       /* initialize XPS */
+       if (ring->q_vector && ring->netdev &&
+           !test_and_set_bit(__I40E_TX_XPS_INIT_DONE, &ring->state))
+               netif_set_xps_queue(ring->netdev,
+                                   &ring->q_vector->affinity_mask,
+                                   ring->queue_index);
+
+       /* clear the context structure first */
+       memset(&tx_ctx, 0, sizeof(tx_ctx));
+
+       tx_ctx.new_context = 1;
+       tx_ctx.base = (ring->dma / 128);
+       tx_ctx.qlen = ring->count;
+       tx_ctx.fd_ena = !!(vsi->back->flags & (I40E_FLAG_FDIR_ENABLED |
+                       I40E_FLAG_FDIR_ATR_ENABLED));
+
+       /* As part of VSI creation/update, FW allocates certain
+        * Tx arbitration queue sets for each TC enabled for
+        * the VSI. The FW returns the handles to these queue
+        * sets as part of the response buffer to Add VSI,
+        * Update VSI, etc. AQ commands. It is expected that
+        * these queue set handles be associated with the Tx
+        * queues by the driver as part of the TX queue context
+        * initialization. This has to be done regardless of
+        * DCB as by default everything is mapped to TC0.
+        */
+       tx_ctx.rdylist = le16_to_cpu(vsi->info.qs_handle[ring->dcb_tc]);
+       tx_ctx.rdylist_act = 0;
+
+       /* clear the context in the HMC */
+       err = i40e_clear_lan_tx_queue_context(hw, pf_q);
+       if (err) {
+               dev_info(&vsi->back->pdev->dev,
+                        "Failed to clear LAN Tx queue context on Tx ring %d (pf_q %d), error: %d\n",
+                        ring->queue_index, pf_q, err);
+               return -ENOMEM;
+       }
+
+       /* set the context in the HMC */
+       err = i40e_set_lan_tx_queue_context(hw, pf_q, &tx_ctx);
+       if (err) {
+               dev_info(&vsi->back->pdev->dev,
+                        "Failed to set LAN Tx queue context on Tx ring %d (pf_q %d), error: %d\n",
+                        ring->queue_index, pf_q, err);
+               return -ENOMEM;
+       }
+
+       /* Now associate this queue with this PCI function */
+       qtx_ctl = I40E_QTX_CTL_PF_QUEUE;
+       qtx_ctl |= ((hw->hmc.hmc_fn_id << I40E_QTX_CTL_PF_INDX_SHIFT)
+                                               & I40E_QTX_CTL_PF_INDX_MASK);
+       wr32(hw, I40E_QTX_CTL(pf_q), qtx_ctl);
+       i40e_flush(hw);
+
+       clear_bit(__I40E_HANG_CHECK_ARMED, &ring->state);
+
+       /* cache tail off for easier writes later */
+       ring->tail = hw->hw_addr + I40E_QTX_TAIL(pf_q);
+
+       return 0;
+}
+
+/**
+ * i40e_configure_rx_ring - Configure a receive ring context
+ * @ring: The Rx ring to configure
+ *
+ * Configure the Rx descriptor ring in the HMC context.
+ *
+ * Returns 0 on success, -ENOMEM when the HMC context could not be
+ * cleared or set.
+ **/
+static int i40e_configure_rx_ring(struct i40e_ring *ring)
+{
+       struct i40e_vsi *vsi = ring->vsi;
+       u32 chain_len = vsi->back->hw.func_caps.rx_buf_chain_len;
+       u16 pf_q = vsi->base_queue + ring->queue_index;
+       struct i40e_hw *hw = &vsi->back->hw;
+       struct i40e_hmc_obj_rxq rx_ctx;
+       i40e_status err = 0;
+
+       ring->state = 0;
+
+       /* clear the context structure first */
+       memset(&rx_ctx, 0, sizeof(rx_ctx));
+
+       /* buffer sizes were decided VSI-wide in i40e_vsi_configure_rx() */
+       ring->rx_buf_len = vsi->rx_buf_len;
+       ring->rx_hdr_len = vsi->rx_hdr_len;
+
+       rx_ctx.dbuff = ring->rx_buf_len >> I40E_RXQ_CTX_DBUFF_SHIFT;
+       rx_ctx.hbuff = ring->rx_hdr_len >> I40E_RXQ_CTX_HBUFF_SHIFT;
+
+       rx_ctx.base = (ring->dma / 128);
+       rx_ctx.qlen = ring->count;
+
+       /* dsize selects 16-byte vs 32-byte rx descriptors */
+       if (vsi->back->flags & I40E_FLAG_16BYTE_RX_DESC_ENABLED) {
+               set_ring_16byte_desc_enabled(ring);
+               rx_ctx.dsize = 0;
+       } else {
+               rx_ctx.dsize = 1;
+       }
+
+       /* a non-zero dtype means packet-split; enable header split points */
+       rx_ctx.dtype = vsi->dtype;
+       if (vsi->dtype) {
+               set_ring_ps_enabled(ring);
+               rx_ctx.hsplit_0 = I40E_RX_SPLIT_L2      |
+                                 I40E_RX_SPLIT_IP      |
+                                 I40E_RX_SPLIT_TCP_UDP |
+                                 I40E_RX_SPLIT_SCTP;
+       } else {
+               rx_ctx.hsplit_0 = 0;
+       }
+
+       /* cap rxmax at what the buffer chain can actually hold */
+       rx_ctx.rxmax = min_t(u16, vsi->max_frame,
+                                 (chain_len * ring->rx_buf_len));
+       rx_ctx.tphrdesc_ena = 1;
+       rx_ctx.tphwdesc_ena = 1;
+       rx_ctx.tphdata_ena = 1;
+       rx_ctx.tphhead_ena = 1;
+       rx_ctx.lrxqthresh = 2;
+       rx_ctx.crcstrip = 1;
+       rx_ctx.l2tsel = 1;
+       rx_ctx.showiv = 1;
+
+       /* clear the context in the HMC */
+       err = i40e_clear_lan_rx_queue_context(hw, pf_q);
+       if (err) {
+               dev_info(&vsi->back->pdev->dev,
+                        "Failed to clear LAN Rx queue context on Rx ring %d (pf_q %d), error: %d\n",
+                        ring->queue_index, pf_q, err);
+               return -ENOMEM;
+       }
+
+       /* set the context in the HMC */
+       err = i40e_set_lan_rx_queue_context(hw, pf_q, &rx_ctx);
+       if (err) {
+               dev_info(&vsi->back->pdev->dev,
+                        "Failed to set LAN Rx queue context on Rx ring %d (pf_q %d), error: %d\n",
+                        ring->queue_index, pf_q, err);
+               return -ENOMEM;
+       }
+
+       /* cache tail for quicker writes, and clear the reg before use */
+       ring->tail = hw->hw_addr + I40E_QRX_TAIL(pf_q);
+       writel(0, ring->tail);
+
+       /* prime the ring with receive buffers */
+       i40e_alloc_rx_buffers(ring, I40E_DESC_UNUSED(ring));
+
+       return 0;
+}
+
+/**
+ * i40e_vsi_configure_tx - Configure the VSI for Tx
+ * @vsi: VSI structure describing this set of rings and resources
+ *
+ * Configure the Tx VSI for operation.
+ **/
+static int i40e_vsi_configure_tx(struct i40e_vsi *vsi)
+{
+       int err = 0;
+       u16 i;
+
+       for (i = 0; i < vsi->num_queue_pairs; i++) {
+               err = i40e_configure_tx_ring(&vsi->tx_rings[i]);
+               if (err)
+                       break;
+       }
+
+       return err;
+}
+
+/**
+ * i40e_vsi_configure_rx - Configure the VSI for Rx
+ * @vsi: the VSI being configured
+ *
+ * Configure the Rx VSI for operation.
+ *
+ * Decides the VSI-wide max frame, header/data buffer sizes and
+ * descriptor type from the PF's single-buffer/packet-split flags,
+ * then configures each Rx ring.  Returns 0 on success, negative on
+ * the first ring configuration failure.
+ **/
+static int i40e_vsi_configure_rx(struct i40e_vsi *vsi)
+{
+       int err = 0;
+       u16 i;
+
+       /* jumbo MTU needs room for the L2 header, FCS and a vlan tag */
+       if (vsi->netdev && (vsi->netdev->mtu > ETH_DATA_LEN))
+               vsi->max_frame = vsi->netdev->mtu + ETH_HLEN
+                              + ETH_FCS_LEN + VLAN_HLEN;
+       else
+               vsi->max_frame = I40E_RXBUFFER_2048;
+
+       /* figure out correct receive buffer length */
+       switch (vsi->back->flags & (I40E_FLAG_RX_1BUF_ENABLED |
+                                   I40E_FLAG_RX_PS_ENABLED)) {
+       case I40E_FLAG_RX_1BUF_ENABLED:
+               vsi->rx_hdr_len = 0;
+               vsi->rx_buf_len = vsi->max_frame;
+               vsi->dtype = I40E_RX_DTYPE_NO_SPLIT;
+               break;
+       case I40E_FLAG_RX_PS_ENABLED:
+               vsi->rx_hdr_len = I40E_RX_HDR_SIZE;
+               vsi->rx_buf_len = I40E_RXBUFFER_2048;
+               vsi->dtype = I40E_RX_DTYPE_HEADER_SPLIT;
+               break;
+       default:
+               /* both flags set (or neither): always split */
+               vsi->rx_hdr_len = I40E_RX_HDR_SIZE;
+               vsi->rx_buf_len = I40E_RXBUFFER_2048;
+               vsi->dtype = I40E_RX_DTYPE_SPLIT_ALWAYS;
+               break;
+       }
+
+       /* round up for the chip's needs */
+       vsi->rx_hdr_len = ALIGN(vsi->rx_hdr_len,
+                               (1 << I40E_RXQ_CTX_HBUFF_SHIFT));
+       vsi->rx_buf_len = ALIGN(vsi->rx_buf_len,
+                               (1 << I40E_RXQ_CTX_DBUFF_SHIFT));
+
+       /* set up individual rings */
+       for (i = 0; i < vsi->num_queue_pairs && !err; i++)
+               err = i40e_configure_rx_ring(&vsi->rx_rings[i]);
+
+       return err;
+}
+
+/**
+ * i40e_vsi_config_dcb_rings - Update rings to reflect DCB TC
+ * @vsi: ptr to the VSI
+ *
+ * Stamps each ring in a traffic class's queue range with that TC
+ * number; a no-op unless DCB is enabled on the PF.
+ **/
+static void i40e_vsi_config_dcb_rings(struct i40e_vsi *vsi)
+{
+       int tc, q;
+
+       if (!(vsi->back->flags & I40E_FLAG_DCB_ENABLED))
+               return;
+
+       for (tc = 0; tc < I40E_MAX_TRAFFIC_CLASS; tc++) {
+               u16 first, count;
+
+               if (!(vsi->tc_config.enabled_tc & (1 << tc)))
+                       continue;
+
+               first = vsi->tc_config.tc_info[tc].qoffset;
+               count = vsi->tc_config.tc_info[tc].qcount;
+               for (q = first; q < first + count; q++) {
+                       vsi->rx_rings[q].dcb_tc = tc;
+                       vsi->tx_rings[q].dcb_tc = tc;
+               }
+       }
+}
+
+/**
+ * i40e_set_vsi_rx_mode - Call set_rx_mode on a VSI
+ * @vsi: ptr to the VSI
+ *
+ * No-op when the VSI has no netdev attached.
+ **/
+static void i40e_set_vsi_rx_mode(struct i40e_vsi *vsi)
+{
+       if (!vsi->netdev)
+               return;
+
+       i40e_set_rx_mode(vsi->netdev);
+}
+
+/**
+ * i40e_vsi_configure - Set up the VSI for action
+ * @vsi: the VSI being configured
+ *
+ * Applies rx-mode, vlans and DCB ring mapping, then configures the
+ * Tx and Rx rings.  Returns 0 on success, negative on the first
+ * ring configuration failure.
+ **/
+static int i40e_vsi_configure(struct i40e_vsi *vsi)
+{
+       int err;
+
+       i40e_set_vsi_rx_mode(vsi);
+       i40e_restore_vlan(vsi);
+       i40e_vsi_config_dcb_rings(vsi);
+
+       err = i40e_vsi_configure_tx(vsi);
+       if (err)
+               return err;
+
+       return i40e_vsi_configure_rx(vsi);
+}
+
+/**
+ * i40e_vsi_configure_msix - MSIX mode Interrupt Config in the HW
+ * @vsi: the VSI being configured
+ *
+ * Programs per-vector ITR values and builds, per vector, a hardware
+ * linked list of the queue pairs it services (Rx cause chains to the
+ * paired Tx cause, which chains to the next queue pair).
+ **/
+static void i40e_vsi_configure_msix(struct i40e_vsi *vsi)
+{
+       struct i40e_pf *pf = vsi->back;
+       struct i40e_q_vector *q_vector;
+       struct i40e_hw *hw = &pf->hw;
+       u16 vector;
+       int i, q;
+       u32 val;
+       u32 qp;
+
+       /* The interrupt indexing is offset by 1 in the PFINT_ITRn
+        * and PFINT_LNKLSTn registers, e.g.:
+        *   PFINT_ITRn[0..n-1] gets msix-1..msix-n  (qpair interrupts)
+        */
+       qp = vsi->base_queue;
+       vector = vsi->base_vector;
+       q_vector = vsi->q_vectors;
+       for (i = 0; i < vsi->num_q_vectors; i++, q_vector++, vector++) {
+               /* program the Rx and Tx ITR registers for this vector */
+               q_vector->rx.itr = ITR_TO_REG(vsi->rx_itr_setting);
+               q_vector->rx.latency_range = I40E_LOW_LATENCY;
+               wr32(hw, I40E_PFINT_ITRN(I40E_RX_ITR, vector - 1),
+                    q_vector->rx.itr);
+               q_vector->tx.itr = ITR_TO_REG(vsi->tx_itr_setting);
+               q_vector->tx.latency_range = I40E_LOW_LATENCY;
+               wr32(hw, I40E_PFINT_ITRN(I40E_TX_ITR, vector - 1),
+                    q_vector->tx.itr);
+
+               /* Linked list for the queuepairs assigned to this vector */
+               wr32(hw, I40E_PFINT_LNKLSTN(vector - 1), qp);
+               for (q = 0; q < q_vector->num_ringpairs; q++) {
+                       /* Rx cause for qp: next entry is qp's Tx cause */
+                       val = I40E_QINT_RQCTL_CAUSE_ENA_MASK |
+                             (I40E_RX_ITR << I40E_QINT_RQCTL_ITR_INDX_SHIFT)  |
+                             (vector      << I40E_QINT_RQCTL_MSIX_INDX_SHIFT) |
+                             (qp          << I40E_QINT_RQCTL_NEXTQ_INDX_SHIFT)|
+                             (I40E_QUEUE_TYPE_TX
+                                     << I40E_QINT_RQCTL_NEXTQ_TYPE_SHIFT);
+
+                       wr32(hw, I40E_QINT_RQCTL(qp), val);
+
+                       /* Tx cause for qp: next entry is qp+1's Rx cause */
+                       val = I40E_QINT_TQCTL_CAUSE_ENA_MASK |
+                             (I40E_TX_ITR << I40E_QINT_TQCTL_ITR_INDX_SHIFT)  |
+                             (vector      << I40E_QINT_TQCTL_MSIX_INDX_SHIFT) |
+                             ((qp+1)      << I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT)|
+                             (I40E_QUEUE_TYPE_RX
+                                     << I40E_QINT_TQCTL_NEXTQ_TYPE_SHIFT);
+
+                       /* Terminate the linked list */
+                       if (q == (q_vector->num_ringpairs - 1))
+                               val |= (I40E_QUEUE_END_OF_LIST
+                                          << I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT);
+
+                       wr32(hw, I40E_QINT_TQCTL(qp), val);
+                       qp++;
+               }
+       }
+
+       i40e_flush(hw);
+}
+
+/**
+ * i40e_enable_misc_int_causes - enable the non-queue interrupts
+ * @hw: ptr to the hardware info
+ *
+ * Clears then enables the "other" (non-queue) interrupt causes on
+ * ICR0: ECC/HMC/PCI errors, malicious-driver detect, reset, GPIO,
+ * storm detect, VF reset and admin queue events.
+ **/
+static void i40e_enable_misc_int_causes(struct i40e_hw *hw)
+{
+       u32 val;
+
+       /* clear things first */
+       wr32(hw, I40E_PFINT_ICR0_ENA, 0);  /* disable all */
+       rd32(hw, I40E_PFINT_ICR0);         /* read to clear */
+
+       val = I40E_PFINT_ICR0_ENA_ECC_ERR_MASK       |
+             I40E_PFINT_ICR0_ENA_MAL_DETECT_MASK    |
+             I40E_PFINT_ICR0_ENA_GRST_MASK          |
+             I40E_PFINT_ICR0_ENA_PCI_EXCEPTION_MASK |
+             I40E_PFINT_ICR0_ENA_GPIO_MASK          |
+             I40E_PFINT_ICR0_ENA_STORM_DETECT_MASK  |
+             I40E_PFINT_ICR0_ENA_HMC_ERR_MASK       |
+             I40E_PFINT_ICR0_ENA_VFLR_MASK          |
+             I40E_PFINT_ICR0_ENA_ADMINQ_MASK;
+
+       wr32(hw, I40E_PFINT_ICR0_ENA, val);
+
+       /* SW_ITR_IDX = 0, but don't change INTENA */
+       wr32(hw, I40E_PFINT_DYN_CTL0, I40E_PFINT_DYN_CTLN_SW_ITR_INDX_MASK |
+                                       I40E_PFINT_DYN_CTLN_INTENA_MSK_MASK);
+
+       /* OTHER_ITR_IDX = 0 */
+       wr32(hw, I40E_PFINT_STAT_CTL0, 0);
+}
+
+/**
+ * i40e_configure_msi_and_legacy - Legacy mode interrupt config in the HW
+ * @vsi: the VSI being configured
+ *
+ * Programs ITR for the single vector, enables the misc causes, and
+ * links queue pair 0's Rx and Tx causes to vector 0.
+ **/
+static void i40e_configure_msi_and_legacy(struct i40e_vsi *vsi)
+{
+       struct i40e_q_vector *q_vector = vsi->q_vectors;
+       struct i40e_pf *pf = vsi->back;
+       struct i40e_hw *hw = &pf->hw;
+       u32 val;
+
+       /* set the ITR configuration */
+       q_vector->rx.itr = ITR_TO_REG(vsi->rx_itr_setting);
+       q_vector->rx.latency_range = I40E_LOW_LATENCY;
+       wr32(hw, I40E_PFINT_ITR0(I40E_RX_ITR), q_vector->rx.itr);
+       q_vector->tx.itr = ITR_TO_REG(vsi->tx_itr_setting);
+       q_vector->tx.latency_range = I40E_LOW_LATENCY;
+       wr32(hw, I40E_PFINT_ITR0(I40E_TX_ITR), q_vector->tx.itr);
+
+       i40e_enable_misc_int_causes(hw);
+
+       /* FIRSTQ_INDX = 0, FIRSTQ_TYPE = 0 (rx) */
+       wr32(hw, I40E_PFINT_LNKLST0, 0);
+
+       /* Associate the queue pair to the vector and enable the q int */
+       /* NOTE(review): the RQCTL value is built with the TQCTL
+        * NEXTQ_TYPE shift macro — confirm the two shifts are the
+        * same value or whether the RQCTL macro was intended
+        */
+       val = I40E_QINT_RQCTL_CAUSE_ENA_MASK                  |
+             (I40E_RX_ITR << I40E_QINT_RQCTL_ITR_INDX_SHIFT) |
+             (I40E_QUEUE_TYPE_TX << I40E_QINT_TQCTL_NEXTQ_TYPE_SHIFT);
+
+       wr32(hw, I40E_QINT_RQCTL(0), val);
+
+       val = I40E_QINT_TQCTL_CAUSE_ENA_MASK                  |
+             (I40E_TX_ITR << I40E_QINT_TQCTL_ITR_INDX_SHIFT) |
+             (I40E_QUEUE_END_OF_LIST << I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT);
+
+       wr32(hw, I40E_QINT_TQCTL(0), val);
+       i40e_flush(hw);
+}
+
+/**
+ * i40e_irq_dynamic_enable_icr0 - Enable default interrupt generation for icr0
+ * @pf: board private structure
+ *
+ * Re-arms vector 0 (INTENA + CLEARPBA) with no ITR throttling.
+ **/
+static void i40e_irq_dynamic_enable_icr0(struct i40e_pf *pf)
+{
+       struct i40e_hw *hw = &pf->hw;
+       u32 val;
+
+       val = I40E_PFINT_DYN_CTL0_INTENA_MASK   |
+             I40E_PFINT_DYN_CTL0_CLEARPBA_MASK |
+             (I40E_ITR_NONE << I40E_PFINT_DYN_CTL0_ITR_INDX_SHIFT);
+
+       wr32(hw, I40E_PFINT_DYN_CTL0, val);
+       i40e_flush(hw);
+}
+
+/**
+ * i40e_irq_dynamic_enable - Enable default interrupt generation settings
+ * @vsi: pointer to a vsi
+ * @vector: enable a particular Hw Interrupt vector
+ *
+ * Re-arms the given vector (INTENA + CLEARPBA) with no ITR
+ * throttling.  @vector is 1-based; the DYN_CTLN register index
+ * is vector - 1.
+ **/
+void i40e_irq_dynamic_enable(struct i40e_vsi *vsi, int vector)
+{
+       struct i40e_pf *pf = vsi->back;
+       struct i40e_hw *hw = &pf->hw;
+       u32 val;
+
+       val = I40E_PFINT_DYN_CTLN_INTENA_MASK |
+             I40E_PFINT_DYN_CTLN_CLEARPBA_MASK |
+             (I40E_ITR_NONE << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT);
+       wr32(hw, I40E_PFINT_DYN_CTLN(vector - 1), val);
+       i40e_flush(hw);
+}
+
+/**
+ * i40e_msix_clean_rings - MSIX mode Interrupt Handler
+ * @irq: interrupt number
+ * @data: pointer to a q_vector
+ *
+ * Schedules NAPI for the vector; always returns IRQ_HANDLED.
+ **/
+static irqreturn_t i40e_msix_clean_rings(int irq, void *data)
+{
+       struct i40e_q_vector *q_vector = data;
+
+       /* only schedule NAPI when the vector actually owns rings */
+       if (q_vector->tx.ring[0] || q_vector->rx.ring[0])
+               napi_schedule(&q_vector->napi);
+
+       return IRQ_HANDLED;
+}
+
+/**
+ * i40e_fdir_clean_rings - Interrupt Handler for FDIR rings
+ * @irq: interrupt number
+ * @data: pointer to a q_vector
+ *
+ * Placeholder: only logs that Flow Director ring cleaning is needed;
+ * vectors with no rings attached are acknowledged silently.
+ **/
+static irqreturn_t i40e_fdir_clean_rings(int irq, void *data)
+{
+       struct i40e_q_vector *q_vector = data;
+
+       if (q_vector->tx.ring[0] || q_vector->rx.ring[0])
+               pr_info("fdir ring cleaning needed\n");
+
+       return IRQ_HANDLED;
+}
+
+/**
+ * i40e_vsi_request_irq_msix - Initialize MSI-X interrupts
+ * @vsi: the VSI being configured
+ * @basename: name for the vector
+ *
+ * Allocates MSI-X vectors and requests interrupts from the kernel.
+ * On failure, any vectors already requested are released again before
+ * the error is returned.
+ **/
+static int i40e_vsi_request_irq_msix(struct i40e_vsi *vsi, char *basename)
+{
+       int q_vectors = vsi->num_q_vectors;
+       struct i40e_pf *pf = vsi->back;
+       int base = vsi->base_vector;
+       int rx_int_idx = 0;
+       int tx_int_idx = 0;
+       int vector, err;
+
+       for (vector = 0; vector < q_vectors; vector++) {
+               struct i40e_q_vector *q_vector = &(vsi->q_vectors[vector]);
+
+               /* name the vector after the ring type(s) it services */
+               if (q_vector->tx.ring[0] && q_vector->rx.ring[0]) {
+                       snprintf(q_vector->name, sizeof(q_vector->name) - 1,
+                                "%s-%s-%d", basename, "TxRx", rx_int_idx++);
+                       tx_int_idx++;
+               } else if (q_vector->rx.ring[0]) {
+                       snprintf(q_vector->name, sizeof(q_vector->name) - 1,
+                                "%s-%s-%d", basename, "rx", rx_int_idx++);
+               } else if (q_vector->tx.ring[0]) {
+                       snprintf(q_vector->name, sizeof(q_vector->name) - 1,
+                                "%s-%s-%d", basename, "tx", tx_int_idx++);
+               } else {
+                       /* skip this unused q_vector */
+                       continue;
+               }
+               err = request_irq(pf->msix_entries[base + vector].vector,
+                                 vsi->irq_handler,
+                                 0,
+                                 q_vector->name,
+                                 q_vector);
+               if (err) {
+                       dev_info(&pf->pdev->dev,
+                                "%s: request_irq failed, error: %d\n",
+                                __func__, err);
+                       goto free_queue_irqs;
+               }
+               /* assign the mask for this irq */
+               irq_set_affinity_hint(pf->msix_entries[base + vector].vector,
+                                     &q_vector->affinity_mask);
+       }
+
+       return 0;
+
+free_queue_irqs:
+       while (vector) {
+               vector--;
+               /* ringless vectors were skipped above and never called
+                * request_irq(); freeing them here would trip the genirq
+                * "Trying to free already-free IRQ" warning
+                */
+               if (!vsi->q_vectors[vector].tx.ring[0] &&
+                   !vsi->q_vectors[vector].rx.ring[0])
+                       continue;
+               irq_set_affinity_hint(pf->msix_entries[base + vector].vector,
+                                     NULL);
+               free_irq(pf->msix_entries[base + vector].vector,
+                        &(vsi->q_vectors[vector]));
+       }
+       return err;
+}
+
+/**
+ * i40e_vsi_disable_irq - Mask off queue interrupt generation on the VSI
+ * @vsi: the VSI being un-configured
+ *
+ * Zeroes each queue's interrupt-cause control registers, then disables
+ * and synchronizes the VSI's interrupt vectors (MSI-X) or the single
+ * device irq (MSI/legacy).
+ **/
+static void i40e_vsi_disable_irq(struct i40e_vsi *vsi)
+{
+       struct i40e_pf *pf = vsi->back;
+       struct i40e_hw *hw = &pf->hw;
+       int base = vsi->base_vector;
+       int i;
+
+       /* clear the Tx/Rx interrupt-cause enables for every queue pair */
+       for (i = 0; i < vsi->num_queue_pairs; i++) {
+               wr32(hw, I40E_QINT_TQCTL(vsi->tx_rings[i].reg_idx), 0);
+               wr32(hw, I40E_QINT_RQCTL(vsi->rx_rings[i].reg_idx), 0);
+       }
+
+       if (pf->flags & I40E_FLAG_MSIX_ENABLED) {
+               /* "i - 1" matches the PFINT_DYN_CTLN indexing used in
+                * i40e_irq_dynamic_enable()
+                */
+               for (i = vsi->base_vector;
+                    i < (vsi->num_q_vectors + vsi->base_vector); i++)
+                       wr32(hw, I40E_PFINT_DYN_CTLN(i - 1), 0);
+
+               i40e_flush(hw);
+               /* wait out any handler still running on another CPU */
+               for (i = 0; i < vsi->num_q_vectors; i++)
+                       synchronize_irq(pf->msix_entries[i + base].vector);
+       } else {
+               /* Legacy and MSI mode - this stops all interrupt handling */
+               wr32(hw, I40E_PFINT_ICR0_ENA, 0);
+               wr32(hw, I40E_PFINT_DYN_CTL0, 0);
+               i40e_flush(hw);
+               synchronize_irq(pf->pdev->irq);
+       }
+}
+
+/**
+ * i40e_vsi_enable_irq - Enable IRQ for the given VSI
+ * @vsi: the VSI being configured
+ *
+ * In MSI-X mode every queue vector of the VSI is re-armed; otherwise
+ * only the shared ICR0 vector is.  Always returns 0.
+ **/
+static int i40e_vsi_enable_irq(struct i40e_vsi *vsi)
+{
+       struct i40e_pf *pf = vsi->back;
+       int i;
+
+       if (!(pf->flags & I40E_FLAG_MSIX_ENABLED)) {
+               i40e_irq_dynamic_enable_icr0(pf);
+               return 0;
+       }
+
+       for (i = 0; i < vsi->num_q_vectors; i++)
+               i40e_irq_dynamic_enable(vsi, vsi->base_vector + i);
+
+       return 0;
+}
+
+/**
+ * i40e_stop_misc_vector - Stop the vector that handles non-queue events
+ * @pf: board private structure
+ *
+ * Clears the ICR0 enable register so no further "other cause"
+ * interrupts are generated.
+ **/
+static void i40e_stop_misc_vector(struct i40e_pf *pf)
+{
+       struct i40e_hw *hw = &pf->hw;
+
+       /* Disable ICR 0 */
+       wr32(hw, I40E_PFINT_ICR0_ENA, 0);
+       i40e_flush(hw);
+}
+
+/**
+ * i40e_intr - MSI/Legacy and non-queue interrupt handler
+ * @irq: interrupt number
+ * @data: pointer to a q_vector
+ *
+ * This is the handler used for all MSI/Legacy interrupts, and deals
+ * with both queue and non-queue interrupts.  This is also used in
+ * MSIX mode to handle the non-queue interrupts.
+ *
+ * NOTE: @data is actually the struct i40e_pf registered in
+ * i40e_vsi_request_irq(), despite the kernel-doc above.
+ **/
+static irqreturn_t i40e_intr(int irq, void *data)
+{
+       struct i40e_pf *pf = (struct i40e_pf *)data;
+       struct i40e_hw *hw = &pf->hw;
+       u32 icr0, icr0_remaining;
+       u32 val, ena_mask;
+
+       icr0 = rd32(hw, I40E_PFINT_ICR0);
+
+       /* if sharing a legacy IRQ, we might get called w/o an intr pending */
+       if ((icr0 & I40E_PFINT_ICR0_INTEVENT_MASK) == 0)
+               return IRQ_NONE;
+
+       /* ack the interrupt by clearing the ICR0 pending-bit array */
+       val = rd32(hw, I40E_PFINT_DYN_CTL0);
+       val = val | I40E_PFINT_DYN_CTL0_CLEARPBA_MASK;
+       wr32(hw, I40E_PFINT_DYN_CTL0, val);
+
+       /* causes handled below are removed from ena_mask so they stay
+        * disabled when ICR0_ENA is rewritten at the end
+        */
+       ena_mask = rd32(hw, I40E_PFINT_ICR0_ENA);
+
+       /* only q0 is used in MSI/Legacy mode, and none are used in MSIX */
+       if (icr0 & I40E_PFINT_ICR0_QUEUE_0_MASK) {
+
+               /* temporarily disable queue cause for NAPI processing */
+               u32 qval = rd32(hw, I40E_QINT_RQCTL(0));
+               qval &= ~I40E_QINT_RQCTL_CAUSE_ENA_MASK;
+               wr32(hw, I40E_QINT_RQCTL(0), qval);
+
+               qval = rd32(hw, I40E_QINT_TQCTL(0));
+               qval &= ~I40E_QINT_TQCTL_CAUSE_ENA_MASK;
+               wr32(hw, I40E_QINT_TQCTL(0), qval);
+               i40e_flush(hw);
+
+               if (!test_bit(__I40E_DOWN, &pf->state))
+                       napi_schedule(&pf->vsi[pf->lan_vsi]->q_vectors[0].napi);
+       }
+
+       /* non-queue causes are flagged for the service task to handle */
+       if (icr0 & I40E_PFINT_ICR0_ADMINQ_MASK) {
+               ena_mask &= ~I40E_PFINT_ICR0_ENA_ADMINQ_MASK;
+               set_bit(__I40E_ADMINQ_EVENT_PENDING, &pf->state);
+       }
+
+       if (icr0 & I40E_PFINT_ICR0_MAL_DETECT_MASK) {
+               ena_mask &= ~I40E_PFINT_ICR0_ENA_MAL_DETECT_MASK;
+               set_bit(__I40E_MDD_EVENT_PENDING, &pf->state);
+       }
+
+       if (icr0 & I40E_PFINT_ICR0_VFLR_MASK) {
+               ena_mask &= ~I40E_PFINT_ICR0_ENA_VFLR_MASK;
+               set_bit(__I40E_VFLR_EVENT_PENDING, &pf->state);
+       }
+
+       if (icr0 & I40E_PFINT_ICR0_GRST_MASK) {
+               if (!test_bit(__I40E_RESET_RECOVERY_PENDING, &pf->state))
+                       set_bit(__I40E_RESET_INTR_RECEIVED, &pf->state);
+               ena_mask &= ~I40E_PFINT_ICR0_ENA_GRST_MASK;
+               /* record which kind of reset fired */
+               val = rd32(hw, I40E_GLGEN_RSTAT);
+               val = (val & I40E_GLGEN_RSTAT_RESET_TYPE_MASK)
+                      >> I40E_GLGEN_RSTAT_RESET_TYPE_SHIFT;
+               if (val & I40E_RESET_CORER)
+                       pf->corer_count++;
+               else if (val & I40E_RESET_GLOBR)
+                       pf->globr_count++;
+               else if (val & I40E_RESET_EMPR)
+                       pf->empr_count++;
+       }
+
+       /* If a critical error is pending we have no choice but to reset the
+        * device.
+        * Report and mask out any remaining unexpected interrupts.
+        */
+       icr0_remaining = icr0 & ena_mask;
+       if (icr0_remaining) {
+               dev_info(&pf->pdev->dev, "unhandled interrupt icr0=0x%08x\n",
+                        icr0_remaining);
+               if ((icr0_remaining & I40E_PFINT_ICR0_HMC_ERR_MASK) ||
+                   (icr0_remaining & I40E_PFINT_ICR0_PE_CRITERR_MASK) ||
+                   (icr0_remaining & I40E_PFINT_ICR0_PCI_EXCEPTION_MASK) ||
+                   (icr0_remaining & I40E_PFINT_ICR0_ECC_ERR_MASK) ||
+                   (icr0_remaining & I40E_PFINT_ICR0_MAL_DETECT_MASK)) {
+                       if (icr0 & I40E_PFINT_ICR0_HMC_ERR_MASK) {
+                               dev_info(&pf->pdev->dev, "HMC error interrupt\n");
+                       } else {
+                               dev_info(&pf->pdev->dev, "device will be reset\n");
+                               set_bit(__I40E_PF_RESET_REQUESTED, &pf->state);
+                               i40e_service_event_schedule(pf);
+                       }
+               }
+               ena_mask &= ~icr0_remaining;
+       }
+
+       /* re-enable interrupt causes */
+       wr32(hw, I40E_PFINT_ICR0_ENA, ena_mask);
+       i40e_flush(hw);
+       if (!test_bit(__I40E_DOWN, &pf->state)) {
+               i40e_service_event_schedule(pf);
+               i40e_irq_dynamic_enable_icr0(pf);
+       }
+
+       return IRQ_HANDLED;
+}
+
+/**
+ * map_vector_to_rxq - Assigns the Rx queue to the vector
+ * @vsi: the VSI being configured
+ * @v_idx: vector index
+ * @r_idx: rx queue index
+ *
+ * Appends the ring to the vector's Rx ring list and links back.
+ **/
+static void map_vector_to_rxq(struct i40e_vsi *vsi, int v_idx, int r_idx)
+{
+       struct i40e_q_vector *q_vector = &(vsi->q_vectors[v_idx]);
+       struct i40e_ring *rx_ring = &(vsi->rx_rings[r_idx]);
+
+       rx_ring->q_vector = q_vector;
+       q_vector->rx.ring[q_vector->rx.count] = rx_ring;
+       q_vector->rx.count++;
+       q_vector->rx.latency_range = I40E_LOW_LATENCY;
+       q_vector->vsi = vsi;
+}
+
+/**
+ * map_vector_to_txq - Assigns the Tx queue to the vector
+ * @vsi: the VSI being configured
+ * @v_idx: vector index
+ * @t_idx: tx queue index
+ *
+ * Appends the ring to the vector's Tx ring list and links back.
+ * num_ringpairs is bumped only here (not in map_vector_to_rxq), so it
+ * counts queue *pairs* when rings are mapped pairwise.
+ **/
+static void map_vector_to_txq(struct i40e_vsi *vsi, int v_idx, int t_idx)
+{
+       struct i40e_q_vector *q_vector = &(vsi->q_vectors[v_idx]);
+       struct i40e_ring *tx_ring = &(vsi->tx_rings[t_idx]);
+
+       tx_ring->q_vector = q_vector;
+       q_vector->tx.ring[q_vector->tx.count] = tx_ring;
+       q_vector->tx.count++;
+       q_vector->tx.latency_range = I40E_LOW_LATENCY;
+       q_vector->num_ringpairs++;
+       q_vector->vsi = vsi;
+}
+
+/**
+ * i40e_vsi_map_rings_to_vectors - Maps descriptor rings to vectors
+ * @vsi: the VSI being configured
+ *
+ * This function maps descriptor rings to the queue-specific vectors
+ * we were allotted through the MSI-X enabling code.  Ideally, we'd have
+ * one vector per queue pair, but on a constrained vector budget, we
+ * group the queue pairs as "efficiently" as possible.
+ **/
+static void i40e_vsi_map_rings_to_vectors(struct i40e_vsi *vsi)
+{
+       int qp_remaining = vsi->num_queue_pairs;
+       int q_vectors = vsi->num_q_vectors;
+       int qp_idx = 0;
+       int v_idx;
+
+       /* each vector takes ceil(remaining pairs / vectors left), which
+        * spreads the pairs as evenly as possible
+        */
+       for (v_idx = 0; v_idx < q_vectors && qp_remaining; v_idx++) {
+               int num_qps = DIV_ROUND_UP(qp_remaining, q_vectors - v_idx);
+
+               while (num_qps--) {
+                       map_vector_to_rxq(vsi, v_idx, qp_idx);
+                       map_vector_to_txq(vsi, v_idx, qp_idx);
+                       qp_idx++;
+                       qp_remaining--;
+               }
+       }
+}
+
+/**
+ * i40e_vsi_request_irq - Request IRQ from the OS
+ * @vsi: the VSI being configured
+ * @basename: name for the vector
+ *
+ * MSI-X gets per-vector handlers; MSI and legacy share the single
+ * i40e_intr() handler, with IRQF_SHARED only in the legacy case.
+ **/
+static int i40e_vsi_request_irq(struct i40e_vsi *vsi, char *basename)
+{
+       struct i40e_pf *pf = vsi->back;
+       int err;
+
+       if (pf->flags & I40E_FLAG_MSIX_ENABLED) {
+               err = i40e_vsi_request_irq_msix(vsi, basename);
+       } else {
+               /* MSI gets an exclusive line; legacy INTx may be shared */
+               unsigned long flags = (pf->flags & I40E_FLAG_MSI_ENABLED) ?
+                                     0 : IRQF_SHARED;
+
+               err = request_irq(pf->pdev->irq, i40e_intr, flags,
+                                 pf->misc_int_name, pf);
+       }
+
+       if (err)
+               dev_info(&pf->pdev->dev, "request_irq failed, Error %d\n", err);
+
+       return err;
+}
+
+#ifdef CONFIG_NET_POLL_CONTROLLER
+/**
+ * i40e_netpoll - A Polling 'interrupt' handler
+ * @netdev: network interface device structure
+ *
+ * This is used by netconsole to send skbs without having to re-enable
+ * interrupts.  It's not called while the normal interrupt routine is executing.
+ **/
+static void i40e_netpoll(struct net_device *netdev)
+{
+       struct i40e_netdev_priv *np = netdev_priv(netdev);
+       struct i40e_vsi *vsi = np->vsi;
+       struct i40e_pf *pf = vsi->back;
+       int i;
+
+       /* if interface is down do nothing */
+       if (test_bit(__I40E_DOWN, &vsi->state))
+               return;
+
+       pf->flags |= I40E_FLAG_IN_NETPOLL;
+       if (pf->flags & I40E_FLAG_MSIX_ENABLED) {
+               /* invoke each queue vector's handler directly */
+               for (i = 0; i < vsi->num_q_vectors; i++)
+                       i40e_msix_clean_rings(0, &vsi->q_vectors[i]);
+       } else {
+               /* i40e_intr() casts its cookie to struct i40e_pf * (it is
+                * registered with pf in i40e_vsi_request_irq()), so pass
+                * pf here, not the netdev
+                */
+               i40e_intr(pf->pdev->irq, pf);
+       }
+       pf->flags &= ~I40E_FLAG_IN_NETPOLL;
+}
+#endif
+
+/**
+ * i40e_vsi_control_tx - Start or stop a VSI's rings
+ * @vsi: the VSI being configured
+ * @enable: start or stop the rings
+ *
+ * Returns 0 on success, -ETIMEDOUT if a queue does not reach the
+ * requested state in time.
+ **/
+static int i40e_vsi_control_tx(struct i40e_vsi *vsi, bool enable)
+{
+       struct i40e_pf *pf = vsi->back;
+       struct i40e_hw *hw = &pf->hw;
+       int i, j, pf_q;
+       u32 tx_reg;
+
+       pf_q = vsi->base_queue;
+       for (i = 0; i < vsi->num_queue_pairs; i++, pf_q++) {
+               /* wait until QENA_REQ and QENA_STAT agree, i.e. no
+                * previous enable/disable transition is still in flight
+                */
+               j = 1000;
+               do {
+                       usleep_range(1000, 2000);
+                       tx_reg = rd32(hw, I40E_QTX_ENA(pf_q));
+               } while (j-- && ((tx_reg >> I40E_QTX_ENA_QENA_REQ_SHIFT)
+                              ^ (tx_reg >> I40E_QTX_ENA_QENA_STAT_SHIFT)) & 1);
+
+               /* skip queues already in the requested state */
+               if (enable) {
+                       /* is STAT set ? */
+                       if ((tx_reg & I40E_QTX_ENA_QENA_STAT_MASK)) {
+                               dev_info(&pf->pdev->dev,
+                                        "Tx %d already enabled\n", i);
+                               continue;
+                       }
+               } else {
+                       /* is !STAT set ? */
+                       if (!(tx_reg & I40E_QTX_ENA_QENA_STAT_MASK)) {
+                               dev_info(&pf->pdev->dev,
+                                        "Tx %d already disabled\n", i);
+                               continue;
+                       }
+               }
+
+               /* turn on/off the queue */
+               if (enable)
+                       tx_reg |= I40E_QTX_ENA_QENA_REQ_MASK |
+                                 I40E_QTX_ENA_QENA_STAT_MASK;
+               else
+                       tx_reg &= ~I40E_QTX_ENA_QENA_REQ_MASK;
+
+               wr32(hw, I40E_QTX_ENA(pf_q), tx_reg);
+
+               /* wait for the change to finish */
+               for (j = 0; j < 10; j++) {
+                       tx_reg = rd32(hw, I40E_QTX_ENA(pf_q));
+                       if (enable) {
+                               if ((tx_reg & I40E_QTX_ENA_QENA_STAT_MASK))
+                                       break;
+                       } else {
+                               if (!(tx_reg & I40E_QTX_ENA_QENA_STAT_MASK))
+                                       break;
+                       }
+
+                       udelay(10);
+               }
+               if (j >= 10) {
+                       dev_info(&pf->pdev->dev, "Tx ring %d %sable timeout\n",
+                                pf_q, (enable ? "en" : "dis"));
+                       return -ETIMEDOUT;
+               }
+       }
+
+       return 0;
+}
+
+/**
+ * i40e_vsi_control_rx - Start or stop a VSI's rings
+ * @vsi: the VSI being configured
+ * @enable: start or stop the rings
+ *
+ * Returns 0 on success, -ETIMEDOUT if a queue does not reach the
+ * requested state in time.
+ **/
+static int i40e_vsi_control_rx(struct i40e_vsi *vsi, bool enable)
+{
+       struct i40e_pf *pf = vsi->back;
+       struct i40e_hw *hw = &pf->hw;
+       int i, j, pf_q;
+       u32 rx_reg;
+
+       pf_q = vsi->base_queue;
+       for (i = 0; i < vsi->num_queue_pairs; i++, pf_q++) {
+               /* wait until QENA_REQ and QENA_STAT agree, i.e. no
+                * previous enable/disable transition is still in flight
+                */
+               j = 1000;
+               do {
+                       usleep_range(1000, 2000);
+                       rx_reg = rd32(hw, I40E_QRX_ENA(pf_q));
+               } while (j-- && ((rx_reg >> I40E_QRX_ENA_QENA_REQ_SHIFT)
+                              ^ (rx_reg >> I40E_QRX_ENA_QENA_STAT_SHIFT)) & 1);
+
+               /* skip queues already in the requested state */
+               if (enable) {
+                       /* is STAT set ? */
+                       if ((rx_reg & I40E_QRX_ENA_QENA_STAT_MASK))
+                               continue;
+               } else {
+                       /* is !STAT set ? */
+                       if (!(rx_reg & I40E_QRX_ENA_QENA_STAT_MASK))
+                               continue;
+               }
+
+               /* turn on/off the queue
+                * NOTE(review): the disable path clears STAT as well as
+                * REQ here, while i40e_vsi_control_tx() clears only REQ —
+                * confirm the asymmetry is intentional
+                */
+               if (enable)
+                       rx_reg |= I40E_QRX_ENA_QENA_REQ_MASK |
+                                 I40E_QRX_ENA_QENA_STAT_MASK;
+               else
+                       rx_reg &= ~(I40E_QRX_ENA_QENA_REQ_MASK |
+                                 I40E_QRX_ENA_QENA_STAT_MASK);
+               wr32(hw, I40E_QRX_ENA(pf_q), rx_reg);
+
+               /* wait for the change to finish */
+               for (j = 0; j < 10; j++) {
+                       rx_reg = rd32(hw, I40E_QRX_ENA(pf_q));
+
+                       if (enable) {
+                               if ((rx_reg & I40E_QRX_ENA_QENA_STAT_MASK))
+                                       break;
+                       } else {
+                               if (!(rx_reg & I40E_QRX_ENA_QENA_STAT_MASK))
+                                       break;
+                       }
+
+                       udelay(10);
+               }
+               if (j >= 10) {
+                       dev_info(&pf->pdev->dev, "Rx ring %d %sable timeout\n",
+                                pf_q, (enable ? "en" : "dis"));
+                       return -ETIMEDOUT;
+               }
+       }
+
+       return 0;
+}
+
+/**
+ * i40e_vsi_control_rings - Start or stop a VSI's rings
+ * @vsi: the VSI being configured
+ * @request: true to start the rings, false to stop them
+ *
+ * Rx is brought up before Tx and torn down after it.  Returns the first
+ * error encountered, or the result of the second operation.
+ **/
+static int i40e_vsi_control_rings(struct i40e_vsi *vsi, bool request)
+{
+       int ret;
+
+       if (request) {
+               /* enable: rx first, then tx */
+               ret = i40e_vsi_control_rx(vsi, request);
+               if (!ret)
+                       ret = i40e_vsi_control_tx(vsi, request);
+       } else {
+               /* disable: tx first, then rx */
+               ret = i40e_vsi_control_tx(vsi, request);
+               if (!ret)
+                       ret = i40e_vsi_control_rx(vsi, request);
+       }
+
+       return ret;
+}
+
+/**
+ * i40e_vsi_free_irq - Free the irq association with the OS
+ * @vsi: the VSI being configured
+ *
+ * Releases the VSI's IRQs back to the kernel and tears down the
+ * hardware interrupt link lists (both MSI-X and MSI/legacy paths).
+ **/
+static void i40e_vsi_free_irq(struct i40e_vsi *vsi)
+{
+       struct i40e_pf *pf = vsi->back;
+       struct i40e_hw *hw = &pf->hw;
+       int base = vsi->base_vector;
+       u32 val, qp;
+       int i;
+
+       if (pf->flags & I40E_FLAG_MSIX_ENABLED) {
+               if (!vsi->q_vectors)
+                       return;
+
+               for (i = 0; i < vsi->num_q_vectors; i++) {
+                       u16 vector = i + base;
+
+                       /* free only the irqs that were actually requested */
+                       if (vsi->q_vectors[i].num_ringpairs == 0)
+                               continue;
+
+                       /* clear the affinity_mask in the IRQ descriptor */
+                       irq_set_affinity_hint(pf->msix_entries[vector].vector,
+                                             NULL);
+                       free_irq(pf->msix_entries[vector].vector,
+                                &vsi->q_vectors[i]);
+
+                       /* Tear down the interrupt queue link list
+                        *
+                        * We know that they come in pairs and always
+                        * the Rx first, then the Tx.  To clear the
+                        * link list, stick the EOL value into the
+                        * next_q field of the registers.
+                        */
+                       val = rd32(hw, I40E_PFINT_LNKLSTN(vector - 1));
+                       qp = (val & I40E_PFINT_LNKLSTN_FIRSTQ_INDX_MASK)
+                               >> I40E_PFINT_LNKLSTN_FIRSTQ_INDX_SHIFT;
+                       val |= I40E_QUEUE_END_OF_LIST
+                               << I40E_PFINT_LNKLSTN_FIRSTQ_INDX_SHIFT;
+                       wr32(hw, I40E_PFINT_LNKLSTN(vector - 1), val);
+
+                       /* walk the Rx/Tx cause registers along the list,
+                        * clearing the enables and marking next_q as EOL
+                        */
+                       while (qp != I40E_QUEUE_END_OF_LIST) {
+                               u32 next;
+
+                               val = rd32(hw, I40E_QINT_RQCTL(qp));
+
+                               val &= ~(I40E_QINT_RQCTL_MSIX_INDX_MASK  |
+                                        I40E_QINT_RQCTL_MSIX0_INDX_MASK |
+                                        I40E_QINT_RQCTL_CAUSE_ENA_MASK  |
+                                        I40E_QINT_RQCTL_INTEVENT_MASK);
+
+                               val |= (I40E_QINT_RQCTL_ITR_INDX_MASK |
+                                        I40E_QINT_RQCTL_NEXTQ_INDX_MASK);
+
+                               wr32(hw, I40E_QINT_RQCTL(qp), val);
+
+                               val = rd32(hw, I40E_QINT_TQCTL(qp));
+
+                               /* remember the successor before clearing it */
+                               next = (val & I40E_QINT_TQCTL_NEXTQ_INDX_MASK)
+                                       >> I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT;
+
+                               val &= ~(I40E_QINT_TQCTL_MSIX_INDX_MASK  |
+                                        I40E_QINT_TQCTL_MSIX0_INDX_MASK |
+                                        I40E_QINT_TQCTL_CAUSE_ENA_MASK  |
+                                        I40E_QINT_TQCTL_INTEVENT_MASK);
+
+                               val |= (I40E_QINT_TQCTL_ITR_INDX_MASK |
+                                        I40E_QINT_TQCTL_NEXTQ_INDX_MASK);
+
+                               wr32(hw, I40E_QINT_TQCTL(qp), val);
+                               qp = next;
+                       }
+               }
+       } else {
+               free_irq(pf->pdev->irq, pf);
+
+               /* same teardown for the single legacy/MSI link list
+                * NOTE(review): the mask below is the LNKLSTN macro while
+                * the shift is LNKLST0 — presumably the fields share one
+                * layout; confirm against the register spec
+                */
+               val = rd32(hw, I40E_PFINT_LNKLST0);
+               qp = (val & I40E_PFINT_LNKLSTN_FIRSTQ_INDX_MASK)
+                       >> I40E_PFINT_LNKLSTN_FIRSTQ_INDX_SHIFT;
+               val |= I40E_QUEUE_END_OF_LIST
+                       << I40E_PFINT_LNKLST0_FIRSTQ_INDX_SHIFT;
+               wr32(hw, I40E_PFINT_LNKLST0, val);
+
+               val = rd32(hw, I40E_QINT_RQCTL(qp));
+               val &= ~(I40E_QINT_RQCTL_MSIX_INDX_MASK  |
+                        I40E_QINT_RQCTL_MSIX0_INDX_MASK |
+                        I40E_QINT_RQCTL_CAUSE_ENA_MASK  |
+                        I40E_QINT_RQCTL_INTEVENT_MASK);
+
+               val |= (I40E_QINT_RQCTL_ITR_INDX_MASK |
+                       I40E_QINT_RQCTL_NEXTQ_INDX_MASK);
+
+               wr32(hw, I40E_QINT_RQCTL(qp), val);
+
+               val = rd32(hw, I40E_QINT_TQCTL(qp));
+
+               val &= ~(I40E_QINT_TQCTL_MSIX_INDX_MASK  |
+                        I40E_QINT_TQCTL_MSIX0_INDX_MASK |
+                        I40E_QINT_TQCTL_CAUSE_ENA_MASK  |
+                        I40E_QINT_TQCTL_INTEVENT_MASK);
+
+               val |= (I40E_QINT_TQCTL_ITR_INDX_MASK |
+                       I40E_QINT_TQCTL_NEXTQ_INDX_MASK);
+
+               wr32(hw, I40E_QINT_TQCTL(qp), val);
+       }
+}
+
+/**
+ * i40e_vsi_free_q_vectors - Free memory allocated for interrupt vectors
+ * @vsi: the VSI being un-configured
+ *
+ * This frees the memory allocated to the q_vectors and
+ * deletes references to the NAPI struct.
+ **/
+static void i40e_vsi_free_q_vectors(struct i40e_vsi *vsi)
+{
+       int v_idx;
+
+       /* guard the array pointer itself (the old per-element NULL check
+        * on &q_vectors[i] could never fire) and make the function safe
+        * to call twice
+        */
+       if (!vsi->q_vectors)
+               return;
+
+       for (v_idx = 0; v_idx < vsi->num_q_vectors; v_idx++) {
+               struct i40e_q_vector *q_vector = &vsi->q_vectors[v_idx];
+               int r_idx;
+
+               /* disassociate q_vector from rings */
+               for (r_idx = 0; r_idx < q_vector->tx.count; r_idx++)
+                       q_vector->tx.ring[r_idx]->q_vector = NULL;
+               for (r_idx = 0; r_idx < q_vector->rx.count; r_idx++)
+                       q_vector->rx.ring[r_idx]->q_vector = NULL;
+
+               /* only VSI w/ an associated netdev is set up w/ NAPI */
+               if (vsi->netdev)
+                       netif_napi_del(&q_vector->napi);
+       }
+       kfree(vsi->q_vectors);
+       /* clear the dangling pointer so i40e_vsi_free_irq()'s
+        * "!vsi->q_vectors" check actually protects against use-after-free
+        */
+       vsi->q_vectors = NULL;
+}
+
+/**
+ * i40e_reset_interrupt_capability - Disable interrupt setup in OS
+ * @pf: board private structure
+ *
+ * Turns off MSI-X or MSI on the PCI device and drops the corresponding
+ * flags; in legacy mode the interrupt was already cleaned in vsi_close.
+ **/
+static void i40e_reset_interrupt_capability(struct i40e_pf *pf)
+{
+       struct pci_dev *pdev = pf->pdev;
+
+       if (pf->flags & I40E_FLAG_MSIX_ENABLED) {
+               pci_disable_msix(pdev);
+               kfree(pf->msix_entries);
+               pf->msix_entries = NULL;
+       } else if (pf->flags & I40E_FLAG_MSI_ENABLED) {
+               pci_disable_msi(pdev);
+       }
+       pf->flags &= ~(I40E_FLAG_MSIX_ENABLED | I40E_FLAG_MSI_ENABLED);
+}
+
+/**
+ * i40e_clear_interrupt_scheme - Clear the current interrupt scheme settings
+ * @pf: board private structure
+ *
+ * We go through and clear interrupt specific resources and reset the structure
+ * to pre-load conditions
+ **/
+static void i40e_clear_interrupt_scheme(struct i40e_pf *pf)
+{
+       int v;
+
+       /* return the whole irq allocation and free every VSI's vectors */
+       i40e_put_lump(pf->irq_pile, 0, I40E_PILE_VALID_BIT-1);
+       for (v = 0; v < pf->hw.func_caps.num_vsis; v++) {
+               if (pf->vsi[v])
+                       i40e_vsi_free_q_vectors(pf->vsi[v]);
+       }
+       i40e_reset_interrupt_capability(pf);
+}
+
+/**
+ * i40e_napi_enable_all - Enable NAPI for all q_vectors in the VSI
+ * @vsi: the VSI being configured
+ *
+ * No-op for VSIs without a netdev, since only those have NAPI set up.
+ **/
+static void i40e_napi_enable_all(struct i40e_vsi *vsi)
+{
+       int v;
+
+       if (!vsi->netdev)
+               return;
+
+       for (v = 0; v < vsi->num_q_vectors; v++)
+               napi_enable(&vsi->q_vectors[v].napi);
+}
+
+/**
+ * i40e_napi_disable_all - Disable NAPI for all q_vectors in the VSI
+ * @vsi: the VSI being configured
+ *
+ * No-op for VSIs without a netdev, since only those have NAPI set up.
+ **/
+static void i40e_napi_disable_all(struct i40e_vsi *vsi)
+{
+       int v;
+
+       if (!vsi->netdev)
+               return;
+
+       for (v = 0; v < vsi->num_q_vectors; v++)
+               napi_disable(&vsi->q_vectors[v].napi);
+}
+
+/**
+ * i40e_quiesce_vsi - Pause a given VSI
+ * @vsi: the VSI being paused
+ *
+ * Takes the VSI down via its netdev stop path when one is running,
+ * otherwise directly via i40e_down(); marks it for later restart by
+ * i40e_unquiesce_vsi().
+ **/
+static void i40e_quiesce_vsi(struct i40e_vsi *vsi)
+{
+       /* already down: nothing to pause */
+       if (test_bit(__I40E_DOWN, &vsi->state))
+               return;
+
+       set_bit(__I40E_NEEDS_RESTART, &vsi->state);
+       if (vsi->netdev && netif_running(vsi->netdev)) {
+               vsi->netdev->netdev_ops->ndo_stop(vsi->netdev);
+       } else {
+               set_bit(__I40E_DOWN, &vsi->state);
+               i40e_down(vsi);
+       }
+}
+
+/**
+ * i40e_unquiesce_vsi - Resume a given VSI
+ * @vsi: the VSI being resumed
+ *
+ * Restarts a VSI that i40e_quiesce_vsi() paused, using the same path
+ * (netdev open vs. direct i40e_up()) that was used to stop it.
+ **/
+static void i40e_unquiesce_vsi(struct i40e_vsi *vsi)
+{
+       /* only resume VSIs we paused ourselves */
+       if (!test_bit(__I40E_NEEDS_RESTART, &vsi->state))
+               return;
+
+       clear_bit(__I40E_NEEDS_RESTART, &vsi->state);
+       if (vsi->netdev && netif_running(vsi->netdev))
+               vsi->netdev->netdev_ops->ndo_open(vsi->netdev);
+       else
+               i40e_up(vsi);   /* this clears the DOWN bit */
+}
+
+/**
+ * i40e_pf_quiesce_all_vsi - Pause all VSIs on a PF
+ * @pf: the PF
+ **/
+static void i40e_pf_quiesce_all_vsi(struct i40e_pf *pf)
+{
+       int i;
+
+       for (i = 0; i < pf->hw.func_caps.num_vsis; i++)
+               if (pf->vsi[i])
+                       i40e_quiesce_vsi(pf->vsi[i]);
+}
+
+/**
+ * i40e_pf_unquiesce_all_vsi - Resume all VSIs on a PF
+ * @pf: the PF
+ **/
+static void i40e_pf_unquiesce_all_vsi(struct i40e_pf *pf)
+{
+       int i;
+
+       for (i = 0; i < pf->hw.func_caps.num_vsis; i++)
+               if (pf->vsi[i])
+                       i40e_unquiesce_vsi(pf->vsi[i]);
+}
+
+/**
+ * i40e_dcb_get_num_tc -  Get the number of TCs from DCBx config
+ * @dcbcfg: the corresponding DCBx configuration structure
+ *
+ * Return the number of TCs from given DCBx configuration
+ **/
+static u8 i40e_dcb_get_num_tc(struct i40e_dcbx_config *dcbcfg)
+{
+       int max_tc_idx = 0;
+       int i;
+
+       /* The highest traffic-class index referenced by the ETS priority
+        * table tells us how many TCs are in use.  Indices are zero
+        * based, so the count is the maximum index plus one.
+        */
+       for (i = 0; i < I40E_MAX_USER_PRIORITY; i++)
+               if (dcbcfg->etscfg.prioritytable[i] > max_tc_idx)
+                       max_tc_idx = dcbcfg->etscfg.prioritytable[i];
+
+       return max_tc_idx + 1;
+}
+
+/**
+ * i40e_dcb_get_enabled_tc - Get enabled traffic classes
+ * @dcbcfg: the corresponding DCBx configuration structure
+ *
+ * Returns a bitmap with one bit set per enabled traffic class in the
+ * given DCBX config; TC0 is always reported as enabled.
+ **/
+static u8 i40e_dcb_get_enabled_tc(struct i40e_dcbx_config *dcbcfg)
+{
+       u8 num_tc = i40e_dcb_get_num_tc(dcbcfg);
+       u8 enabled_tc = 1;
+       u8 tc = 0;
+
+       while (tc < num_tc)
+               enabled_tc |= 1 << tc++;
+
+       return enabled_tc;
+}
+
+/**
+ * i40e_pf_get_num_tc - Get enabled traffic classes for PF
+ * @pf: PF being queried
+ *
+ * Return number of traffic classes enabled for the given PF
+ **/
+static u8 i40e_pf_get_num_tc(struct i40e_pf *pf)
+{
+       struct i40e_hw *hw = &pf->hw;
+       u8 num_tc = 0;
+       u8 i;
+
+       /* If DCB is not enabled then always in single TC */
+       if (!(pf->flags & I40E_FLAG_DCB_ENABLED))
+               return 1;
+
+       /* MFP mode: count the TCs the firmware enabled for this PF */
+       if (pf->flags & I40E_FLAG_MFP_ENABLED) {
+               u8 enabled_tc = hw->func_caps.enabled_tcmap;
+
+               for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++)
+                       if (enabled_tc & (1 << i))
+                               num_tc++;
+               return num_tc;
+       }
+
+       /* SFP mode: all TCs from the local DCBX config apply to the port */
+       return i40e_dcb_get_num_tc(&hw->local_dcbx_config);
+}
+
+/**
+ * i40e_pf_get_default_tc - Get bitmap for first enabled TC
+ * @pf: PF being queried
+ *
+ * Return a bitmap for first enabled traffic class for this PF.
+ **/
+static u8 i40e_pf_get_default_tc(struct i40e_pf *pf)
+{
+       u8 enabled_tc = pf->hw.func_caps.enabled_tcmap;
+       u8 i;
+
+       /* with no capability info, fall back to TC0 */
+       if (!enabled_tc)
+               return 0x1;
+
+       /* the lowest set bit is the first enabled TC */
+       for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++)
+               if (enabled_tc & (1 << i))
+                       return 1 << i;
+
+       /* no bit within range was set; mirror the original fallthrough */
+       return 1 << i;
+}
+
+/**
+ * i40e_pf_get_tc_map - Get bitmap for enabled traffic classes
+ * @pf: PF being queried
+ *
+ * Return a bitmap for enabled traffic classes for this PF.
+ **/
+static u8 i40e_pf_get_tc_map(struct i40e_pf *pf)
+{
+       /* If DCB is not enabled for this PF then just return default TC */
+       if (!(pf->flags & I40E_FLAG_DCB_ENABLED))
+               return i40e_pf_get_default_tc(pf);
+
+       /* MFP mode will have enabled TCs set by FW */
+       if (pf->flags & I40E_FLAG_MFP_ENABLED)
+               return pf->hw.func_caps.enabled_tcmap;
+
+       /* SFP mode we want PF to be enabled for all TCs */
+       return i40e_dcb_get_enabled_tc(&pf->hw.local_dcbx_config);
+}
+
+/**
+ * i40e_vsi_get_bw_info - Query VSI BW Information
+ * @vsi: the VSI being queried
+ *
+ * Queries the VSI-level and per-TC BW configuration from the AQ and
+ * caches the results in the vsi struct (bw_limit, bw_max_quanta and
+ * the per-TC share/limit credits and max quanta).
+ *
+ * Returns 0 on success, negative value on failure
+ **/
+static int i40e_vsi_get_bw_info(struct i40e_vsi *vsi)
+{
+       struct i40e_aqc_query_vsi_ets_sla_config_resp bw_ets_config = {0};
+       struct i40e_aqc_query_vsi_bw_config_resp bw_config = {0};
+       struct i40e_pf *pf = vsi->back;
+       struct i40e_hw *hw = &pf->hw;
+       u32 tc_bw_max;
+       int ret;
+       int i;
+
+       /* Get the VSI level BW configuration */
+       ret = i40e_aq_query_vsi_bw_config(hw, vsi->seid, &bw_config, NULL);
+       if (ret) {
+               dev_info(&pf->pdev->dev,
+                        "couldn't get pf vsi bw config, err %d, aq_err %d\n",
+                        ret, pf->hw.aq.asq_last_status);
+               return ret;
+       }
+
+       /* Get the VSI level BW configuration per TC */
+       ret = i40e_aq_query_vsi_ets_sla_config(hw, vsi->seid,
+                                              &bw_ets_config,
+                                              NULL);
+       if (ret) {
+               dev_info(&pf->pdev->dev,
+                        "couldn't get pf vsi ets bw config, err %d, aq_err %d\n",
+                        ret, pf->hw.aq.asq_last_status);
+               return ret;
+       }
+
+       /* A mismatch is unexpected but non-fatal: log and keep going
+        * with whatever the firmware returned.
+        */
+       if (bw_config.tc_valid_bits != bw_ets_config.tc_valid_bits) {
+               dev_info(&pf->pdev->dev,
+                        "Enabled TCs mismatch from querying VSI BW info 0x%08x 0x%08x\n",
+                        bw_config.tc_valid_bits,
+                        bw_ets_config.tc_valid_bits);
+               /* Still continuing */
+       }
+
+       vsi->bw_limit = le16_to_cpu(bw_config.port_bw_limit);
+       vsi->bw_max_quanta = bw_config.max_bw;
+       /* tc_bw_max arrives as two little-endian 16-bit words; combine
+        * them into one 32-bit value holding 4 bits per TC.
+        */
+       tc_bw_max = le16_to_cpu(bw_ets_config.tc_bw_max[0]) |
+                   (le16_to_cpu(bw_ets_config.tc_bw_max[1]) << 16);
+       for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+               vsi->bw_ets_share_credits[i] = bw_ets_config.share_credits[i];
+               vsi->bw_ets_limit_credits[i] =
+                                       le16_to_cpu(bw_ets_config.credits[i]);
+               /* 3 bits out of 4 for each TC */
+               vsi->bw_ets_max_quanta[i] = (u8)((tc_bw_max >> (i*4)) & 0x7);
+       }
+       return ret;
+}
+
+/**
+ * i40e_vsi_configure_bw_alloc - Configure VSI BW allocation per TC
+ * @vsi: the VSI being configured
+ * @enabled_tc: TC bitmap
+ * @bw_share: BW shared credits per TC
+ *
+ * Returns 0 on success, negative value on failure
+ **/
+static int i40e_vsi_configure_bw_alloc(struct i40e_vsi *vsi,
+                                      u8 enabled_tc,
+                                      u8 *bw_share)
+{
+       struct i40e_aqc_configure_vsi_tc_bw_data bw_data;
+       int i, ret = 0;
+
+       bw_data.tc_valid_bits = enabled_tc;
+       for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++)
+               bw_data.tc_bw_credits[i] = bw_share[i];
+
+       ret = i40e_aq_config_vsi_tc_bw(&vsi->back->hw, vsi->seid,
+                                      &bw_data, NULL);
+       if (ret) {
+               dev_info(&vsi->back->pdev->dev,
+                        "%s: AQ command Config VSI BW allocation per TC failed = %d\n",
+                        __func__, vsi->back->hw.aq.asq_last_status);
+               return ret;
+       }
+
+       /* Cache the queue-set handles the firmware assigned per TC */
+       for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++)
+               vsi->info.qs_handle[i] = bw_data.qs_handles[i];
+
+       return ret;
+}
+
+/**
+ * i40e_vsi_config_netdev_tc - Setup the netdev TC configuration
+ * @vsi: the VSI being configured
+ * @enabled_tc: TC map to be enabled
+ *
+ * Mirrors the VSI's TC configuration into the netdev layer:
+ * number of TCs, per-TC queue ranges, and the priority-to-TC map.
+ **/
+static void i40e_vsi_config_netdev_tc(struct i40e_vsi *vsi, u8 enabled_tc)
+{
+       struct net_device *netdev = vsi->netdev;
+       struct i40e_pf *pf = vsi->back;
+       struct i40e_hw *hw = &pf->hw;
+       u8 netdev_tc = 0;
+       int i;
+       struct i40e_dcbx_config *dcbcfg = &hw->local_dcbx_config;
+
+       /* VSI without a netdev (e.g. VF VSIs) has nothing to mirror */
+       if (!netdev)
+               return;
+
+       if (!enabled_tc) {
+               netdev_reset_tc(netdev);
+               return;
+       }
+
+       /* Set up actual enabled TCs on the VSI */
+       if (netdev_set_num_tc(netdev, vsi->tc_config.numtc))
+               return;
+
+       /* set per TC queues for the VSI */
+       for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+               /* Only set TC queues for enabled tcs
+                *
+                * e.g. For a VSI that has TC0 and TC3 enabled the
+                * enabled_tc bitmap would be 0x00001001; the driver
+                * will set the numtc for netdev as 2 that will be
+                * referenced by the netdev layer as TC 0 and 1.
+                */
+               if (vsi->tc_config.enabled_tc & (1 << i))
+                       netdev_set_tc_queue(netdev,
+                                       vsi->tc_config.tc_info[i].netdev_tc,
+                                       vsi->tc_config.tc_info[i].qcount,
+                                       vsi->tc_config.tc_info[i].qoffset);
+       }
+
+       /* Assign UP2TC map for the VSI */
+       for (i = 0; i < I40E_MAX_USER_PRIORITY; i++) {
+               /* Get the actual TC# for the UP */
+               u8 ets_tc = dcbcfg->etscfg.prioritytable[i];
+               /* Get the mapped netdev TC# for the UP */
+               netdev_tc =  vsi->tc_config.tc_info[ets_tc].netdev_tc;
+               netdev_set_prio_tc_map(netdev, i, netdev_tc);
+       }
+}
+
+/**
+ * i40e_vsi_update_queue_map - Update our copy of VSI info with new queue map
+ * @vsi: the VSI being configured
+ * @ctxt: the ctxt buffer returned from AQ VSI update param command
+ **/
+static void i40e_vsi_update_queue_map(struct i40e_vsi *vsi,
+                                     struct i40e_vsi_context *ctxt)
+{
+       /* copy just the sections touched not the entire info
+        * since not all sections are valid as returned by
+        * update vsi params
+        */
+       vsi->info.mapping_flags = ctxt->info.mapping_flags;
+       memcpy(&vsi->info.queue_mapping,
+              &ctxt->info.queue_mapping, sizeof(vsi->info.queue_mapping));
+       memcpy(&vsi->info.tc_mapping, ctxt->info.tc_mapping,
+              sizeof(vsi->info.tc_mapping));
+}
+
+/**
+ * i40e_vsi_config_tc - Configure VSI Tx Scheduler for given TC map
+ * @vsi: VSI to be configured
+ * @enabled_tc: TC bitmap
+ *
+ * This configures a particular VSI for TCs that are mapped to the
+ * given TC bitmap. It uses default bandwidth share for TCs across
+ * VSIs to configure TC for a particular VSI.
+ *
+ * NOTE:
+ * It is expected that the VSI queues have been quiesced before calling
+ * this function.
+ **/
+static int i40e_vsi_config_tc(struct i40e_vsi *vsi, u8 enabled_tc)
+{
+       u8 bw_share[I40E_MAX_TRAFFIC_CLASS] = {0};
+       struct i40e_vsi_context ctxt;
+       int ret = 0;
+       int i;
+
+       /* Check if enabled_tc is same as existing or new TCs */
+       if (vsi->tc_config.enabled_tc == enabled_tc)
+               return ret;
+
+       /* Enable ETS TCs with equal BW Share for now across all VSIs */
+       for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
+               if (enabled_tc & (1 << i))
+                       bw_share[i] = 1;
+       }
+
+       /* BW allocation must be in place before the queue map update */
+       ret = i40e_vsi_configure_bw_alloc(vsi, enabled_tc, bw_share);
+       if (ret) {
+               dev_info(&vsi->back->pdev->dev,
+                        "Failed configuring TC map %d for VSI %d\n",
+                        enabled_tc, vsi->seid);
+               goto out;
+       }
+
+       /* Update Queue Pairs Mapping for currently enabled UPs */
+       ctxt.seid = vsi->seid;
+       ctxt.pf_num = vsi->back->hw.pf_id;
+       ctxt.vf_num = 0;
+       ctxt.uplink_seid = vsi->uplink_seid;
+       memcpy(&ctxt.info, &vsi->info, sizeof(vsi->info));
+       i40e_vsi_setup_queue_map(vsi, &ctxt, enabled_tc, false);
+
+       /* Update the VSI after updating the VSI queue-mapping information */
+       ret = i40e_aq_update_vsi_params(&vsi->back->hw, &ctxt, NULL);
+       if (ret) {
+               dev_info(&vsi->back->pdev->dev,
+                        "update vsi failed, aq_err=%d\n",
+                        vsi->back->hw.aq.asq_last_status);
+               goto out;
+       }
+       /* update the local VSI info with updated queue map */
+       i40e_vsi_update_queue_map(vsi, &ctxt);
+       vsi->info.valid_sections = 0;
+
+       /* Update current VSI BW information */
+       ret = i40e_vsi_get_bw_info(vsi);
+       if (ret) {
+               dev_info(&vsi->back->pdev->dev,
+                        "Failed updating vsi bw info, aq_err=%d\n",
+                        vsi->back->hw.aq.asq_last_status);
+               goto out;
+       }
+
+       /* Update the netdev TC setup */
+       i40e_vsi_config_netdev_tc(vsi, enabled_tc);
+out:
+       return ret;
+}
+
+/**
+ * i40e_up_complete - Finish the last steps of bringing up a connection
+ * @vsi: the VSI being configured
+ *
+ * Programs interrupts, starts the rings, enables NAPI/IRQs, and starts
+ * the netdev queues if link is already up.
+ *
+ * Returns 0 on success, negative value on failure
+ **/
+static int i40e_up_complete(struct i40e_vsi *vsi)
+{
+       struct i40e_pf *pf = vsi->back;
+       int err;
+
+       if (pf->flags & I40E_FLAG_MSIX_ENABLED)
+               i40e_vsi_configure_msix(vsi);
+       else
+               i40e_configure_msi_and_legacy(vsi);
+
+       /* start rings */
+       err = i40e_vsi_control_rings(vsi, true);
+       if (err)
+               return err;
+
+       clear_bit(__I40E_DOWN, &vsi->state);
+       i40e_napi_enable_all(vsi);
+       i40e_vsi_enable_irq(vsi);
+
+       /* Only report carrier if the PHY already reports link up;
+        * otherwise the link event handler will start the queues later.
+        */
+       if ((pf->hw.phy.link_info.link_info & I40E_AQ_LINK_UP) &&
+           (vsi->netdev)) {
+               netif_tx_start_all_queues(vsi->netdev);
+               netif_carrier_on(vsi->netdev);
+       }
+       i40e_service_event_schedule(pf);
+
+       return 0;
+}
+
+/**
+ * i40e_vsi_reinit_locked - Reset the VSI
+ * @vsi: the VSI being configured
+ *
+ * Rebuild the ring structs after some configuration
+ * has changed, e.g. MTU size.
+ **/
+static void i40e_vsi_reinit_locked(struct i40e_vsi *vsi)
+{
+       struct i40e_pf *pf = vsi->back;
+
+       WARN_ON(in_interrupt());
+       /* Serialize against any other config path via the BUSY bit */
+       while (test_and_set_bit(__I40E_CONFIG_BUSY, &pf->state))
+               usleep_range(1000, 2000);
+       i40e_down(vsi);
+
+       /* Give a VF some time to respond to the reset.  The
+        * two second wait is based upon the watchdog cycle in
+        * the VF driver.
+        */
+       if (vsi->type == I40E_VSI_SRIOV)
+               msleep(2000);
+       i40e_up(vsi);
+       clear_bit(__I40E_CONFIG_BUSY, &pf->state);
+}
+
+/**
+ * i40e_up - Bring the connection back up after being down
+ * @vsi: the VSI being configured
+ *
+ * Returns 0 on success, negative value on failure
+ **/
+int i40e_up(struct i40e_vsi *vsi)
+{
+       int err;
+
+       err = i40e_vsi_configure(vsi);
+       if (!err)
+               err = i40e_up_complete(vsi);
+
+       return err;
+}
+
+/**
+ * i40e_down - Shutdown the connection processing
+ * @vsi: the VSI being stopped
+ *
+ * Stops the netdev queues, disables IRQs and rings, quiesces NAPI,
+ * then drains the Tx/Rx rings.
+ **/
+void i40e_down(struct i40e_vsi *vsi)
+{
+       int i;
+
+       /* It is assumed that the caller of this function
+        * sets the vsi->state __I40E_DOWN bit.
+        */
+       if (vsi->netdev) {
+               netif_carrier_off(vsi->netdev);
+               netif_tx_disable(vsi->netdev);
+       }
+       i40e_vsi_disable_irq(vsi);
+       i40e_vsi_control_rings(vsi, false);
+       i40e_napi_disable_all(vsi);
+
+       for (i = 0; i < vsi->num_queue_pairs; i++) {
+               i40e_clean_tx_ring(&vsi->tx_rings[i]);
+               i40e_clean_rx_ring(&vsi->rx_rings[i]);
+       }
+}
+
+/**
+ * i40e_setup_tc - configure multiple traffic classes
+ * @netdev: net device to configure
+ * @tc: number of traffic classes to enable
+ *
+ * Returns 0 on success (or if the requested config is already active),
+ * -EINVAL if preconditions fail, or the error from i40e_vsi_config_tc.
+ **/
+static int i40e_setup_tc(struct net_device *netdev, u8 tc)
+{
+       struct i40e_netdev_priv *np = netdev_priv(netdev);
+       struct i40e_vsi *vsi = np->vsi;
+       struct i40e_pf *pf = vsi->back;
+       u8 enabled_tc = 0;
+       int ret = -EINVAL;
+       int i;
+
+       /* Check if DCB enabled to continue */
+       if (!(pf->flags & I40E_FLAG_DCB_ENABLED)) {
+               netdev_info(netdev, "DCB is not enabled for adapter\n");
+               goto exit;
+       }
+
+       /* Check if MFP enabled */
+       if (pf->flags & I40E_FLAG_MFP_ENABLED) {
+               netdev_info(netdev, "Configuring TC not supported in MFP mode\n");
+               goto exit;
+       }
+
+       /* Check whether tc count is within enabled limit */
+       if (tc > i40e_pf_get_num_tc(pf)) {
+               netdev_info(netdev, "TC count greater than enabled on link for adapter\n");
+               goto exit;
+       }
+
+       /* Generate TC map for number of tc requested */
+       for (i = 0; i < tc; i++)
+               enabled_tc |= (1 << i);
+
+       /* Requesting same TC configuration as already enabled */
+       if (enabled_tc == vsi->tc_config.enabled_tc)
+               return 0;
+
+       /* Quiesce VSI queues */
+       i40e_quiesce_vsi(vsi);
+
+       /* Configure VSI for enabled TCs */
+       ret = i40e_vsi_config_tc(vsi, enabled_tc);
+       if (ret) {
+               netdev_info(netdev, "Failed configuring TC for VSI seid=%d\n",
+                           vsi->seid);
+               goto exit;
+       }
+
+       /* Unquiesce VSI */
+       i40e_unquiesce_vsi(vsi);
+
+exit:
+       return ret;
+}
+
+/**
+ * i40e_open - Called when a network interface is made active
+ * @netdev: network interface device structure
+ *
+ * The open entry point is called when a network interface is made
+ * active by the system (IFF_UP).  At this point all resources needed
+ * for transmit and receive operations are allocated, the interrupt
+ * handler is registered with the OS, the netdev watchdog subtask is
+ * enabled, and the stack is notified that the interface is ready.
+ *
+ * Returns 0 on success, negative value on failure
+ **/
+static int i40e_open(struct net_device *netdev)
+{
+       struct i40e_netdev_priv *np = netdev_priv(netdev);
+       struct i40e_vsi *vsi = np->vsi;
+       struct i40e_pf *pf = vsi->back;
+       char int_name[IFNAMSIZ];
+       int err;
+
+       /* disallow open during test */
+       if (test_bit(__I40E_TESTING, &pf->state))
+               return -EBUSY;
+
+       netif_carrier_off(netdev);
+
+       /* allocate descriptors */
+       err = i40e_vsi_setup_tx_resources(vsi);
+       if (err)
+               goto err_setup_tx;
+       err = i40e_vsi_setup_rx_resources(vsi);
+       if (err)
+               goto err_setup_rx;
+
+       err = i40e_vsi_configure(vsi);
+       if (err)
+               goto err_setup_rx;
+
+       /* snprintf already reserves room for the terminating NUL within
+        * the size argument, so pass the full buffer size.
+        */
+       snprintf(int_name, sizeof(int_name), "%s-%s",
+                dev_driver_string(&pf->pdev->dev), netdev->name);
+       err = i40e_vsi_request_irq(vsi, int_name);
+       if (err)
+               goto err_setup_rx;
+
+       err = i40e_up_complete(vsi);
+       if (err)
+               goto err_up_complete;
+
+       if ((vsi->type == I40E_VSI_MAIN) || (vsi->type == I40E_VSI_VMDQ2)) {
+               err = i40e_aq_set_vsi_broadcast(&pf->hw, vsi->seid, true, NULL);
+               if (err)
+                       netdev_info(netdev,
+                                   "couldn't set broadcast err %d aq_err %d\n",
+                                   err, pf->hw.aq.asq_last_status);
+       }
+
+       return 0;
+
+err_up_complete:
+       i40e_down(vsi);
+       i40e_vsi_free_irq(vsi);
+err_setup_rx:
+       i40e_vsi_free_rx_resources(vsi);
+err_setup_tx:
+       i40e_vsi_free_tx_resources(vsi);
+       /* failing to open the main LAN VSI warrants a full PF reset */
+       if (vsi == pf->vsi[pf->lan_vsi])
+               i40e_do_reset(pf, (1 << __I40E_PF_RESET_REQUESTED));
+
+       return err;
+}
+
+/**
+ * i40e_close - Disables a network interface
+ * @netdev: network interface device structure
+ *
+ * The close entry point is called when an interface is de-activated
+ * by the OS.  The hardware is still under the driver's control, but
+ * this netdev interface is disabled.
+ *
+ * Returns 0, this is not allowed to fail
+ **/
+static int i40e_close(struct net_device *netdev)
+{
+       struct i40e_netdev_priv *np = netdev_priv(netdev);
+       struct i40e_vsi *vsi = np->vsi;
+
+       /* already down: nothing to do (test_and_set makes this idempotent) */
+       if (test_and_set_bit(__I40E_DOWN, &vsi->state))
+               return 0;
+
+       i40e_down(vsi);
+       i40e_vsi_free_irq(vsi);
+
+       i40e_vsi_free_tx_resources(vsi);
+       i40e_vsi_free_rx_resources(vsi);
+
+       return 0;
+}
+
+/**
+ * i40e_do_reset - Start a PF or Core Reset sequence
+ * @pf: board private structure
+ * @reset_flags: which reset is requested
+ *
+ * The essential difference in resets is that the PF Reset
+ * doesn't clear the packet buffers, doesn't reset the PE
+ * firmware, and doesn't bother the other PFs on the chip.
+ *
+ * Only the single widest reset requested is performed, checked in
+ * decreasing order of scope: Global, Core, PF, then VSI reinit.
+ **/
+void i40e_do_reset(struct i40e_pf *pf, u32 reset_flags)
+{
+       u32 val;
+
+       WARN_ON(in_interrupt());
+
+       /* do the biggest reset indicated */
+       if (reset_flags & (1 << __I40E_GLOBAL_RESET_REQUESTED)) {
+
+               /* Request a Global Reset
+                *
+                * This will start the chip's countdown to the actual full
+                * chip reset event, and a warning interrupt to be sent
+                * to all PFs, including the requestor.  Our handler
+                * for the warning interrupt will deal with the shutdown
+                * and recovery of the switch setup.
+                */
+               dev_info(&pf->pdev->dev, "GlobalR requested\n");
+               val = rd32(&pf->hw, I40E_GLGEN_RTRIG);
+               val |= I40E_GLGEN_RTRIG_GLOBR_MASK;
+               wr32(&pf->hw, I40E_GLGEN_RTRIG, val);
+
+       } else if (reset_flags & (1 << __I40E_CORE_RESET_REQUESTED)) {
+
+               /* Request a Core Reset
+                *
+                * Same as Global Reset, except does *not* include the MAC/PHY
+                */
+               dev_info(&pf->pdev->dev, "CoreR requested\n");
+               val = rd32(&pf->hw, I40E_GLGEN_RTRIG);
+               val |= I40E_GLGEN_RTRIG_CORER_MASK;
+               wr32(&pf->hw, I40E_GLGEN_RTRIG, val);
+               i40e_flush(&pf->hw);
+
+       } else if (reset_flags & (1 << __I40E_PF_RESET_REQUESTED)) {
+
+               /* Request a PF Reset
+                *
+                * Resets only the PF-specific registers
+                *
+                * This goes directly to the tear-down and rebuild of
+                * the switch, since we need to do all the recovery as
+                * for the Core Reset.
+                */
+               dev_info(&pf->pdev->dev, "PFR requested\n");
+               i40e_handle_reset_warning(pf);
+
+       } else if (reset_flags & (1 << __I40E_REINIT_REQUESTED)) {
+               int v;
+
+               /* Find the VSI(s) that requested a re-init */
+               dev_info(&pf->pdev->dev,
+                        "VSI reinit requested\n");
+               for (v = 0; v < pf->hw.func_caps.num_vsis; v++) {
+                       struct i40e_vsi *vsi = pf->vsi[v];
+                       if (vsi != NULL &&
+                           test_bit(__I40E_REINIT_REQUESTED, &vsi->state)) {
+                               i40e_vsi_reinit_locked(pf->vsi[v]);
+                               clear_bit(__I40E_REINIT_REQUESTED, &vsi->state);
+                       }
+               }
+
+               /* no further action needed, so return now */
+               return;
+       } else {
+               dev_info(&pf->pdev->dev,
+                        "bad reset request 0x%08x\n", reset_flags);
+               return;
+       }
+}
+
+/**
+ * i40e_handle_lan_overflow_event - Handler for LAN queue overflow event
+ * @pf: board private structure
+ * @e: event info posted on ARQ
+ *
+ * Handler for LAN Queue Overflow Event generated by the firmware for PF
+ * and VF queues
+ **/
+static void i40e_handle_lan_overflow_event(struct i40e_pf *pf,
+                                          struct i40e_arq_event_info *e)
+{
+       struct i40e_aqc_lan_overflow *data =
+               (struct i40e_aqc_lan_overflow *)&e->desc.params.raw;
+       u32 queue = le32_to_cpu(data->prtdcb_rupto);
+       u32 qtx_ctl = le32_to_cpu(data->otx_ctl);
+       struct i40e_hw *hw = &pf->hw;
+       struct i40e_vf *vf;
+       u16 vf_id;
+
+       dev_info(&pf->pdev->dev, "%s: Rx Queue Number = %d QTX_CTL=0x%08x\n",
+                __func__, queue, qtx_ctl);
+
+       /* Queue belongs to VF, find the VF and issue VF reset */
+       if (((qtx_ctl & I40E_QTX_CTL_PFVF_Q_MASK)
+           >> I40E_QTX_CTL_PFVF_Q_SHIFT) == I40E_QTX_CTL_VF_QUEUE) {
+               vf_id = (u16)((qtx_ctl & I40E_QTX_CTL_VFVM_INDX_MASK)
+                        >> I40E_QTX_CTL_VFVM_INDX_SHIFT);
+               vf_id -= hw->func_caps.vf_base_id;
+               /* NOTE(review): vf_id comes from a FW-reported register value
+                * and is not bounds-checked against the allocated VF count
+                * before indexing pf->vf[] — confirm firmware guarantees it
+                * is in range, or add a range check.
+                */
+               vf = &pf->vf[vf_id];
+               i40e_vc_notify_vf_reset(vf);
+               /* Allow VF to process pending reset notification */
+               msleep(20);
+               i40e_reset_vf(vf, false);
+       }
+}
+
+/**
+ * i40e_service_event_complete - Finish up the service event
+ * @pf: board private structure
+ **/
+static void i40e_service_event_complete(struct i40e_pf *pf)
+{
+       BUG_ON(!test_bit(__I40E_SERVICE_SCHED, &pf->state));
+
+       /* flush memory to make sure state is correct before next watchdog */
+       smp_mb__before_clear_bit();
+       clear_bit(__I40E_SERVICE_SCHED, &pf->state);
+}
+
+/**
+ * i40e_fdir_reinit_subtask - Worker thread to reinit FDIR filter table
+ * @pf: board private structure
+ *
+ * NOTE(review): currently only clears the REQUIRES_REINIT flag; no
+ * actual table rebuild is performed here yet — presumably a placeholder
+ * to be filled in by later FDIR work.
+ **/
+static void i40e_fdir_reinit_subtask(struct i40e_pf *pf)
+{
+       if (!(pf->flags & I40E_FLAG_FDIR_REQUIRES_REINIT))
+               return;
+
+       pf->flags &= ~I40E_FLAG_FDIR_REQUIRES_REINIT;
+
+       /* if interface is down do nothing */
+       if (test_bit(__I40E_DOWN, &pf->state))
+               return;
+}
+
+/**
+ * i40e_vsi_link_event - notify VSI of a link event
+ * @vsi: vsi to be notified
+ * @link_up: link up or down
+ **/
+static void i40e_vsi_link_event(struct i40e_vsi *vsi, bool link_up)
+{
+       if (!vsi)
+               return;
+
+       switch (vsi->type) {
+       case I40E_VSI_MAIN:
+               /* only a registered netdev needs carrier updates */
+               if (!vsi->netdev || !vsi->netdev_registered)
+                       break;
+
+               if (link_up) {
+                       netif_carrier_on(vsi->netdev);
+                       netif_tx_wake_all_queues(vsi->netdev);
+               } else {
+                       netif_carrier_off(vsi->netdev);
+                       netif_tx_stop_all_queues(vsi->netdev);
+               }
+               break;
+
+       case I40E_VSI_SRIOV:
+               break;
+
+       case I40E_VSI_VMDQ2:
+       case I40E_VSI_CTRL:
+       case I40E_VSI_MIRROR:
+       default:
+               /* there is no notification for other VSIs */
+               break;
+       }
+}
+
+/**
+ * i40e_veb_link_event - notify elements on the veb of a link event
+ * @veb: veb to be notified
+ * @link_up: link up or down
+ *
+ * Recursively propagates the link event to child VEBs first, then to
+ * the VSIs attached directly to this VEB.
+ **/
+static void i40e_veb_link_event(struct i40e_veb *veb, bool link_up)
+{
+       struct i40e_pf *pf;
+       int i;
+
+       if (!veb || !veb->pf)
+               return;
+       pf = veb->pf;
+
+       /* depth first... */
+       for (i = 0; i < I40E_MAX_VEB; i++)
+               if (pf->veb[i] && (pf->veb[i]->uplink_seid == veb->seid))
+                       i40e_veb_link_event(pf->veb[i], link_up);
+
+       /* ... now the local VSIs */
+       for (i = 0; i < pf->hw.func_caps.num_vsis; i++)
+               if (pf->vsi[i] && (pf->vsi[i]->uplink_seid == veb->seid))
+                       i40e_vsi_link_event(pf->vsi[i], link_up);
+}
+
+/**
+ * i40e_link_event - Update netif_carrier status
+ * @pf: board private structure
+ *
+ * Compares new vs old PHY link state and, on a change, logs it and
+ * propagates the event down the switch tree and to VFs.
+ **/
+static void i40e_link_event(struct i40e_pf *pf)
+{
+       bool new_link, old_link;
+
+       new_link = (pf->hw.phy.link_info.link_info & I40E_AQ_LINK_UP);
+       old_link = (pf->hw.phy.link_info_old.link_info & I40E_AQ_LINK_UP);
+
+       /* nothing to do if the link state did not change */
+       if (new_link == old_link)
+               return;
+
+       netdev_info(pf->vsi[pf->lan_vsi]->netdev,
+                   "NIC Link is %s\n", (new_link ? "Up" : "Down"));
+
+       /* Notify the base of the switch tree connected to
+        * the link.  Floating VEBs are not notified.
+        */
+       if (pf->lan_veb != I40E_NO_VEB && pf->veb[pf->lan_veb])
+               i40e_veb_link_event(pf->veb[pf->lan_veb], new_link);
+       else
+               i40e_vsi_link_event(pf->vsi[pf->lan_vsi], new_link);
+
+       if (pf->vf)
+               i40e_vc_notify_link_state(pf);
+}
+
+/**
+ * i40e_check_hang_subtask - Check for hung queues and dropped interrupts
+ * @pf: board private structure
+ *
+ * Set the per-queue flags to request a check for stuck queues in the irq
+ * clean functions, then force interrupts to be sure the irq clean is called.
+ **/
+static void i40e_check_hang_subtask(struct i40e_pf *pf)
+{
+       int i, v;
+
+       /* If we're down or resetting, just bail */
+       if (test_bit(__I40E_CONFIG_BUSY, &pf->state))
+               return;
+
+       /* for each VSI/netdev
+        *     for each Tx queue
+        *         set the check flag
+        *     for each q_vector
+        *         force an interrupt
+        */
+       for (v = 0; v < pf->hw.func_caps.num_vsis; v++) {
+               struct i40e_vsi *vsi = pf->vsi[v];
+               int armed = 0;
+
+               /* skip absent, down, or carrier-off VSIs */
+               if (!pf->vsi[v] ||
+                   test_bit(__I40E_DOWN, &vsi->state) ||
+                   (vsi->netdev && !netif_carrier_ok(vsi->netdev)))
+                       continue;
+
+               for (i = 0; i < vsi->num_queue_pairs; i++) {
+                       set_check_for_tx_hang(&vsi->tx_rings[i]);
+                       if (test_bit(__I40E_HANG_CHECK_ARMED,
+                                    &vsi->tx_rings[i].state))
+                               armed++;
+               }
+
+               /* force a software interrupt so the clean routines run
+                * and either clear or report the armed hang checks
+                */
+               if (armed) {
+                       if (!(pf->flags & I40E_FLAG_MSIX_ENABLED)) {
+                               wr32(&vsi->back->hw, I40E_PFINT_DYN_CTL0,
+                                    (I40E_PFINT_DYN_CTL0_INTENA_MASK |
+                                     I40E_PFINT_DYN_CTL0_SWINT_TRIG_MASK));
+                       } else {
+                               u16 vec = vsi->base_vector - 1;
+                               u32 val = (I40E_PFINT_DYN_CTLN_INTENA_MASK |
+                                          I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK);
+                               for (i = 0; i < vsi->num_q_vectors; i++, vec++)
+                                       wr32(&vsi->back->hw,
+                                            I40E_PFINT_DYN_CTLN(vec), val);
+                       }
+                       i40e_flush(&vsi->back->hw);
+               }
+       }
+}
+
+/**
+ * i40e_watchdog_subtask - Check and bring link up
+ * @pf: board private structure
+ *
+ * Periodic stats refresh for all active VSIs and VEBs.
+ **/
+static void i40e_watchdog_subtask(struct i40e_pf *pf)
+{
+       int i;
+
+       /* if interface is down do nothing */
+       if (test_bit(__I40E_DOWN, &pf->state) ||
+           test_bit(__I40E_CONFIG_BUSY, &pf->state))
+               return;
+
+       /* Update the stats for active netdevs so the network stack
+        * can look at updated numbers whenever it cares to
+        */
+       for (i = 0; i < pf->hw.func_caps.num_vsis; i++)
+               if (pf->vsi[i] && pf->vsi[i]->netdev)
+                       i40e_update_stats(pf->vsi[i]);
+
+       /* Update the stats for the active switching components */
+       for (i = 0; i < I40E_MAX_VEB; i++)
+               if (pf->veb[i])
+                       i40e_update_veb_stats(pf->veb[i]);
+}
+
+/**
+ * i40e_reset_subtask - Set up for resetting the device and driver
+ * @pf: board private structure
+ *
+ * Collects all pending reset request bits from pf->state into a flags
+ * word (clearing them), then kicks off the appropriate reset unless a
+ * reset warning is already in flight or the device is down/busy.
+ **/
+static void i40e_reset_subtask(struct i40e_pf *pf)
+{
+       u32 reset_flags = 0;
+
+       if (test_bit(__I40E_REINIT_REQUESTED, &pf->state)) {
+               reset_flags |= (1 << __I40E_REINIT_REQUESTED);
+               clear_bit(__I40E_REINIT_REQUESTED, &pf->state);
+       }
+       if (test_bit(__I40E_PF_RESET_REQUESTED, &pf->state)) {
+               reset_flags |= (1 << __I40E_PF_RESET_REQUESTED);
+               clear_bit(__I40E_PF_RESET_REQUESTED, &pf->state);
+       }
+       if (test_bit(__I40E_CORE_RESET_REQUESTED, &pf->state)) {
+               reset_flags |= (1 << __I40E_CORE_RESET_REQUESTED);
+               clear_bit(__I40E_CORE_RESET_REQUESTED, &pf->state);
+       }
+       if (test_bit(__I40E_GLOBAL_RESET_REQUESTED, &pf->state)) {
+               reset_flags |= (1 << __I40E_GLOBAL_RESET_REQUESTED);
+               clear_bit(__I40E_GLOBAL_RESET_REQUESTED, &pf->state);
+       }
+
+       /* If there's a recovery already waiting, it takes
+        * precedence before starting a new reset sequence.
+        */
+       if (test_bit(__I40E_RESET_INTR_RECEIVED, &pf->state)) {
+               i40e_handle_reset_warning(pf);
+               return;
+       }
+
+       /* If we're already down or resetting, just bail */
+       if (reset_flags &&
+           !test_bit(__I40E_DOWN, &pf->state) &&
+           !test_bit(__I40E_CONFIG_BUSY, &pf->state))
+               i40e_do_reset(pf, reset_flags);
+}
+
+/**
+ * i40e_handle_link_event - Handle link event
+ * @pf: board private structure
+ * @e: event info posted on ARQ
+ **/
+static void i40e_handle_link_event(struct i40e_pf *pf,
+                                  struct i40e_arq_event_info *e)
+{
+       struct i40e_hw *hw = &pf->hw;
+       struct i40e_aqc_get_link_status *status =
+               (struct i40e_aqc_get_link_status *)&e->desc.params.raw;
+       struct i40e_link_status *hw_link_info = &hw->phy.link_info;
+
+       /* save off old link status information */
+       memcpy(&pf->hw.phy.link_info_old, hw_link_info,
+              sizeof(pf->hw.phy.link_info_old));
+
+       /* update link status */
+       hw_link_info->phy_type = (enum i40e_aq_phy_type)status->phy_type;
+       hw_link_info->link_speed = (enum i40e_aq_link_speed)status->link_speed;
+       hw_link_info->link_info = status->link_info;
+       hw_link_info->an_info = status->an_info;
+       hw_link_info->ext_info = status->ext_info;
+       hw_link_info->lse_enable =
+               le16_to_cpu(status->command_flags) &
+                           I40E_AQ_LSE_ENABLE;
+
+       /* process the event */
+       i40e_link_event(pf);
+
+       /* Do a new status request to re-enable LSE reporting
+        * and load new status information into the hw struct,
+        * then see if the status changed while processing the
+        * initial event.
+        */
+       i40e_aq_get_link_info(&pf->hw, true, NULL, NULL);
+       i40e_link_event(pf);
+}
+
+/**
+ * i40e_clean_adminq_subtask - Clean the AdminQ rings
+ * @pf: board private structure
+ *
+ * Drains pending ARQ events (up to pf->adminq_work_limit per pass),
+ * dispatching each by opcode, then re-enables the AdminQ interrupt.
+ **/
+static void i40e_clean_adminq_subtask(struct i40e_pf *pf)
+{
+       struct i40e_arq_event_info event;
+       struct i40e_hw *hw = &pf->hw;
+       u16 pending, i = 0;
+       i40e_status ret;
+       u16 opcode;
+       u32 val;
+
+       if (!test_bit(__I40E_ADMINQ_EVENT_PENDING, &pf->state))
+               return;
+
+       event.msg_size = I40E_MAX_AQ_BUF_SIZE;
+       event.msg_buf = kzalloc(event.msg_size, GFP_KERNEL);
+       if (!event.msg_buf)
+               return;
+
+       do {
+               ret = i40e_clean_arq_element(hw, &event, &pending);
+               if (ret == I40E_ERR_ADMIN_QUEUE_NO_WORK) {
+                       dev_info(&pf->pdev->dev, "No ARQ event found\n");
+                       break;
+               } else if (ret) {
+                       dev_info(&pf->pdev->dev, "ARQ event error %d\n", ret);
+                       break;
+               }
+
+               opcode = le16_to_cpu(event.desc.opcode);
+               switch (opcode) {
+
+               case i40e_aqc_opc_get_link_status:
+                       i40e_handle_link_event(pf, &event);
+                       break;
+               case i40e_aqc_opc_send_msg_to_pf:
+                       ret = i40e_vc_process_vf_msg(pf,
+                                       le16_to_cpu(event.desc.retval),
+                                       le32_to_cpu(event.desc.cookie_high),
+                                       le32_to_cpu(event.desc.cookie_low),
+                                       event.msg_buf,
+                                       event.msg_size);
+                       break;
+               case i40e_aqc_opc_lldp_update_mib:
+                       dev_info(&pf->pdev->dev, "ARQ: Update LLDP MIB event received\n");
+                       break;
+               case i40e_aqc_opc_event_lan_overflow:
+                       dev_info(&pf->pdev->dev, "ARQ LAN queue overflow event received\n");
+                       i40e_handle_lan_overflow_event(pf, &event);
+                       break;
+               default:
+                       /* print the already byte-swapped opcode, not the raw
+                        * little-endian descriptor field (wrong on BE hosts)
+                        */
+                       dev_info(&pf->pdev->dev,
+                                "ARQ Error: Unknown event %d received\n",
+                                opcode);
+                       break;
+               }
+       } while (pending && (i++ < pf->adminq_work_limit));
+
+       clear_bit(__I40E_ADMINQ_EVENT_PENDING, &pf->state);
+       /* re-enable Admin queue interrupt cause */
+       val = rd32(hw, I40E_PFINT_ICR0_ENA);
+       val |=  I40E_PFINT_ICR0_ENA_ADMINQ_MASK;
+       wr32(hw, I40E_PFINT_ICR0_ENA, val);
+       i40e_flush(hw);
+
+       kfree(event.msg_buf);
+}
+
+/**
+ * i40e_reconstitute_veb - rebuild the VEB and anything connected to it
+ * @veb: pointer to the VEB instance
+ *
+ * This is a recursive function that first builds the attached VSIs then
+ * recurses in to build the next layer of VEB.  We track the connections
+ * through our own index numbers because the seid's from the HW could
+ * change across the reset.
+ *
+ * Returns 0 on success, -ENOENT when the owner VSI is missing, or the
+ * error from the first failing i40e_add_vsi()/i40e_add_veb() call.
+ **/
+static int i40e_reconstitute_veb(struct i40e_veb *veb)
+{
+       struct i40e_vsi *ctl_vsi = NULL;
+       struct i40e_pf *pf = veb->pf;
+       int v, veb_idx;
+       int ret;
+
+       /* build VSI that owns this VEB, temporarily attached to base VEB */
+       for (v = 0; v < pf->hw.func_caps.num_vsis && !ctl_vsi; v++) {
+               if (pf->vsi[v] &&
+                   pf->vsi[v]->veb_idx == veb->idx &&
+                   pf->vsi[v]->flags & I40E_VSI_FLAG_VEB_OWNER) {
+                       ctl_vsi = pf->vsi[v];
+                       break;
+               }
+       }
+       if (!ctl_vsi) {
+               dev_info(&pf->pdev->dev,
+                        "missing owner VSI for veb_idx %d\n", veb->idx);
+               ret = -ENOENT;
+               goto end_reconstitute;
+       }
+       /* borrow the main LAN VSI's uplink until the VEB exists again */
+       if (ctl_vsi != pf->vsi[pf->lan_vsi])
+               ctl_vsi->uplink_seid = pf->vsi[pf->lan_vsi]->uplink_seid;
+       ret = i40e_add_vsi(ctl_vsi);
+       if (ret) {
+               dev_info(&pf->pdev->dev,
+                        "rebuild of owner VSI failed: %d\n", ret);
+               goto end_reconstitute;
+       }
+       i40e_vsi_reset_stats(ctl_vsi);
+
+       /* create the VEB in the switch and move the VSI onto the VEB */
+       ret = i40e_add_veb(veb, ctl_vsi);
+       if (ret)
+               goto end_reconstitute;
+
+       /* create the remaining VSIs attached to this VEB */
+       for (v = 0; v < pf->hw.func_caps.num_vsis; v++) {
+               if (!pf->vsi[v] || pf->vsi[v] == ctl_vsi)
+                       continue;
+
+               if (pf->vsi[v]->veb_idx == veb->idx) {
+                       struct i40e_vsi *vsi = pf->vsi[v];
+                       /* re-parent onto the freshly created VEB's new seid */
+                       vsi->uplink_seid = veb->seid;
+                       ret = i40e_add_vsi(vsi);
+                       if (ret) {
+                               dev_info(&pf->pdev->dev,
+                                        "rebuild of vsi_idx %d failed: %d\n",
+                                        v, ret);
+                               goto end_reconstitute;
+                       }
+                       i40e_vsi_reset_stats(vsi);
+               }
+       }
+
+       /* create any VEBs attached to this VEB - RECURSION */
+       for (veb_idx = 0; veb_idx < I40E_MAX_VEB; veb_idx++) {
+               if (pf->veb[veb_idx] && pf->veb[veb_idx]->veb_idx == veb->idx) {
+                       pf->veb[veb_idx]->uplink_seid = veb->seid;
+                       ret = i40e_reconstitute_veb(pf->veb[veb_idx]);
+                       if (ret)
+                               break;
+               }
+       }
+
+end_reconstitute:
+       return ret;
+}
+
+/**
+ * i40e_get_capabilities - get info about the HW
+ * @pf: the PF struct
+ *
+ * Queries the firmware's function-capability list into the hw struct,
+ * growing the response buffer and retrying if the firmware reports it
+ * was too small.  Returns 0 on success, -ENOMEM or -ENODEV on failure.
+ **/
+static int i40e_get_capabilities(struct i40e_pf *pf)
+{
+       struct i40e_aqc_list_capabilities_element_resp *cap_buf;
+       u16 data_size;
+       int buf_len;
+       int err;
+
+       /* initial guess: room for 40 capability records */
+       buf_len = 40 * sizeof(struct i40e_aqc_list_capabilities_element_resp);
+       do {
+               cap_buf = kzalloc(buf_len, GFP_KERNEL);
+               if (!cap_buf)
+                       return -ENOMEM;
+
+               /* this loads the data into the hw struct for us */
+               err = i40e_aq_discover_capabilities(&pf->hw, cap_buf, buf_len,
+                                           &data_size,
+                                           i40e_aqc_opc_list_func_capabilities,
+                                           NULL);
+               /* data loaded, buffer no longer needed */
+               kfree(cap_buf);
+
+               if (pf->hw.aq.asq_last_status == I40E_AQ_RC_ENOMEM) {
+                       /* retry with a larger buffer */
+                       buf_len = data_size;
+               } else if (pf->hw.aq.asq_last_status != I40E_AQ_RC_OK) {
+                       dev_info(&pf->pdev->dev,
+                                "capability discovery failed: aq=%d\n",
+                                pf->hw.aq.asq_last_status);
+                       return -ENODEV;
+               }
+               /* NOTE(review): if err is nonzero while asq_last_status is
+                * I40E_AQ_RC_OK, this loops again with the same buf_len --
+                * confirm the AQ layer cannot return that combination, or
+                * this could spin indefinitely.
+                */
+       } while (err);
+
+       if (pf->hw.debug_mask & I40E_DEBUG_USER)
+               dev_info(&pf->pdev->dev,
+                        "pf=%d, num_vfs=%d, msix_pf=%d, msix_vf=%d, fd_g=%d, fd_b=%d, pf_max_q=%d num_vsi=%d\n",
+                        pf->hw.pf_id, pf->hw.func_caps.num_vfs,
+                        pf->hw.func_caps.num_msix_vectors,
+                        pf->hw.func_caps.num_msix_vectors_vf,
+                        pf->hw.func_caps.fd_filters_guaranteed,
+                        pf->hw.func_caps.fd_filters_best_effort,
+                        pf->hw.func_caps.num_tx_qp,
+                        pf->hw.func_caps.num_vsis);
+
+       return 0;
+}
+
+/**
+ * i40e_fdir_setup - initialize the Flow Director resources
+ * @pf: board private structure
+ *
+ * Finds (or creates) the special-purpose FDIR VSI, wires up its
+ * interrupt handler, allocates and configures its rings, and brings
+ * it up.  If the VSI cannot be created the FDIR flag is cleared and
+ * the function returns quietly.
+ **/
+static void i40e_fdir_setup(struct i40e_pf *pf)
+{
+       struct i40e_vsi *vsi;
+       bool new_vsi = false;
+       int err, i;
+
+       if (!(pf->flags & (I40E_FLAG_FDIR_ENABLED|I40E_FLAG_FDIR_ATR_ENABLED)))
+               return;
+
+       pf->atr_sample_rate = I40E_DEFAULT_ATR_SAMPLE_RATE;
+
+       /* find existing or make new FDIR VSI */
+       vsi = NULL;
+       for (i = 0; i < pf->hw.func_caps.num_vsis; i++)
+               if (pf->vsi[i] && pf->vsi[i]->type == I40E_VSI_FDIR)
+                       vsi = pf->vsi[i];
+       if (!vsi) {
+               vsi = i40e_vsi_setup(pf, I40E_VSI_FDIR, pf->mac_seid, 0);
+               if (!vsi) {
+                       dev_info(&pf->pdev->dev, "Couldn't create FDir VSI\n");
+                       pf->flags &= ~I40E_FLAG_FDIR_ENABLED;
+                       return;
+               }
+               new_vsi = true;
+       }
+       WARN_ON(vsi->base_queue != I40E_FDIR_RING);
+       i40e_vsi_setup_irqhandler(vsi, i40e_fdir_clean_rings);
+
+       err = i40e_vsi_setup_tx_resources(vsi);
+       if (!err)
+               err = i40e_vsi_setup_rx_resources(vsi);
+       if (!err)
+               err = i40e_vsi_configure(vsi);
+       if (!err && new_vsi) {
+               char int_name[IFNAMSIZ + 9];
+               /* snprintf() already reserves room for the terminating NUL
+                * within the given size, so pass the full buffer size
+                */
+               snprintf(int_name, sizeof(int_name), "%s-fdir",
+                        dev_driver_string(&pf->pdev->dev));
+               err = i40e_vsi_request_irq(vsi, int_name);
+       }
+       if (!err)
+               err = i40e_up_complete(vsi);
+
+       clear_bit(__I40E_NEEDS_RESTART, &vsi->state);
+}
+
+/**
+ * i40e_fdir_teardown - release the Flow Director resources
+ * @pf: board private structure
+ *
+ * Releases the special-purpose FDIR VSI, if one has been created.
+ **/
+static void i40e_fdir_teardown(struct i40e_pf *pf)
+{
+       int idx;
+
+       /* there is at most one FDIR VSI; release the first match */
+       for (idx = 0; idx < pf->hw.func_caps.num_vsis; idx++) {
+               struct i40e_vsi *vsi = pf->vsi[idx];
+
+               if (vsi && vsi->type == I40E_VSI_FDIR) {
+                       i40e_vsi_release(vsi);
+                       break;
+               }
+       }
+}
+
+/**
+ * i40e_handle_reset_warning - prep for the core to reset
+ * @pf: board private structure
+ *
+ * Close up the VFs and other things in prep for a Core Reset,
+ * then get ready to rebuild the world.
+ *
+ * Sequence: quiesce VSIs -> shut down AdminQ -> PF reset -> re-init
+ * AdminQ/capabilities/HMC -> rebuild the switch (VEBs then VSIs, or
+ * just the main PF VSI as a fallback) -> unquiesce and report the
+ * driver version to firmware.  The step order is load-bearing.
+ **/
+static void i40e_handle_reset_warning(struct i40e_pf *pf)
+{
+       struct i40e_driver_version dv;
+       struct i40e_hw *hw = &pf->hw;
+       i40e_status ret;
+       u32 v;
+
+       /* only one reset recovery may run at a time */
+       clear_bit(__I40E_RESET_INTR_RECEIVED, &pf->state);
+       if (test_and_set_bit(__I40E_RESET_RECOVERY_PENDING, &pf->state))
+               return;
+
+       dev_info(&pf->pdev->dev, "Tearing down internal switch for reset\n");
+
+       i40e_vc_notify_reset(pf);
+
+       /* quiesce the VSIs and their queues that are not already DOWN */
+       i40e_pf_quiesce_all_vsi(pf);
+
+       /* the reset invalidates all HW seids; zero ours so the rebuild
+        * path knows they must be re-created
+        */
+       for (v = 0; v < pf->hw.func_caps.num_vsis; v++) {
+               if (pf->vsi[v])
+                       pf->vsi[v]->seid = 0;
+       }
+
+       i40e_shutdown_adminq(&pf->hw);
+
+       /* Now we wait for GRST to settle out.
+        * We don't have to delete the VEBs or VSIs from the hw switch
+        * because the reset will make them disappear.
+        */
+       ret = i40e_pf_reset(hw);
+       if (ret)
+               dev_info(&pf->pdev->dev, "PF reset failed, %d\n", ret);
+       pf->pfr_count++;
+
+       if (test_bit(__I40E_DOWN, &pf->state))
+               goto end_core_reset;
+       dev_info(&pf->pdev->dev, "Rebuilding internal switch\n");
+
+       /* rebuild the basics for the AdminQ, HMC, and initial HW switch */
+       ret = i40e_init_adminq(&pf->hw);
+       if (ret) {
+               dev_info(&pf->pdev->dev, "Rebuild AdminQ failed, %d\n", ret);
+               goto end_core_reset;
+       }
+
+       ret = i40e_get_capabilities(pf);
+       if (ret) {
+               dev_info(&pf->pdev->dev, "i40e_get_capabilities failed, %d\n",
+                        ret);
+               goto end_core_reset;
+       }
+
+       /* call shutdown HMC */
+       ret = i40e_shutdown_lan_hmc(hw);
+       if (ret) {
+               dev_info(&pf->pdev->dev, "shutdown_lan_hmc failed: %d\n", ret);
+               goto end_core_reset;
+       }
+
+       ret = i40e_init_lan_hmc(hw, hw->func_caps.num_tx_qp,
+                               hw->func_caps.num_rx_qp,
+                               pf->fcoe_hmc_cntx_num, pf->fcoe_hmc_filt_num);
+       if (ret) {
+               dev_info(&pf->pdev->dev, "init_lan_hmc failed: %d\n", ret);
+               goto end_core_reset;
+       }
+       ret = i40e_configure_lan_hmc(hw, I40E_HMC_MODEL_DIRECT_ONLY);
+       if (ret) {
+               dev_info(&pf->pdev->dev, "configure_lan_hmc failed: %d\n", ret);
+               goto end_core_reset;
+       }
+
+       /* do basic switch setup */
+       ret = i40e_setup_pf_switch(pf);
+       if (ret)
+               goto end_core_reset;
+
+       /* Rebuild the VSIs and VEBs that existed before reset.
+        * They are still in our local switch element arrays, so only
+        * need to rebuild the switch model in the HW.
+        *
+        * If there were VEBs but the reconstitution failed, we'll try
+        * try to recover minimal use by getting the basic PF VSI working.
+        */
+       if (pf->vsi[pf->lan_vsi]->uplink_seid != pf->mac_seid) {
+               dev_info(&pf->pdev->dev, "attempting to rebuild switch\n");
+               /* find the one VEB connected to the MAC, and find orphans */
+               for (v = 0; v < I40E_MAX_VEB; v++) {
+                       if (!pf->veb[v])
+                               continue;
+
+                       if (pf->veb[v]->uplink_seid == pf->mac_seid ||
+                           pf->veb[v]->uplink_seid == 0) {
+                               ret = i40e_reconstitute_veb(pf->veb[v]);
+
+                               if (!ret)
+                                       continue;
+
+                               /* If Main VEB failed, we're in deep doodoo,
+                                * so give up rebuilding the switch and set up
+                                * for minimal rebuild of PF VSI.
+                                * If orphan failed, we'll report the error
+                                * but try to keep going.
+                                */
+                               if (pf->veb[v]->uplink_seid == pf->mac_seid) {
+                                       dev_info(&pf->pdev->dev,
+                                                "rebuild of switch failed: %d, will try to set up simple PF connection\n",
+                                                ret);
+                                       pf->vsi[pf->lan_vsi]->uplink_seid
+                                                               = pf->mac_seid;
+                                       break;
+                               } else if (pf->veb[v]->uplink_seid == 0) {
+                                       dev_info(&pf->pdev->dev,
+                                                "rebuild of orphan VEB failed: %d\n",
+                                                ret);
+                               }
+                       }
+               }
+       }
+
+       if (pf->vsi[pf->lan_vsi]->uplink_seid == pf->mac_seid) {
+               dev_info(&pf->pdev->dev, "attempting to rebuild PF VSI\n");
+               /* no VEB, so rebuild only the Main VSI */
+               ret = i40e_add_vsi(pf->vsi[pf->lan_vsi]);
+               if (ret) {
+                       dev_info(&pf->pdev->dev,
+                                "rebuild of Main VSI failed: %d\n", ret);
+                       goto end_core_reset;
+               }
+       }
+
+       /* reinit the misc interrupt */
+       if (pf->flags & I40E_FLAG_MSIX_ENABLED)
+               ret = i40e_setup_misc_vector(pf);
+
+       /* restart the VSIs that were rebuilt and running before the reset */
+       i40e_pf_unquiesce_all_vsi(pf);
+
+       /* tell the firmware that we're starting */
+       dv.major_version = DRV_VERSION_MAJOR;
+       dv.minor_version = DRV_VERSION_MINOR;
+       dv.build_version = DRV_VERSION_BUILD;
+       dv.subbuild_version = 0;
+       i40e_aq_send_driver_version(&pf->hw, &dv, NULL);
+
+       dev_info(&pf->pdev->dev, "PF reset done\n");
+
+end_core_reset:
+       clear_bit(__I40E_RESET_RECOVERY_PENDING, &pf->state);
+}
+
+/**
+ * i40e_handle_mdd_event
+ * @pf: pointer to the pf structure
+ *
+ * Called from the MDD irq handler to identify possibly malicious vfs
+ **/
+static void i40e_handle_mdd_event(struct i40e_pf *pf)
+{
+       struct i40e_hw *hw = &pf->hw;
+       bool mdd_detected = false;
+       struct i40e_vf *vf;
+       u32 reg;
+       int i;
+
+       if (!test_bit(__I40E_MDD_EVENT_PENDING, &pf->state))
+               return;
+
+       /* find what triggered the MDD event */
+       reg = rd32(hw, I40E_GL_MDET_TX);
+       if (reg & I40E_GL_MDET_TX_VALID_MASK) {
+               u8 func = (reg & I40E_GL_MDET_TX_FUNCTION_MASK)
+                               >> I40E_GL_MDET_TX_FUNCTION_SHIFT;
+               /* mask with _MASK (not _SHIFT) before extracting the field */
+               u8 event = (reg & I40E_GL_MDET_TX_EVENT_MASK)
+                               >> I40E_GL_MDET_TX_EVENT_SHIFT;
+               u8 queue = (reg & I40E_GL_MDET_TX_QUEUE_MASK)
+                               >> I40E_GL_MDET_TX_QUEUE_SHIFT;
+               dev_info(&pf->pdev->dev,
+                        "Malicious Driver Detection TX event 0x%02x on q %d of function 0x%02x\n",
+                        event, queue, func);
+               wr32(hw, I40E_GL_MDET_TX, 0xffffffff);
+               mdd_detected = true;
+       }
+       reg = rd32(hw, I40E_GL_MDET_RX);
+       if (reg & I40E_GL_MDET_RX_VALID_MASK) {
+               u8 func = (reg & I40E_GL_MDET_RX_FUNCTION_MASK)
+                               >> I40E_GL_MDET_RX_FUNCTION_SHIFT;
+               /* mask with _MASK (not _SHIFT) before extracting the field */
+               u8 event = (reg & I40E_GL_MDET_RX_EVENT_MASK)
+                               >> I40E_GL_MDET_RX_EVENT_SHIFT;
+               u8 queue = (reg & I40E_GL_MDET_RX_QUEUE_MASK)
+                               >> I40E_GL_MDET_RX_QUEUE_SHIFT;
+               dev_info(&pf->pdev->dev,
+                        "Malicious Driver Detection RX event 0x%02x on q %d of function 0x%02x\n",
+                        event, queue, func);
+               wr32(hw, I40E_GL_MDET_RX, 0xffffffff);
+               mdd_detected = true;
+       }
+
+       /* see if one of the VFs needs its hand slapped */
+       for (i = 0; i < pf->num_alloc_vfs && mdd_detected; i++) {
+               vf = &(pf->vf[i]);
+               reg = rd32(hw, I40E_VP_MDET_TX(i));
+               if (reg & I40E_VP_MDET_TX_VALID_MASK) {
+                       wr32(hw, I40E_VP_MDET_TX(i), 0xFFFF);
+                       vf->num_mdd_events++;
+                       dev_info(&pf->pdev->dev, "MDD TX event on VF %d\n", i);
+               }
+
+               reg = rd32(hw, I40E_VP_MDET_RX(i));
+               if (reg & I40E_VP_MDET_RX_VALID_MASK) {
+                       wr32(hw, I40E_VP_MDET_RX(i), 0xFFFF);
+                       vf->num_mdd_events++;
+                       dev_info(&pf->pdev->dev, "MDD RX event on VF %d\n", i);
+               }
+
+               if (vf->num_mdd_events > I40E_DEFAULT_NUM_MDD_EVENTS_ALLOWED) {
+                       dev_info(&pf->pdev->dev,
+                                "Too many MDD events on VF %d, disabled\n", i);
+                       dev_info(&pf->pdev->dev,
+                                "Use PF Control I/F to re-enable the VF\n");
+                       set_bit(I40E_VF_STAT_DISABLED, &vf->vf_states);
+               }
+       }
+
+       /* re-enable mdd interrupt cause */
+       clear_bit(__I40E_MDD_EVENT_PENDING, &pf->state);
+       reg = rd32(hw, I40E_PFINT_ICR0_ENA);
+       reg |=  I40E_PFINT_ICR0_ENA_MAL_DETECT_MASK;
+       wr32(hw, I40E_PFINT_ICR0_ENA, reg);
+       i40e_flush(hw);
+}
+
+/**
+ * i40e_service_task - Run the driver's async subtasks
+ * @work: pointer to work_struct containing our data
+ *
+ * Runs each periodic maintenance subtask in turn, then immediately
+ * reschedules itself if the pass overran the timer period or more
+ * events are already pending.
+ **/
+static void i40e_service_task(struct work_struct *work)
+{
+       struct i40e_pf *pf = container_of(work, struct i40e_pf,
+                                         service_task);
+       unsigned long began = jiffies;
+       bool more_work;
+
+       i40e_reset_subtask(pf);
+       i40e_handle_mdd_event(pf);
+       i40e_vc_process_vflr_event(pf);
+       i40e_watchdog_subtask(pf);
+       i40e_fdir_reinit_subtask(pf);
+       i40e_check_hang_subtask(pf);
+       i40e_sync_filters_subtask(pf);
+       i40e_clean_adminq_subtask(pf);
+
+       i40e_service_event_complete(pf);
+
+       /* Kick the task again right away instead of waiting for the
+        * timer when new events arrived while we were busy or this
+        * pass took longer than one timer cycle.
+        */
+       more_work = test_bit(__I40E_ADMINQ_EVENT_PENDING, &pf->state) ||
+                   test_bit(__I40E_MDD_EVENT_PENDING, &pf->state) ||
+                   test_bit(__I40E_VFLR_EVENT_PENDING, &pf->state);
+       if (more_work || time_after(jiffies, began + pf->service_timer_period))
+               i40e_service_event_schedule(pf);
+}
+
+/**
+ * i40e_service_timer - timer callback
+ * @data: pointer to PF struct
+ *
+ * Re-arms the periodic service timer and queues the service task.
+ **/
+static void i40e_service_timer(unsigned long data)
+{
+       struct i40e_pf *pf = (struct i40e_pf *)data;
+       unsigned long next = round_jiffies(jiffies + pf->service_timer_period);
+
+       mod_timer(&pf->service_timer, next);
+       i40e_service_event_schedule(pf);
+}
+
+/**
+ * i40e_set_num_rings_in_vsi - Determine number of rings in the VSI
+ * @vsi: the VSI being configured
+ *
+ * Fills in queue-pair, descriptor and vector counts for the VSI based
+ * on its type.  Returns 0 on success, -ENODATA for an unknown type.
+ **/
+static int i40e_set_num_rings_in_vsi(struct i40e_vsi *vsi)
+{
+       struct i40e_pf *pf = vsi->back;
+
+       if (vsi->type == I40E_VSI_MAIN) {
+               vsi->alloc_queue_pairs = pf->num_lan_qps;
+               vsi->num_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS,
+                                     I40E_REQ_DESCRIPTOR_MULTIPLE);
+               /* one vector per LAN queue when MSI-X, else share one */
+               if (pf->flags & I40E_FLAG_MSIX_ENABLED)
+                       vsi->num_q_vectors = pf->num_lan_msix;
+               else
+                       vsi->num_q_vectors = 1;
+       } else if (vsi->type == I40E_VSI_FDIR) {
+               vsi->alloc_queue_pairs = 1;
+               vsi->num_desc = ALIGN(I40E_FDIR_RING_COUNT,
+                                     I40E_REQ_DESCRIPTOR_MULTIPLE);
+               vsi->num_q_vectors = 1;
+       } else if (vsi->type == I40E_VSI_VMDQ2) {
+               vsi->alloc_queue_pairs = pf->num_vmdq_qps;
+               vsi->num_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS,
+                                     I40E_REQ_DESCRIPTOR_MULTIPLE);
+               vsi->num_q_vectors = pf->num_vmdq_msix;
+       } else if (vsi->type == I40E_VSI_SRIOV) {
+               /* VF VSIs get their vector count assigned elsewhere */
+               vsi->alloc_queue_pairs = pf->num_vf_qps;
+               vsi->num_desc = ALIGN(I40E_DEFAULT_NUM_DESCRIPTORS,
+                                     I40E_REQ_DESCRIPTOR_MULTIPLE);
+       } else {
+               WARN_ON(1);
+               return -ENODATA;
+       }
+
+       return 0;
+}
+
+/**
+ * i40e_vsi_mem_alloc - Allocates the next available struct vsi in the PF
+ * @pf: board private structure
+ * @type: type of VSI
+ *
+ * On error: returns error code (negative)
+ * On success: returns vsi index in PF (positive)
+ **/
+static int i40e_vsi_mem_alloc(struct i40e_pf *pf, enum i40e_vsi_type type)
+{
+       int ret = -ENODEV;
+       struct i40e_vsi *vsi;
+       int vsi_idx;
+       int i;
+
+       /* Need to protect the allocation of the VSIs at the PF level */
+       mutex_lock(&pf->switch_mutex);
+
+       /* VSI list may be fragmented if VSI creation/destruction has
+        * been happening.  We can afford to do a quick scan to look
+        * for any free VSIs in the list.
+        *
+        * find next empty vsi slot, looping back around if necessary
+        */
+       i = pf->next_vsi;
+       while (i < pf->hw.func_caps.num_vsis && pf->vsi[i])
+               i++;
+       if (i >= pf->hw.func_caps.num_vsis) {
+               i = 0;
+               while (i < pf->next_vsi && pf->vsi[i])
+                       i++;
+       }
+
+       if (i < pf->hw.func_caps.num_vsis && !pf->vsi[i]) {
+               vsi_idx = i;             /* Found one! */
+       } else {
+               ret = -ENODEV;
+               goto err_alloc_vsi;  /* out of VSI slots! */
+       }
+       pf->next_vsi = ++i;
+
+       vsi = kzalloc(sizeof(*vsi), GFP_KERNEL);
+       if (!vsi) {
+               ret = -ENOMEM;
+               goto err_alloc_vsi;
+       }
+       vsi->type = type;
+       vsi->back = pf;
+       set_bit(__I40E_DOWN, &vsi->state);
+       vsi->flags = 0;
+       vsi->idx = vsi_idx;
+       vsi->rx_itr_setting = pf->rx_itr_default;
+       vsi->tx_itr_setting = pf->tx_itr_default;
+       vsi->netdev_registered = false;
+       vsi->work_limit = I40E_DEFAULT_IRQ_WORK;
+       INIT_LIST_HEAD(&vsi->mac_filter_list);
+
+       /* an unsupported VSI type leaves the ring/vector counts unset;
+        * fail the allocation rather than install a half-built VSI
+        */
+       ret = i40e_set_num_rings_in_vsi(vsi);
+       if (ret) {
+               kfree(vsi);
+               goto err_alloc_vsi;
+       }
+
+       /* Setup default MSIX irq handler for VSI */
+       i40e_vsi_setup_irqhandler(vsi, i40e_msix_clean_rings);
+
+       pf->vsi[vsi_idx] = vsi;
+       ret = vsi_idx;
+err_alloc_vsi:
+       mutex_unlock(&pf->switch_mutex);
+       return ret;
+}
+
+/**
+ * i40e_vsi_clear - Deallocate the VSI provided
+ * @vsi: the VSI being un-configured
+ *
+ * Returns the PF's queue-pair and vector lumps, detaches the VSI from
+ * pf->vsi[] and frees it.  Always returns 0; a NULL @vsi is a no-op,
+ * and inconsistent pf->vsi[] entries are logged but the struct is
+ * still freed.
+ **/
+static int i40e_vsi_clear(struct i40e_vsi *vsi)
+{
+       struct i40e_pf *pf;
+
+       if (!vsi)
+               return 0;
+
+       /* no back-pointer means it was never registered with a PF */
+       if (!vsi->back)
+               goto free_vsi;
+       pf = vsi->back;
+
+       mutex_lock(&pf->switch_mutex);
+       if (!pf->vsi[vsi->idx]) {
+               dev_err(&pf->pdev->dev, "pf->vsi[%d] is NULL, just free vsi[%d](%p,type %d)\n",
+                       vsi->idx, vsi->idx, vsi, vsi->type);
+               goto unlock_vsi;
+       }
+
+       if (pf->vsi[vsi->idx] != vsi) {
+               dev_err(&pf->pdev->dev,
+                       "pf->vsi[%d](%p, type %d) != vsi[%d](%p,type %d): no free!\n",
+                       pf->vsi[vsi->idx]->idx,
+                       pf->vsi[vsi->idx],
+                       pf->vsi[vsi->idx]->type,
+                       vsi->idx, vsi, vsi->type);
+               goto unlock_vsi;
+       }
+
+       /* updates the pf for this cleared vsi */
+       i40e_put_lump(pf->qp_pile, vsi->base_queue, vsi->idx);
+       i40e_put_lump(pf->irq_pile, vsi->base_vector, vsi->idx);
+
+       /* make the freed slot the next candidate for allocation */
+       pf->vsi[vsi->idx] = NULL;
+       if (vsi->idx < pf->next_vsi)
+               pf->next_vsi = vsi->idx;
+
+unlock_vsi:
+       mutex_unlock(&pf->switch_mutex);
+free_vsi:
+       kfree(vsi);
+
+       return 0;
+}
+
+/**
+ * i40e_alloc_rings - Allocates the Rx and Tx rings for the provided VSI
+ * @vsi: the VSI being configured
+ *
+ * Returns 0 on success, -ENOMEM if either ring array cannot be
+ * allocated.  On failure both ring pointers are left NULL so a later
+ * i40e_vsi_clear_rings() cannot free a stale pointer.
+ **/
+static int i40e_alloc_rings(struct i40e_vsi *vsi)
+{
+       struct i40e_pf *pf = vsi->back;
+       int ret = 0;
+       int i;
+
+       vsi->rx_rings = kcalloc(vsi->alloc_queue_pairs,
+                               sizeof(struct i40e_ring), GFP_KERNEL);
+       if (!vsi->rx_rings) {
+               ret = -ENOMEM;
+               goto err_alloc_rings;
+       }
+
+       vsi->tx_rings = kcalloc(vsi->alloc_queue_pairs,
+                               sizeof(struct i40e_ring), GFP_KERNEL);
+       if (!vsi->tx_rings) {
+               ret = -ENOMEM;
+               /* don't leave a dangling rx_rings pointer behind */
+               kfree(vsi->rx_rings);
+               vsi->rx_rings = NULL;
+               goto err_alloc_rings;
+       }
+
+       /* Set basic values in the rings to be used later during open() */
+       for (i = 0; i < vsi->alloc_queue_pairs; i++) {
+               struct i40e_ring *rx_ring = &vsi->rx_rings[i];
+               struct i40e_ring *tx_ring = &vsi->tx_rings[i];
+
+               tx_ring->queue_index = i;
+               tx_ring->reg_idx = vsi->base_queue + i;
+               tx_ring->ring_active = false;
+               tx_ring->vsi = vsi;
+               tx_ring->netdev = vsi->netdev;
+               tx_ring->dev = &pf->pdev->dev;
+               tx_ring->count = vsi->num_desc;
+               tx_ring->size = 0;
+               tx_ring->dcb_tc = 0;
+
+               rx_ring->queue_index = i;
+               rx_ring->reg_idx = vsi->base_queue + i;
+               rx_ring->ring_active = false;
+               rx_ring->vsi = vsi;
+               rx_ring->netdev = vsi->netdev;
+               rx_ring->dev = &pf->pdev->dev;
+               rx_ring->count = vsi->num_desc;
+               rx_ring->size = 0;
+               rx_ring->dcb_tc = 0;
+               if (pf->flags & I40E_FLAG_16BYTE_RX_DESC_ENABLED)
+                       set_ring_16byte_desc_enabled(rx_ring);
+               else
+                       clear_ring_16byte_desc_enabled(rx_ring);
+       }
+
+err_alloc_rings:
+       return ret;
+}
+
+/**
+ * i40e_vsi_clear_rings - Deallocates the Rx and Tx rings for the provided VSI
+ * @vsi: the VSI being cleaned
+ *
+ * Frees both ring arrays and NULLs the pointers so a repeated call
+ * (or a later teardown path) cannot double-free.  Always returns 0.
+ **/
+static int i40e_vsi_clear_rings(struct i40e_vsi *vsi)
+{
+       if (vsi) {
+               kfree(vsi->rx_rings);
+               vsi->rx_rings = NULL;
+               kfree(vsi->tx_rings);
+               vsi->tx_rings = NULL;
+       }
+
+       return 0;
+}
+
+/**
+ * i40e_reserve_msix_vectors - Reserve MSI-X vectors in the kernel
+ * @pf: board private structure
+ * @vectors: the number of MSI-X vectors to request
+ *
+ * Returns the number of vectors reserved, or 0 when none could be
+ * reserved.  pci_enable_msix() returns 0 on success, a negative errno
+ * on hard failure, or a positive hint of how many vectors could be
+ * granted; we retry with the hint until we succeed or drop below the
+ * driver minimum.
+ **/
+static int i40e_reserve_msix_vectors(struct i40e_pf *pf, int vectors)
+{
+       int err;
+
+       pf->num_msix_entries = 0;
+
+       while (vectors >= I40E_MIN_MSIX) {
+               err = pci_enable_msix(pf->pdev, pf->msix_entries, vectors);
+               if (!err) {
+                       /* reservation succeeded */
+                       pf->num_msix_entries = vectors;
+                       break;
+               }
+               if (err < 0) {
+                       /* hard failure, give up */
+                       dev_info(&pf->pdev->dev,
+                                "MSI-X vector reservation failed: %d\n", err);
+                       vectors = 0;
+                       break;
+               }
+               /* positive return is the retry hint */
+               dev_info(&pf->pdev->dev,
+                        "MSI-X vectors wanted %d, retrying with %d\n",
+                        vectors, err);
+               vectors = err;
+       }
+
+       if (vectors > 0 && vectors < I40E_MIN_MSIX) {
+               dev_info(&pf->pdev->dev,
+                        "Couldn't get enough vectors, only %d available\n",
+                        vectors);
+               vectors = 0;
+       }
+
+       return vectors;
+}
+
+/**
+ * i40e_init_msix - Setup the MSIX capability
+ * @pf: board private structure
+ *
+ * Work with the OS to set up the MSIX vectors needed.
+ *
+ * Returns 0 on success, negative on failure
+ **/
+static int i40e_init_msix(struct i40e_pf *pf)
+{
+       i40e_status err = 0;
+       struct i40e_hw *hw = &pf->hw;
+       int v_budget, i;
+       int vec;
+
+       if (!(pf->flags & I40E_FLAG_MSIX_ENABLED))
+               return -ENODEV;
+
+       /* The number of vectors we'll request will be comprised of:
+        *   - Add 1 for "other" cause for Admin Queue events, etc.
+        *   - The number of LAN queue pairs
+        *        already adjusted for the NUMA node
+        *        assumes symmetric Tx/Rx pairing
+        *   - The number of VMDq pairs
+        * Once we count this up, try the request.
+        *
+        * If we can't get what we want, we'll simplify to nearly nothing
+        * and try again.  If that still fails, we punt.
+        */
+       pf->num_lan_msix = pf->num_lan_qps;
+       pf->num_vmdq_msix = pf->num_vmdq_qps;
+       v_budget = 1 + pf->num_lan_msix;
+       v_budget += (pf->num_vmdq_vsis * pf->num_vmdq_msix);
+       if (pf->flags & I40E_FLAG_FDIR_ENABLED)
+               v_budget++;
+
+       /* Scale down if necessary, and the rings will share vectors */
+       v_budget = min_t(int, v_budget, hw->func_caps.num_msix_vectors);
+
+       pf->msix_entries = kcalloc(v_budget, sizeof(struct msix_entry),
+                                  GFP_KERNEL);
+       if (!pf->msix_entries)
+               return -ENOMEM;
+
+       for (i = 0; i < v_budget; i++)
+               pf->msix_entries[i].entry = i;
+       vec = i40e_reserve_msix_vectors(pf, v_budget);
+       if (vec < I40E_MIN_MSIX) {
+               /* couldn't reserve even the minimum; fall back to non-MSIX */
+               pf->flags &= ~I40E_FLAG_MSIX_ENABLED;
+               kfree(pf->msix_entries);
+               pf->msix_entries = NULL;
+               return -ENODEV;
+
+       } else if (vec == I40E_MIN_MSIX) {
+               /* Adjust for minimal MSIX use */
+               dev_info(&pf->pdev->dev, "Features disabled, not enough MSIX vectors\n");
+               pf->flags &= ~I40E_FLAG_VMDQ_ENABLED;
+               pf->num_vmdq_vsis = 0;
+               pf->num_vmdq_qps = 0;
+               pf->num_vmdq_msix = 0;
+               pf->num_lan_qps = 1;
+               pf->num_lan_msix = 1;
+
+       } else if (vec != v_budget) {
+               /* Scale vector usage down */
+               pf->num_vmdq_msix = 1;    /* force VMDqs to only one vector */
+               vec--;                    /* reserve the misc vector */
+
+               /* partition out the remaining vectors */
+               switch (vec) {
+               case 2:
+                       pf->num_vmdq_vsis = 1;
+                       pf->num_lan_msix = 1;
+                       break;
+               case 3:
+                       pf->num_vmdq_vsis = 1;
+                       pf->num_lan_msix = 2;
+                       break;
+               default:
+                       /* roughly half to LAN, the rest to VMDq VSIs */
+                       pf->num_lan_msix = min_t(int, (vec / 2),
+                                                pf->num_lan_qps);
+                       pf->num_vmdq_vsis = min_t(int, (vec - pf->num_lan_msix),
+                                                 I40E_DEFAULT_NUM_VMDQ_VSI);
+                       break;
+               }
+       }
+
+       /* NOTE(review): err is never assigned after its initializer, so all
+        * non-early-return paths report success -- confirm that is intended.
+        */
+       return err;
+}
+
+/**
+ * i40e_alloc_q_vectors - Allocate memory for interrupt vectors
+ * @vsi: the VSI being configured
+ *
+ * One q_vector is set up per interrupt vector, and each is hooked into
+ * NAPI when the VSI has a netdev.  Without MSIX only a single vector
+ * exists and only the LAN VSI may claim it.
+ *
+ * Returns 0 on success, -ENOMEM if the q_vector array cannot be
+ * allocated, or -EINVAL if a non-LAN VSI asks for vectors while MSIX
+ * is disabled.
+ **/
+static int i40e_alloc_q_vectors(struct i40e_vsi *vsi)
+{
+       struct i40e_pf *pf = vsi->back;
+       struct i40e_q_vector *q_vector;
+       int num_vectors;
+       int idx;
+
+       /* if not MSIX, give the one vector only to the LAN VSI */
+       if (pf->flags & I40E_FLAG_MSIX_ENABLED)
+               num_vectors = vsi->num_q_vectors;
+       else if (vsi == pf->vsi[pf->lan_vsi])
+               num_vectors = 1;
+       else
+               return -EINVAL;
+
+       vsi->q_vectors = kcalloc(num_vectors, sizeof(struct i40e_q_vector),
+                                GFP_KERNEL);
+       if (!vsi->q_vectors)
+               return -ENOMEM;
+
+       for (idx = 0; idx < num_vectors; idx++) {
+               q_vector = &vsi->q_vectors[idx];
+               q_vector->vsi = vsi;
+               q_vector->v_idx = idx;
+               cpumask_set_cpu(idx, &q_vector->affinity_mask);
+               if (vsi->netdev)
+                       netif_napi_add(vsi->netdev, &q_vector->napi,
+                                      i40e_napi_poll, vsi->work_limit);
+       }
+
+       return 0;
+}
+
+/**
+ * i40e_init_interrupt_scheme - Determine proper interrupt scheme
+ * @pf: board private structure to initialize
+ *
+ * Tries MSIX first; if MSIX setup fails, the MSIX-dependent feature
+ * flags are cleared and the queue expectations are recomputed, then MSI
+ * is attempted, with legacy interrupts as the final fallback.  Lastly
+ * the first interrupt vector is reserved for misc (non-queue) events.
+ **/
+static void i40e_init_interrupt_scheme(struct i40e_pf *pf)
+{
+       int err = 0;
+
+       if (pf->flags & I40E_FLAG_MSIX_ENABLED) {
+               err = i40e_init_msix(pf);
+               if (err) {
+                       /* these features all depend on MSIX vectors */
+                       pf->flags &= ~(I40E_FLAG_RSS_ENABLED       |
+                                       I40E_FLAG_MQ_ENABLED       |
+                                       I40E_FLAG_DCB_ENABLED      |
+                                       I40E_FLAG_SRIOV_ENABLED    |
+                                       I40E_FLAG_FDIR_ENABLED     |
+                                       I40E_FLAG_FDIR_ATR_ENABLED |
+                                       I40E_FLAG_VMDQ_ENABLED);
+
+                       /* rework the queue expectations without MSIX */
+                       i40e_determine_queue_usage(pf);
+               }
+       }
+
+       if (!(pf->flags & I40E_FLAG_MSIX_ENABLED) &&
+           (pf->flags & I40E_FLAG_MSI_ENABLED)) {
+               err = pci_enable_msi(pf->pdev);
+               if (err) {
+                       dev_info(&pf->pdev->dev,
+                                "MSI init failed (%d), trying legacy.\n", err);
+                       pf->flags &= ~I40E_FLAG_MSI_ENABLED;
+               }
+       }
+
+       /* track first vector for misc interrupts; a failure here was
+        * previously discarded silently, leaving no trace that the misc
+        * vector was never reserved
+        */
+       err = i40e_get_lump(pf, pf->irq_pile, 1, I40E_PILE_VALID_BIT-1);
+       if (err < 0)
+               dev_info(&pf->pdev->dev,
+                        "Could not get vector for misc interrupts: %d\n", err);
+}
+
+/**
+ * i40e_setup_misc_vector - Setup the misc vector to handle non queue events
+ * @pf: board private structure
+ *
+ * This sets up the handler for MSIX 0, which is used to manage the
+ * non-queue interrupts, e.g. AdminQ and errors.  This is not used
+ * when in MSI or Legacy interrupt mode.
+ *
+ * Returns 0 on success, or -EFAULT if the IRQ could not be requested.
+ **/
+static int i40e_setup_misc_vector(struct i40e_pf *pf)
+{
+       struct i40e_hw *hw = &pf->hw;
+       int err = 0;
+
+       /* Only request the irq if this is the first time through, and
+        * not when we're rebuilding after a Reset
+        */
+       if (!test_bit(__I40E_RESET_RECOVERY_PENDING, &pf->state)) {
+               err = request_irq(pf->msix_entries[0].vector,
+                                 i40e_intr, 0, pf->misc_int_name, pf);
+               if (err) {
+                       dev_info(&pf->pdev->dev,
+                                "request_irq for msix_misc failed: %d\n", err);
+                       /* NOTE(review): the original request_irq error is
+                        * masked by -EFAULT here -- confirm callers do not
+                        * need the underlying code
+                        */
+                       return -EFAULT;
+               }
+       }
+
+       i40e_enable_misc_int_causes(hw);
+
+       /* associate no queues to the misc vector */
+       wr32(hw, I40E_PFINT_LNKLST0, I40E_QUEUE_END_OF_LIST);
+       wr32(hw, I40E_PFINT_ITR0(I40E_RX_ITR), I40E_ITR_8K);
+
+       /* push the register writes out before enabling */
+       i40e_flush(hw);
+
+       i40e_irq_dynamic_enable_icr0(pf);
+
+       return err;
+}
+
+/**
+ * i40e_config_rss - Prepare for RSS if used
+ * @pf: board private structure
+ *
+ * Programs the RSS hash key, enables hashing for the TCP/UDP and
+ * fragmented IPv4/IPv6 packet types, and fills the redirection LUT
+ * with queue indices in round-robin order.  Always returns 0.
+ **/
+static int i40e_config_rss(struct i40e_pf *pf)
+{
+       struct i40e_hw *hw = &pf->hw;
+       u32 lut = 0;
+       int i, j;
+       u64 hena;
+       /* Set of random keys generated using kernel random number generator */
+       static const u32 seed[I40E_PFQF_HKEY_MAX_INDEX + 1] = {0x41b01687,
+                               0x183cfd8c, 0xce880440, 0x580cbc3c, 0x35897377,
+                               0x328b25e1, 0x4fa98922, 0xb7d90c14, 0xd5bad70d,
+                               0xcd15a2c1, 0xe8580225, 0x4a1e9d11, 0xfe5731be};
+
+       /* Fill out hash function seed */
+       for (i = 0; i <= I40E_PFQF_HKEY_MAX_INDEX; i++)
+               wr32(hw, I40E_PFQF_HKEY(i), seed[i]);
+
+       /* By default we enable TCP/UDP with IPv4/IPv6 ptypes */
+       hena = (u64)rd32(hw, I40E_PFQF_HENA(0)) |
+               ((u64)rd32(hw, I40E_PFQF_HENA(1)) << 32);
+       hena |= ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_UDP) |
+               ((u64)1 << I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP) |
+               ((u64)1 << I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP) |
+               ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV4_TCP) |
+               ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_TCP) |
+               ((u64)1 << I40E_FILTER_PCTYPE_NONF_IPV6_UDP) |
+               ((u64)1 << I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP) |
+               ((u64)1 << I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP) |
+               ((u64)1 << I40E_FILTER_PCTYPE_FRAG_IPV4)|
+               ((u64)1 << I40E_FILTER_PCTYPE_FRAG_IPV6);
+       /* the 64-bit enable mask is split across two 32-bit registers */
+       wr32(hw, I40E_PFQF_HENA(0), (u32)hena);
+       wr32(hw, I40E_PFQF_HENA(1), (u32)(hena >> 32));
+
+       /* Populate the LUT with max no. of queues in round robin fashion */
+       for (i = 0, j = 0; i < pf->hw.func_caps.rss_table_size; i++, j++) {
+
+               /* The assumption is that lan qp count will be the highest
+                * qp count for any PF VSI that needs RSS.
+                * If multiple VSIs need RSS support, all the qp counts
+                * for those VSIs should be a power of 2 for RSS to work.
+                * If LAN VSI is the only consumer for RSS then this requirement
+                * is not necessary.
+                */
+               if (j == pf->rss_size)
+                       j = 0;
+               /* lut = 4-byte sliding window of 4 lut entries */
+               lut = (lut << 8) | (j &
+                        ((0x1 << pf->hw.func_caps.rss_table_entry_width) - 1));
+               /* On i = 3, we have 4 entries in lut; write to the register */
+               if ((i & 3) == 3)
+                       wr32(hw, I40E_PFQF_HLUT(i >> 2), lut);
+       }
+       i40e_flush(hw);
+
+       return 0;
+}
+
+/**
+ * i40e_sw_init - Initialize general software structures (struct i40e_pf)
+ * @pf: board private structure to initialize
+ *
+ * i40e_sw_init initializes the Adapter private data structure.
+ * Fields are initialized based on PCI device information and
+ * OS network device settings (MTU size).
+ *
+ * Returns 0 on success or -ENOMEM if the queue/vector tracking
+ * structures cannot be allocated.
+ **/
+static int i40e_sw_init(struct i40e_pf *pf)
+{
+       int err = 0;
+       int size;
+
+       /* message level: the module's "debug" parameter may override the
+        * default, with the I40E_DEBUG_USER bit selecting hw debug_mask
+        */
+       pf->msg_enable = netif_msg_init(I40E_DEFAULT_MSG_ENABLE,
+                               (NETIF_MSG_DRV|NETIF_MSG_PROBE|NETIF_MSG_LINK));
+       if (debug != -1 && debug != I40E_DEFAULT_MSG_ENABLE) {
+               if (I40E_DEBUG_USER & debug)
+                       pf->hw.debug_mask = debug;
+               pf->msg_enable = netif_msg_init((debug & ~I40E_DEBUG_USER),
+                                               I40E_DEFAULT_MSG_ENABLE);
+       }
+
+       /* Set default capability flags */
+       pf->flags = I40E_FLAG_RX_CSUM_ENABLED |
+                   I40E_FLAG_MSI_ENABLED     |
+                   I40E_FLAG_MSIX_ENABLED    |
+                   I40E_FLAG_RX_PS_ENABLED   |
+                   I40E_FLAG_MQ_ENABLED      |
+                   I40E_FLAG_RX_1BUF_ENABLED;
+
+       /* RSS size is bounded by the HW table width and the local CPUs */
+       pf->rss_size_max = 0x1 << pf->hw.func_caps.rss_table_entry_width;
+       if (pf->hw.func_caps.rss) {
+               pf->flags |= I40E_FLAG_RSS_ENABLED;
+               pf->rss_size = min_t(int, pf->rss_size_max,
+                                    nr_cpus_node(numa_node_id()));
+       } else {
+               pf->rss_size = 1;
+       }
+
+       /* per-TC queue count only matters when DCB is available */
+       if (pf->hw.func_caps.dcb)
+               pf->num_tc_qps = I40E_DEFAULT_QUEUES_PER_TC;
+       else
+               pf->num_tc_qps = 0;
+
+       if (pf->hw.func_caps.fd) {
+               /* FW/NVM is not yet fixed in this regard */
+               if ((pf->hw.func_caps.fd_filters_guaranteed > 0) ||
+                   (pf->hw.func_caps.fd_filters_best_effort > 0)) {
+                       pf->flags |= I40E_FLAG_FDIR_ATR_ENABLED;
+                       dev_info(&pf->pdev->dev,
+                                "Flow Director ATR mode Enabled\n");
+                       pf->flags |= I40E_FLAG_FDIR_ENABLED;
+                       dev_info(&pf->pdev->dev,
+                                "Flow Director Side Band mode Enabled\n");
+                       pf->fdir_pf_filter_count =
+                                        pf->hw.func_caps.fd_filters_guaranteed;
+               }
+       } else {
+               pf->fdir_pf_filter_count = 0;
+       }
+
+       if (pf->hw.func_caps.vmdq) {
+               pf->flags |= I40E_FLAG_VMDQ_ENABLED;
+               pf->num_vmdq_vsis = I40E_DEFAULT_NUM_VMDQ_VSI;
+               pf->num_vmdq_qps = I40E_DEFAULT_QUEUES_PER_VMDQ;
+       }
+
+       /* MFP mode enabled */
+       if (pf->hw.func_caps.npar_enable || pf->hw.func_caps.mfp_mode_1) {
+               pf->flags |= I40E_FLAG_MFP_ENABLED;
+               dev_info(&pf->pdev->dev, "MFP mode Enabled\n");
+       }
+
+#ifdef CONFIG_PCI_IOV
+       if (pf->hw.func_caps.num_vfs) {
+               pf->num_vf_qps = I40E_DEFAULT_QUEUES_PER_VF;
+               pf->flags |= I40E_FLAG_SRIOV_ENABLED;
+               pf->num_req_vfs = min_t(int,
+                                       pf->hw.func_caps.num_vfs,
+                                       I40E_MAX_VF_COUNT);
+       }
+#endif /* CONFIG_PCI_IOV */
+       /* placeholder until the real EEPROM version is read */
+       pf->eeprom_version = 0xDEAD;
+       pf->lan_veb = I40E_NO_VEB;
+       pf->lan_vsi = I40E_NO_VSI;
+
+       /* set up queue assignment tracking */
+       size = sizeof(struct i40e_lump_tracking)
+               + (sizeof(u16) * pf->hw.func_caps.num_tx_qp);
+       pf->qp_pile = kzalloc(size, GFP_KERNEL);
+       if (!pf->qp_pile) {
+               err = -ENOMEM;
+               goto sw_init_done;
+       }
+       pf->qp_pile->num_entries = pf->hw.func_caps.num_tx_qp;
+       pf->qp_pile->search_hint = 0;
+
+       /* set up vector assignment tracking */
+       size = sizeof(struct i40e_lump_tracking)
+               + (sizeof(u16) * pf->hw.func_caps.num_msix_vectors);
+       pf->irq_pile = kzalloc(size, GFP_KERNEL);
+       if (!pf->irq_pile) {
+               kfree(pf->qp_pile);
+               err = -ENOMEM;
+               goto sw_init_done;
+       }
+       pf->irq_pile->num_entries = pf->hw.func_caps.num_msix_vectors;
+       pf->irq_pile->search_hint = 0;
+
+       mutex_init(&pf->switch_mutex);
+
+sw_init_done:
+       return err;
+}
+
+/**
+ * i40e_set_features - set the netdev feature flags
+ * @netdev: ptr to the netdev being adjusted
+ * @features: the feature set that the stack is suggesting
+ *
+ * Only the VLAN CTAG RX (stripping) bit is acted upon here; every other
+ * requested feature is accepted without hardware changes.  Always
+ * returns 0.
+ **/
+static int i40e_set_features(struct net_device *netdev,
+                            netdev_features_t features)
+{
+       struct i40e_netdev_priv *np = netdev_priv(netdev);
+       struct i40e_vsi *vsi = np->vsi;
+       bool strip_vlan = !!(features & NETIF_F_HW_VLAN_CTAG_RX);
+
+       if (strip_vlan)
+               i40e_vlan_stripping_enable(vsi);
+       else
+               i40e_vlan_stripping_disable(vsi);
+
+       return 0;
+}
+
+/* netdev callback table shared by every i40e-backed net_device */
+static const struct net_device_ops i40e_netdev_ops = {
+       .ndo_open               = i40e_open,
+       .ndo_stop               = i40e_close,
+       .ndo_start_xmit         = i40e_lan_xmit_frame,
+       .ndo_get_stats64        = i40e_get_netdev_stats_struct,
+       .ndo_set_rx_mode        = i40e_set_rx_mode,
+       .ndo_validate_addr      = eth_validate_addr,
+       .ndo_set_mac_address    = i40e_set_mac,
+       .ndo_change_mtu         = i40e_change_mtu,
+       .ndo_tx_timeout         = i40e_tx_timeout,
+       .ndo_vlan_rx_add_vid    = i40e_vlan_rx_add_vid,
+       .ndo_vlan_rx_kill_vid   = i40e_vlan_rx_kill_vid,
+#ifdef CONFIG_NET_POLL_CONTROLLER
+       .ndo_poll_controller    = i40e_netpoll,
+#endif
+       .ndo_setup_tc           = i40e_setup_tc,
+       .ndo_set_features       = i40e_set_features,
+       .ndo_set_vf_mac         = i40e_ndo_set_vf_mac,
+       .ndo_set_vf_vlan        = i40e_ndo_set_vf_port_vlan,
+       .ndo_set_vf_tx_rate     = i40e_ndo_set_vf_bw,
+       .ndo_get_vf_config      = i40e_ndo_get_vf_config,
+};
+
+/**
+ * i40e_config_netdev - Setup the netdev flags
+ * @vsi: the VSI being configured
+ *
+ * Allocates a multiqueue etherdev for @vsi, wires its private data back
+ * to the VSI, sets the offload feature flags and MAC address, and hooks
+ * up the netdev ops and ethtool ops.
+ *
+ * Returns 0 on success, negative value on failure
+ **/
+static int i40e_config_netdev(struct i40e_vsi *vsi)
+{
+       struct i40e_pf *pf = vsi->back;
+       struct i40e_hw *hw = &pf->hw;
+       struct i40e_netdev_priv *np;
+       struct net_device *netdev;
+       u8 mac_addr[ETH_ALEN];
+       int etherdev_size;
+
+       etherdev_size = sizeof(struct i40e_netdev_priv);
+       netdev = alloc_etherdev_mq(etherdev_size, vsi->alloc_queue_pairs);
+       if (!netdev)
+               return -ENOMEM;
+
+       /* link the netdev and the VSI to each other */
+       vsi->netdev = netdev;
+       np = netdev_priv(netdev);
+       np->vsi = vsi;
+
+       netdev->hw_enc_features = NETIF_F_IP_CSUM        |
+                                 NETIF_F_GSO_UDP_TUNNEL |
+                                 NETIF_F_TSO            |
+                                 NETIF_F_SG;
+
+       netdev->features = NETIF_F_SG                  |
+                          NETIF_F_IP_CSUM             |
+                          NETIF_F_SCTP_CSUM           |
+                          NETIF_F_HIGHDMA             |
+                          NETIF_F_GSO_UDP_TUNNEL      |
+                          NETIF_F_HW_VLAN_CTAG_TX     |
+                          NETIF_F_HW_VLAN_CTAG_RX     |
+                          NETIF_F_HW_VLAN_CTAG_FILTER |
+                          NETIF_F_IPV6_CSUM           |
+                          NETIF_F_TSO                 |
+                          NETIF_F_TSO6                |
+                          NETIF_F_RXCSUM              |
+                          NETIF_F_RXHASH              |
+                          0;
+
+       /* copy netdev features into list of user selectable features */
+       netdev->hw_features |= netdev->features;
+
+       if (vsi->type == I40E_VSI_MAIN) {
+               /* the main VSI uses the permanent MAC from the hardware */
+               SET_NETDEV_DEV(netdev, &pf->pdev->dev);
+               memcpy(mac_addr, hw->mac.perm_addr, ETH_ALEN);
+       } else {
+               /* relate the VSI_VMDQ name to the VSI_MAIN name */
+               snprintf(netdev->name, IFNAMSIZ, "%sv%%d",
+                        pf->vsi[pf->lan_vsi]->netdev->name);
+               random_ether_addr(mac_addr);
+               i40e_add_filter(vsi, mac_addr, I40E_VLAN_ANY, false, false);
+       }
+
+       memcpy(netdev->dev_addr, mac_addr, ETH_ALEN);
+       memcpy(netdev->perm_addr, mac_addr, ETH_ALEN);
+       /* vlan gets same features (except vlan offload)
+        * after any tweaks for specific VSI types
+        */
+       netdev->vlan_features = netdev->features & ~(NETIF_F_HW_VLAN_CTAG_TX |
+                                                    NETIF_F_HW_VLAN_CTAG_RX |
+                                                  NETIF_F_HW_VLAN_CTAG_FILTER);
+       netdev->priv_flags |= IFF_UNICAST_FLT;
+       netdev->priv_flags |= IFF_SUPP_NOFCS;
+       /* Setup netdev TC information */
+       i40e_vsi_config_netdev_tc(vsi, vsi->tc_config.enabled_tc);
+
+       netdev->netdev_ops = &i40e_netdev_ops;
+       netdev->watchdog_timeo = 5 * HZ;
+       i40e_set_ethtool_ops(netdev);
+
+       return 0;
+}
+
+/**
+ * i40e_vsi_delete - Delete a VSI from the switch
+ * @vsi: the VSI being removed
+ *
+ * Asks the firmware to delete the switch element backing @vsi.  The
+ * default (LAN) VSI must never be removed, and the FDIR pseudo-VSI has
+ * no HW element behind it, so both are silently ignored.
+ **/
+static void i40e_vsi_delete(struct i40e_vsi *vsi)
+{
+       struct i40e_pf *pf = vsi->back;
+
+       /* removing the default VSI is not allowed, and the FDIR
+        * special case has no HW VSI to delete
+        */
+       if (vsi == pf->vsi[pf->lan_vsi] || vsi->type == I40E_VSI_FDIR)
+               return;
+
+       i40e_aq_delete_element(&pf->hw, vsi->seid, NULL);
+}
+
+/**
+ * i40e_add_vsi - Add a VSI to the switch
+ * @vsi: the VSI being configured
+ *
+ * This initializes a VSI context depending on the VSI type to be added and
+ * passes it down to the add_vsi aq command.  The MAIN VSI already exists
+ * in hardware and is only re-read/updated; FDIR is a software-only
+ * pseudo-VSI; VMDQ2 and SRIOV VSIs are created via the AQ.
+ *
+ * Returns 0 on success, -ENODEV for an unknown VSI type, or -ENOENT
+ * when a firmware request fails.
+ **/
+static int i40e_add_vsi(struct i40e_vsi *vsi)
+{
+       int ret = -ENODEV;
+       struct i40e_mac_filter *f, *ftmp;
+       struct i40e_pf *pf = vsi->back;
+       struct i40e_hw *hw = &pf->hw;
+       struct i40e_vsi_context ctxt;
+       u8 enabled_tc = 0x1; /* TC0 enabled */
+       int f_count = 0;
+
+       memset(&ctxt, 0, sizeof(ctxt));
+       switch (vsi->type) {
+       case I40E_VSI_MAIN:
+               /* The PF's main VSI is already setup as part of the
+                * device initialization, so we'll not bother with
+                * the add_vsi call, but we will retrieve the current
+                * VSI context.
+                */
+               ctxt.seid = pf->main_vsi_seid;
+               ctxt.pf_num = pf->hw.pf_id;
+               ctxt.vf_num = 0;
+               ret = i40e_aq_get_vsi_params(&pf->hw, &ctxt, NULL);
+               ctxt.flags = I40E_AQ_VSI_TYPE_PF;
+               if (ret) {
+                       dev_info(&pf->pdev->dev,
+                                "couldn't get pf vsi config, err %d, aq_err %d\n",
+                                ret, pf->hw.aq.asq_last_status);
+                       return -ENOENT;
+               }
+               memcpy(&vsi->info, &ctxt.info, sizeof(ctxt.info));
+               vsi->info.valid_sections = 0;
+
+               vsi->seid = ctxt.seid;
+               vsi->id = ctxt.vsi_number;
+
+               enabled_tc = i40e_pf_get_tc_map(pf);
+
+               /* MFP mode setup queue map and update VSI */
+               if (pf->flags & I40E_FLAG_MFP_ENABLED) {
+                       memset(&ctxt, 0, sizeof(ctxt));
+                       ctxt.seid = pf->main_vsi_seid;
+                       ctxt.pf_num = pf->hw.pf_id;
+                       ctxt.vf_num = 0;
+                       i40e_vsi_setup_queue_map(vsi, &ctxt, enabled_tc, false);
+                       ret = i40e_aq_update_vsi_params(hw, &ctxt, NULL);
+                       if (ret) {
+                               dev_info(&pf->pdev->dev,
+                                        "update vsi failed, aq_err=%d\n",
+                                        pf->hw.aq.asq_last_status);
+                               ret = -ENOENT;
+                               goto err;
+                       }
+                       /* update the local VSI info queue map */
+                       i40e_vsi_update_queue_map(vsi, &ctxt);
+                       vsi->info.valid_sections = 0;
+               } else {
+                       /* Default/Main VSI is only enabled for TC0
+                        * reconfigure it to enable all TCs that are
+                        * available on the port in SFP mode.
+                        */
+                       ret = i40e_vsi_config_tc(vsi, enabled_tc);
+                       if (ret) {
+                               dev_info(&pf->pdev->dev,
+                                        "failed to configure TCs for main VSI tc_map 0x%08x, err %d, aq_err %d\n",
+                                        enabled_tc, ret,
+                                        pf->hw.aq.asq_last_status);
+                               /* NOTE(review): this error is logged but
+                                * execution continues, and ret may later be
+                                * overwritten by the BW query -- confirm
+                                * the soft-failure is intentional
+                                */
+                               ret = -ENOENT;
+                       }
+               }
+               break;
+
+       case I40E_VSI_FDIR:
+               /* no queue mapping or actual HW VSI needed */
+               vsi->info.valid_sections = 0;
+               vsi->seid = 0;
+               vsi->id = 0;
+               i40e_vsi_setup_queue_map(vsi, &ctxt, enabled_tc, true);
+               return 0;
+
+       case I40E_VSI_VMDQ2:
+               ctxt.pf_num = hw->pf_id;
+               ctxt.vf_num = 0;
+               ctxt.uplink_seid = vsi->uplink_seid;
+               ctxt.connection_type = 0x1;     /* regular data port */
+               ctxt.flags = I40E_AQ_VSI_TYPE_VMDQ2;
+
+               ctxt.info.valid_sections |= cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID);
+
+               /* This VSI is connected to VEB so the switch_id
+                * should be set to zero by default.
+                */
+               ctxt.info.switch_id = 0;
+               ctxt.info.switch_id |= cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_LOCAL_LB);
+               ctxt.info.switch_id |= cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB);
+
+               /* Setup the VSI tx/rx queue map for TC0 only for now */
+               i40e_vsi_setup_queue_map(vsi, &ctxt, enabled_tc, true);
+               break;
+
+       case I40E_VSI_SRIOV:
+               ctxt.pf_num = hw->pf_id;
+               ctxt.vf_num = vsi->vf_id + hw->func_caps.vf_base_id;
+               ctxt.uplink_seid = vsi->uplink_seid;
+               ctxt.connection_type = 0x1;     /* regular data port */
+               ctxt.flags = I40E_AQ_VSI_TYPE_VF;
+
+               ctxt.info.valid_sections |= cpu_to_le16(I40E_AQ_VSI_PROP_SWITCH_VALID);
+
+               /* This VSI is connected to VEB so the switch_id
+                * should be set to zero by default.
+                */
+               ctxt.info.switch_id = cpu_to_le16(I40E_AQ_VSI_SW_ID_FLAG_ALLOW_LB);
+
+               ctxt.info.valid_sections |= cpu_to_le16(I40E_AQ_VSI_PROP_VLAN_VALID);
+               ctxt.info.port_vlan_flags |= I40E_AQ_VSI_PVLAN_MODE_ALL;
+               /* Setup the VSI tx/rx queue map for TC0 only for now */
+               i40e_vsi_setup_queue_map(vsi, &ctxt, enabled_tc, true);
+               break;
+
+       default:
+               return -ENODEV;
+       }
+
+       /* the MAIN VSI already exists in hardware; all others are created */
+       if (vsi->type != I40E_VSI_MAIN) {
+               ret = i40e_aq_add_vsi(hw, &ctxt, NULL);
+               if (ret) {
+                       dev_info(&vsi->back->pdev->dev,
+                                "add vsi failed, aq_err=%d\n",
+                                vsi->back->hw.aq.asq_last_status);
+                       ret = -ENOENT;
+                       goto err;
+               }
+               memcpy(&vsi->info, &ctxt.info, sizeof(ctxt.info));
+               vsi->info.valid_sections = 0;
+               vsi->seid = ctxt.seid;
+               vsi->id = ctxt.vsi_number;
+       }
+
+       /* If macvlan filters already exist, force them to get loaded */
+       list_for_each_entry_safe(f, ftmp, &vsi->mac_filter_list, list) {
+               f->changed = true;
+               f_count++;
+       }
+       if (f_count) {
+               vsi->flags |= I40E_VSI_FLAG_FILTER_CHANGED;
+               pf->flags |= I40E_FLAG_FILTER_SYNC;
+       }
+
+       /* Update VSI BW information */
+       ret = i40e_vsi_get_bw_info(vsi);
+       if (ret) {
+               dev_info(&pf->pdev->dev,
+                        "couldn't get vsi bw info, err %d, aq_err %d\n",
+                        ret, pf->hw.aq.asq_last_status);
+               /* VSI is already added so not tearing that up */
+               ret = 0;
+       }
+
+err:
+       return ret;
+}
+
+/**
+ * i40e_vsi_release - Delete a VSI and free its resources
+ * @vsi: the VSI being removed
+ *
+ * Tears down the netdev (or the IRQ/ring resources if none was
+ * registered), removes the MAC/VLAN filters, deletes the HW VSI, frees
+ * the SW structures, and finally releases the uplink VEB if this was
+ * the last non-owner VSI hanging off it.
+ *
+ * Returns 0 on success or < 0 on error
+ **/
+int i40e_vsi_release(struct i40e_vsi *vsi)
+{
+       struct i40e_mac_filter *f, *ftmp;
+       struct i40e_veb *veb = NULL;
+       struct i40e_pf *pf;
+       u16 uplink_seid;
+       int i, n;
+
+       pf = vsi->back;
+
+       /* release of a VEB-owner or last VSI is not allowed */
+       if (vsi->flags & I40E_VSI_FLAG_VEB_OWNER) {
+               dev_info(&pf->pdev->dev, "VSI %d has existing VEB %d\n",
+                        vsi->seid, vsi->uplink_seid);
+               return -ENODEV;
+       }
+       if (vsi == pf->vsi[pf->lan_vsi] &&
+           !test_bit(__I40E_DOWN, &pf->state)) {
+               dev_info(&pf->pdev->dev, "Can't remove PF VSI\n");
+               return -ENODEV;
+       }
+
+       /* remember the uplink before the VSI struct is cleared below */
+       uplink_seid = vsi->uplink_seid;
+       if (vsi->type != I40E_VSI_SRIOV) {
+               if (vsi->netdev_registered) {
+                       vsi->netdev_registered = false;
+                       if (vsi->netdev) {
+                               /* results in a call to i40e_close() */
+                               unregister_netdev(vsi->netdev);
+                               free_netdev(vsi->netdev);
+                               vsi->netdev = NULL;
+                       }
+               } else {
+                       /* no netdev to close us, so stop things directly */
+                       if (!test_and_set_bit(__I40E_DOWN, &vsi->state))
+                               i40e_down(vsi);
+                       i40e_vsi_free_irq(vsi);
+                       i40e_vsi_free_tx_resources(vsi);
+                       i40e_vsi_free_rx_resources(vsi);
+               }
+               i40e_vsi_disable_irq(vsi);
+       }
+
+       /* drop all filters and push the removals to the hardware */
+       list_for_each_entry_safe(f, ftmp, &vsi->mac_filter_list, list)
+               i40e_del_filter(vsi, f->macaddr, f->vlan,
+                               f->is_vf, f->is_netdev);
+       i40e_sync_vsi_filters(vsi);
+
+       i40e_vsi_delete(vsi);
+       i40e_vsi_free_q_vectors(vsi);
+       i40e_vsi_clear_rings(vsi);
+       i40e_vsi_clear(vsi);
+
+       /* If this was the last thing on the VEB, except for the
+        * controlling VSI, remove the VEB, which puts the controlling
+        * VSI onto the next level down in the switch.
+        *
+        * Well, okay, there's one more exception here: don't remove
+        * the orphan VEBs yet.  We'll wait for an explicit remove request
+        * from up the network stack.
+        */
+       for (n = 0, i = 0; i < pf->hw.func_caps.num_vsis; i++) {
+               if (pf->vsi[i] &&
+                   pf->vsi[i]->uplink_seid == uplink_seid &&
+                   (pf->vsi[i]->flags & I40E_VSI_FLAG_VEB_OWNER) == 0) {
+                       n++;      /* count the VSIs */
+               }
+       }
+       for (i = 0; i < I40E_MAX_VEB; i++) {
+               if (!pf->veb[i])
+                       continue;
+               if (pf->veb[i]->uplink_seid == uplink_seid)
+                       n++;     /* count the VEBs */
+               if (pf->veb[i]->seid == uplink_seid)
+                       veb = pf->veb[i];
+       }
+       /* uplink_seid == 0 marks an orphan VEB, which is kept */
+       if (n == 0 && veb && veb->uplink_seid != 0)
+               i40e_veb_release(veb);
+
+       return 0;
+}
+
+/**
+ * i40e_vsi_setup_vectors - Set up the q_vectors for the given VSI
+ * @vsi: ptr to the VSI
+ *
+ * This should only be called after i40e_vsi_mem_alloc() which allocates the
+ * corresponding SW VSI structure and initializes num_queue_pairs for the
+ * newly allocated VSI.
+ *
+ * Returns 0 on success, -EEXIST if vectors were already set up, or a
+ * negative error from allocation/tracking on failure.
+ **/
+static int i40e_vsi_setup_vectors(struct i40e_vsi *vsi)
+{
+       struct i40e_pf *pf = vsi->back;
+       int err;
+
+       /* refuse to set up a VSI that already has vectors */
+       if (vsi->q_vectors) {
+               dev_info(&pf->pdev->dev, "VSI %d has existing q_vectors\n",
+                        vsi->seid);
+               return -EEXIST;
+       }
+
+       if (vsi->base_vector) {
+               dev_info(&pf->pdev->dev,
+                        "VSI %d has non-zero base vector %d\n",
+                        vsi->seid, vsi->base_vector);
+               return -EEXIST;
+       }
+
+       err = i40e_alloc_q_vectors(vsi);
+       if (err) {
+               dev_info(&pf->pdev->dev,
+                        "failed to allocate %d q_vector for VSI %d, ret=%d\n",
+                        vsi->num_q_vectors, vsi->seid, err);
+               vsi->num_q_vectors = 0;
+               return err;
+       }
+
+       /* reserve a contiguous run of IRQ vectors from the tracker */
+       vsi->base_vector = i40e_get_lump(pf, pf->irq_pile,
+                                        vsi->num_q_vectors, vsi->idx);
+       if (vsi->base_vector < 0) {
+               dev_info(&pf->pdev->dev,
+                        "failed to get q tracking for VSI %d, err=%d\n",
+                        vsi->seid, vsi->base_vector);
+               i40e_vsi_free_q_vectors(vsi);
+               return -ENOENT;
+       }
+
+       return 0;
+}
+
+/**
+ * i40e_vsi_setup - Set up a VSI by a given type
+ * @pf: board private structure
+ * @type: VSI type
+ * @uplink_seid: the switch element to link to
+ * @param1: usage depends upon VSI type. For VF types, indicates VF id
+ *
+ * This allocates the sw VSI structure and its queue resources, then add a VSI
+ * to the identified VEB.
+ *
+ * Returns pointer to the successfully allocated and configure VSI sw struct on
+ * success, otherwise returns NULL on failure.
+ **/
struct i40e_vsi *i40e_vsi_setup(struct i40e_pf *pf, u8 type,
				u16 uplink_seid, u32 param1)
{
	struct i40e_vsi *vsi = NULL;
	struct i40e_veb *veb = NULL;
	int ret, i;
	int v_idx;

	/* The requested uplink_seid must be either
	 *     - the PF's port seid
	 *              no VEB is needed because this is the PF
	 *              or this is a Flow Director special case VSI
	 *     - seid of an existing VEB
	 *     - seid of a VSI that owns an existing VEB
	 *     - seid of a VSI that doesn't own a VEB
	 *              a new VEB is created and the VSI becomes the owner
	 *     - seid of the PF VSI, which is what creates the first VEB
	 *              this is a special case of the previous
	 *
	 * Find which uplink_seid we were given and create a new VEB if needed
	 */
	for (i = 0; i < I40E_MAX_VEB; i++) {
		if (pf->veb[i] && pf->veb[i]->seid == uplink_seid) {
			veb = pf->veb[i];
			break;
		}
	}

	if (!veb && uplink_seid != pf->mac_seid) {

		/* the uplink must then be the seid of an existing VSI */
		for (i = 0; i < pf->hw.func_caps.num_vsis; i++) {
			if (pf->vsi[i] && pf->vsi[i]->seid == uplink_seid) {
				vsi = pf->vsi[i];
				break;
			}
		}
		if (!vsi) {
			dev_info(&pf->pdev->dev, "no such uplink_seid %d\n",
				 uplink_seid);
			return NULL;
		}

		/* hang a new VEB off that VSI unless it already owns one */
		if (vsi->uplink_seid == pf->mac_seid)
			veb = i40e_veb_setup(pf, 0, pf->mac_seid, vsi->seid,
					     vsi->tc_config.enabled_tc);
		else if ((vsi->flags & I40E_VSI_FLAG_VEB_OWNER) == 0)
			veb = i40e_veb_setup(pf, 0, vsi->uplink_seid, vsi->seid,
					     vsi->tc_config.enabled_tc);

		/* i40e_veb_setup() registered the VEB in pf->veb[]; if we
		 * didn't get it back directly, look it up by uplink seid
		 */
		for (i = 0; i < I40E_MAX_VEB && !veb; i++) {
			if (pf->veb[i] && pf->veb[i]->seid == vsi->uplink_seid)
				veb = pf->veb[i];
		}
		if (!veb) {
			dev_info(&pf->pdev->dev, "couldn't add VEB\n");
			return NULL;
		}

		vsi->flags |= I40E_VSI_FLAG_VEB_OWNER;
		uplink_seid = veb->seid;
	}

	/* get vsi sw struct */
	v_idx = i40e_vsi_mem_alloc(pf, type);
	if (v_idx < 0)
		goto err_alloc;
	vsi = pf->vsi[v_idx];
	vsi->type = type;
	vsi->veb_idx = (veb ? veb->idx : I40E_NO_VEB);

	if (type == I40E_VSI_MAIN)
		pf->lan_vsi = v_idx;
	else if (type == I40E_VSI_SRIOV)
		vsi->vf_id = param1;	/* param1 carries the VF id for SRIOV VSIs */
	/* assign it some queues */
	ret = i40e_get_lump(pf, pf->qp_pile, vsi->alloc_queue_pairs, vsi->idx);
	if (ret < 0) {
		dev_info(&pf->pdev->dev, "VSI %d get_lump failed %d\n",
			 vsi->seid, ret);
		goto err_vsi;
	}
	vsi->base_queue = ret;

	/* get a VSI from the hardware */
	vsi->uplink_seid = uplink_seid;
	ret = i40e_add_vsi(vsi);
	if (ret)
		goto err_vsi;

	switch (vsi->type) {
	/* setup the netdev if needed */
	case I40E_VSI_MAIN:
	case I40E_VSI_VMDQ2:
		ret = i40e_config_netdev(vsi);
		if (ret)
			goto err_netdev;
		ret = register_netdev(vsi->netdev);
		if (ret)
			goto err_netdev;
		vsi->netdev_registered = true;
		netif_carrier_off(vsi->netdev);
		/* fall through */

	case I40E_VSI_FDIR:
		/* set up vectors and rings if needed */
		ret = i40e_vsi_setup_vectors(vsi);
		if (ret)
			goto err_msix;

		ret = i40e_alloc_rings(vsi);
		if (ret)
			goto err_rings;

		/* map all of the rings to the q_vectors */
		i40e_vsi_map_rings_to_vectors(vsi);

		i40e_vsi_reset_stats(vsi);
		break;

	default:
		/* no netdev or rings for the other VSI types */
		break;
	}

	return vsi;

	/* unwind in reverse order of acquisition */
err_rings:
	i40e_vsi_free_q_vectors(vsi);
err_msix:
	if (vsi->netdev_registered) {
		vsi->netdev_registered = false;
		unregister_netdev(vsi->netdev);
		free_netdev(vsi->netdev);
		vsi->netdev = NULL;
	}
	/* NOTE(review): if register_netdev() fails, the netdev allocated by
	 * i40e_config_netdev() is not freed on this path -- confirm whether
	 * i40e_vsi_clear() releases it, otherwise this leaks the netdev.
	 */
err_netdev:
	i40e_aq_delete_element(&pf->hw, vsi->seid, NULL);
err_vsi:
	i40e_vsi_clear(vsi);
err_alloc:
	return NULL;
}
+
/**
 * i40e_veb_get_bw_info - Query VEB BW information
 * @veb: the veb to query
 *
 * Query the Tx scheduler BW configuration data for given VEB and
 * cache the results in the veb struct.
 *
 * Returns 0 on success, nonzero AQ status otherwise.
 **/
static int i40e_veb_get_bw_info(struct i40e_veb *veb)
{
	struct i40e_aqc_query_switching_comp_ets_config_resp ets_data;
	struct i40e_aqc_query_switching_comp_bw_config_resp bw_data;
	struct i40e_pf *pf = veb->pf;
	struct i40e_hw *hw = &pf->hw;
	u32 tc_bw_max;
	int ret = 0;
	int i;

	/* per-TC share/limit credit configuration */
	ret = i40e_aq_query_switch_comp_bw_config(hw, veb->seid,
						  &bw_data, NULL);
	if (ret) {
		dev_info(&pf->pdev->dev,
			 "query veb bw config failed, aq_err=%d\n",
			 hw->aq.asq_last_status);
		goto out;
	}

	/* port-level ETS configuration */
	ret = i40e_aq_query_switch_comp_ets_config(hw, veb->seid,
						   &ets_data, NULL);
	if (ret) {
		dev_info(&pf->pdev->dev,
			 "query veb bw ets config failed, aq_err=%d\n",
			 hw->aq.asq_last_status);
		goto out;
	}

	veb->bw_limit = le16_to_cpu(ets_data.port_bw_limit);
	/* NOTE(review): tc_bw_max is copied without le16_to_cpu() unlike
	 * port_bw_limit above -- confirm the AQ field's width/endianness.
	 */
	veb->bw_max_quanta = ets_data.tc_bw_max;
	veb->is_abs_credits = bw_data.absolute_credits_enable;
	/* tc_bw_max arrives as two LE16 words packing 4-bit quanta per TC */
	tc_bw_max = le16_to_cpu(bw_data.tc_bw_max[0]) |
		    (le16_to_cpu(bw_data.tc_bw_max[1]) << 16);
	for (i = 0; i < I40E_MAX_TRAFFIC_CLASS; i++) {
		veb->bw_tc_share_credits[i] = bw_data.tc_bw_share_credits[i];
		veb->bw_tc_limit_credits[i] =
					le16_to_cpu(bw_data.tc_bw_limits[i]);
		veb->bw_tc_max_quanta[i] = ((tc_bw_max >> (i*4)) & 0x7);
	}

out:
	return ret;
}
+
+/**
+ * i40e_veb_mem_alloc - Allocates the next available struct veb in the PF
+ * @pf: board private structure
+ *
+ * On error: returns error code (negative)
+ * On success: returns vsi index in PF (positive)
+ **/
+static int i40e_veb_mem_alloc(struct i40e_pf *pf)
+{
+       int ret = -ENOENT;
+       struct i40e_veb *veb;
+       int i;
+
+       /* Need to protect the allocation of switch elements at the PF level */
+       mutex_lock(&pf->switch_mutex);
+
+       /* VEB list may be fragmented if VEB creation/destruction has
+        * been happening.  We can afford to do a quick scan to look
+        * for any free slots in the list.
+        *
+        * find next empty veb slot, looping back around if necessary
+        */
+       i = 0;
+       while ((i < I40E_MAX_VEB) && (pf->veb[i] != NULL))
+               i++;
+       if (i >= I40E_MAX_VEB) {
+               ret = -ENOMEM;
+               goto err_alloc_veb;  /* out of VEB slots! */
+       }
+
+       veb = kzalloc(sizeof(*veb), GFP_KERNEL);
+       if (!veb) {
+               ret = -ENOMEM;
+               goto err_alloc_veb;
+       }
+       veb->pf = pf;
+       veb->idx = i;
+       veb->enabled_tc = 1;
+
+       pf->veb[i] = veb;
+       ret = i;
+err_alloc_veb:
+       mutex_unlock(&pf->switch_mutex);
+       return ret;
+}
+
/**
 * i40e_switch_branch_release - Delete a branch of the switch tree
 * @branch: where to start deleting
 *
 * This uses recursion to find the tips of the branch to be
 * removed, deleting until we get back to and can delete this VEB.
 **/
static void i40e_switch_branch_release(struct i40e_veb *branch)
{
	struct i40e_pf *pf = branch->pf;
	/* cache seid/idx now: *branch may be freed as a side effect of
	 * releasing its last VSI (see NOTE below)
	 */
	u16 branch_seid = branch->seid;
	u16 veb_idx = branch->idx;
	int i;

	/* release any VEBs on this VEB - RECURSION */
	for (i = 0; i < I40E_MAX_VEB; i++) {
		if (!pf->veb[i])
			continue;
		if (pf->veb[i]->uplink_seid == branch->seid)
			i40e_switch_branch_release(pf->veb[i]);
	}

	/* Release the VSIs on this VEB, but not the owner VSI.
	 *
	 * NOTE: Removing the last VSI on a VEB has the SIDE EFFECT of removing
	 *       the VEB itself, so don't use (*branch) after this loop.
	 */
	for (i = 0; i < pf->hw.func_caps.num_vsis; i++) {
		if (!pf->vsi[i])
			continue;
		if (pf->vsi[i]->uplink_seid == branch_seid &&
		   (pf->vsi[i]->flags & I40E_VSI_FLAG_VEB_OWNER) == 0) {
			i40e_vsi_release(pf->vsi[i]);
		}
	}

	/* There's one corner case where the VEB might not have been
	 * removed, so double check it here and remove it if needed.
	 * This case happens if the veb was created from the debugfs
	 * commands and no VSIs were added to it.
	 */
	if (pf->veb[veb_idx])
		i40e_veb_release(pf->veb[veb_idx]);
}
+
+/**
+ * i40e_veb_clear - remove veb struct
+ * @veb: the veb to remove
+ **/
+static void i40e_veb_clear(struct i40e_veb *veb)
+{
+       if (!veb)
+               return;
+
+       if (veb->pf) {
+               struct i40e_pf *pf = veb->pf;
+
+               mutex_lock(&pf->switch_mutex);
+               if (pf->veb[veb->idx] == veb)
+                       pf->veb[veb->idx] = NULL;
+               mutex_unlock(&pf->switch_mutex);
+       }
+
+       kfree(veb);
+}
+
+/**
+ * i40e_veb_release - Delete a VEB and free its resources
+ * @veb: the VEB being removed
+ **/
+void i40e_veb_release(struct i40e_veb *veb)
+{
+       struct i40e_vsi *vsi = NULL;
+       struct i40e_pf *pf;
+       int i, n = 0;
+
+       pf = veb->pf;
+
+       /* find the remaining VSI and check for extras */
+       for (i = 0; i < pf->hw.func_caps.num_vsis; i++) {
+               if (pf->vsi[i] && pf->vsi[i]->uplink_seid == veb->seid) {
+                       n++;
+                       vsi = pf->vsi[i];
+               }
+       }
+       if (n != 1) {
+               dev_info(&pf->pdev->dev,
+                        "can't remove VEB %d with %d VSIs left\n",
+                        veb->seid, n);
+               return;
+       }
+
+       /* move the remaining VSI to uplink veb */
+       vsi->flags &= ~I40E_VSI_FLAG_VEB_OWNER;
+       if (veb->uplink_seid) {
+               vsi->uplink_seid = veb->uplink_seid;
+               if (veb->uplink_seid == pf->mac_seid)
+                       vsi->veb_idx = I40E_NO_VEB;
+               else
+                       vsi->veb_idx = veb->veb_idx;
+       } else {
+               /* floating VEB */
+               vsi->uplink_seid = pf->vsi[pf->lan_vsi]->uplink_seid;
+               vsi->veb_idx = pf->vsi[pf->lan_vsi]->veb_idx;
+       }
+
+       i40e_aq_delete_element(&pf->hw, veb->seid, NULL);
+       i40e_veb_clear(veb);
+
+       return;
+}
+
/**
 * i40e_add_veb - create the VEB in the switch
 * @veb: the VEB to be instantiated
 * @vsi: the controlling VSI
 *
 * Issues the AQ commands to create the VEB, fetch its statistics
 * index and BW configuration, and re-parents @vsi onto the new VEB.
 *
 * Returns 0 on success, -EPERM or -ENOENT if an AQ call fails.
 **/
static int i40e_add_veb(struct i40e_veb *veb, struct i40e_vsi *vsi)
{
	/* the VEB hanging off the LAN VSI is the default forwarding VEB */
	bool is_default = (vsi->idx == vsi->back->lan_vsi);
	int ret;

	/* get a VEB from the hardware */
	ret = i40e_aq_add_veb(&veb->pf->hw, veb->uplink_seid, vsi->seid,
			      veb->enabled_tc, is_default, &veb->seid, NULL);
	if (ret) {
		dev_info(&veb->pf->pdev->dev,
			 "couldn't add VEB, err %d, aq_err %d\n",
			 ret, veb->pf->hw.aq.asq_last_status);
		return -EPERM;
	}

	/* get statistics counter */
	ret = i40e_aq_get_veb_parameters(&veb->pf->hw, veb->seid, NULL, NULL,
					 &veb->stats_idx, NULL, NULL, NULL);
	if (ret) {
		dev_info(&veb->pf->pdev->dev,
			 "couldn't get VEB statistics idx, err %d, aq_err %d\n",
			 ret, veb->pf->hw.aq.asq_last_status);
		return -EPERM;
	}
	ret = i40e_veb_get_bw_info(veb);
	if (ret) {
		dev_info(&veb->pf->pdev->dev,
			 "couldn't get VEB bw info, err %d, aq_err %d\n",
			 ret, veb->pf->hw.aq.asq_last_status);
		/* undo the HW-side VEB creation before bailing out */
		i40e_aq_delete_element(&veb->pf->hw, veb->seid, NULL);
		return -ENOENT;
	}

	/* the VSI now hangs off the new VEB and owns it */
	vsi->uplink_seid = veb->seid;
	vsi->veb_idx = veb->idx;
	vsi->flags |= I40E_VSI_FLAG_VEB_OWNER;

	return 0;
}
+
/**
 * i40e_veb_setup - Set up a VEB
 * @pf: board private structure
 * @flags: VEB setup flags
 * @uplink_seid: the switch element to link to
 * @vsi_seid: the initial VSI seid
 * @enabled_tc: Enabled TC bit-map
 *
 * This allocates the sw VEB structure and links it into the switch
 * It is possible and legal for this to be a duplicate of an already
 * existing VEB.  It is also possible for both uplink and vsi seids
 * to be zero, in order to create a floating VEB.
 *
 * Returns pointer to the successfully allocated VEB sw struct on
 * success, otherwise returns NULL on failure.
 **/
struct i40e_veb *i40e_veb_setup(struct i40e_pf *pf, u16 flags,
				u16 uplink_seid, u16 vsi_seid,
				u8 enabled_tc)
{
	struct i40e_veb *veb, *uplink_veb = NULL;
	int vsi_idx, veb_idx;
	int ret;

	/* if one seid is 0, the other must be 0 to create a floating relay */
	if ((uplink_seid == 0 || vsi_seid == 0) &&
	    (uplink_seid + vsi_seid != 0)) {
		dev_info(&pf->pdev->dev,
			 "one, not both seid's are 0: uplink=%d vsi=%d\n",
			 uplink_seid, vsi_seid);
		return NULL;
	}

	/* make sure there is such a vsi and uplink */
	for (vsi_idx = 0; vsi_idx < pf->hw.func_caps.num_vsis; vsi_idx++)
		if (pf->vsi[vsi_idx] && pf->vsi[vsi_idx]->seid == vsi_seid)
			break;
	if (vsi_idx >= pf->hw.func_caps.num_vsis && vsi_seid != 0) {
		dev_info(&pf->pdev->dev, "vsi seid %d not found\n",
			 vsi_seid);
		return NULL;
	}
	/* NOTE(review): in the floating-VEB case (vsi_seid == 0) the loop
	 * above can terminate with vsi_idx == num_vsis, yet pf->vsi[vsi_idx]
	 * is still passed to i40e_add_veb() below -- confirm this index
	 * cannot run past the pf->vsi array.
	 */

	if (uplink_seid && uplink_seid != pf->mac_seid) {
		/* a non-port uplink must be an already-known VEB */
		for (veb_idx = 0; veb_idx < I40E_MAX_VEB; veb_idx++) {
			if (pf->veb[veb_idx] &&
			    pf->veb[veb_idx]->seid == uplink_seid) {
				uplink_veb = pf->veb[veb_idx];
				break;
			}
		}
		if (!uplink_veb) {
			dev_info(&pf->pdev->dev,
				 "uplink seid %d not found\n", uplink_seid);
			return NULL;
		}
	}

	/* get veb sw struct */
	veb_idx = i40e_veb_mem_alloc(pf);
	if (veb_idx < 0)
		goto err_alloc;
	veb = pf->veb[veb_idx];
	veb->flags = flags;
	veb->uplink_seid = uplink_seid;
	veb->veb_idx = (uplink_veb ? uplink_veb->idx : I40E_NO_VEB);
	/* default to TC0 only if no TC bitmap was given */
	veb->enabled_tc = (enabled_tc ? enabled_tc : 0x1);

	/* create the VEB in the switch */
	ret = i40e_add_veb(veb, pf->vsi[vsi_idx]);
	if (ret)
		goto err_veb;

	return veb;

err_veb:
	i40e_veb_clear(veb);
err_alloc:
	return NULL;
}
+
/**
 * i40e_setup_pf_switch_element - set pf vars based on switch type
 * @pf: board private structure
 * @ele: element we are building info from
 * @num_reported: total number of elements
 * @printconfig: should we print the contents
 *
 * helper function to assist in extracting a few useful SEID values.
 **/
static void i40e_setup_pf_switch_element(struct i40e_pf *pf,
				struct i40e_aqc_switch_config_element_resp *ele,
				u16 num_reported, bool printconfig)
{
	/* AQ response fields are little-endian on the wire */
	u16 downlink_seid = le16_to_cpu(ele->downlink_seid);
	u16 uplink_seid = le16_to_cpu(ele->uplink_seid);
	u8 element_type = ele->element_type;
	u16 seid = le16_to_cpu(ele->seid);

	if (printconfig)
		dev_info(&pf->pdev->dev,
			 "type=%d seid=%d uplink=%d downlink=%d\n",
			 element_type, seid, uplink_seid, downlink_seid);

	switch (element_type) {
	case I40E_SWITCH_ELEMENT_TYPE_MAC:
		pf->mac_seid = seid;
		break;
	case I40E_SWITCH_ELEMENT_TYPE_VEB:
		/* Main VEB? Only the VEB directly under the MAC matters here */
		if (uplink_seid != pf->mac_seid)
			break;
		if (pf->lan_veb == I40E_NO_VEB) {
			int v;

			/* find existing or else empty VEB */
			for (v = 0; v < I40E_MAX_VEB; v++) {
				if (pf->veb[v] && (pf->veb[v]->seid == seid)) {
					pf->lan_veb = v;
					break;
				}
			}
			if (pf->lan_veb == I40E_NO_VEB) {
				/* allocate a fresh sw slot for the LAN VEB */
				v = i40e_veb_mem_alloc(pf);
				if (v < 0)
					break;
				pf->lan_veb = v;
			}
		}

		/* record what the HW reported for the LAN VEB */
		pf->veb[pf->lan_veb]->seid = seid;
		pf->veb[pf->lan_veb]->uplink_seid = pf->mac_seid;
		pf->veb[pf->lan_veb]->pf = pf;
		pf->veb[pf->lan_veb]->veb_idx = I40E_NO_VEB;
		break;
	case I40E_SWITCH_ELEMENT_TYPE_VSI:
		if (num_reported != 1)
			break;
		/* This is immediately after a reset so we can assume this is
		 * the PF's VSI
		 */
		pf->mac_seid = uplink_seid;
		pf->pf_seid = downlink_seid;
		pf->main_vsi_seid = seid;
		if (printconfig)
			dev_info(&pf->pdev->dev,
				 "pf_seid=%d main_vsi_seid=%d\n",
				 pf->pf_seid, pf->main_vsi_seid);
		break;
	case I40E_SWITCH_ELEMENT_TYPE_PF:
	case I40E_SWITCH_ELEMENT_TYPE_VF:
	case I40E_SWITCH_ELEMENT_TYPE_EMP:
	case I40E_SWITCH_ELEMENT_TYPE_BMC:
	case I40E_SWITCH_ELEMENT_TYPE_PE:
	case I40E_SWITCH_ELEMENT_TYPE_PA:
		/* ignore these for now */
		break;
	default:
		dev_info(&pf->pdev->dev, "unknown element type=%d seid=%d\n",
			 element_type, seid);
		break;
	}
}
+
/**
 * i40e_fetch_switch_configuration - Get switch config from firmware
 * @pf: board private structure
 * @printconfig: should we print the contents
 *
 * Get the current switch configuration from the device and
 * extract a few useful SEID values.
 *
 * Returns 0 on success, -ENOMEM/-ENOENT on failure.
 **/
int i40e_fetch_switch_configuration(struct i40e_pf *pf, bool printconfig)
{
	struct i40e_aqc_get_switch_config_resp *sw_config;
	u16 next_seid = 0;
	int ret = 0;
	u8 *aq_buf;
	int i;

	aq_buf = kzalloc(I40E_AQ_LARGE_BUF, GFP_KERNEL);
	if (!aq_buf)
		return -ENOMEM;

	sw_config = (struct i40e_aqc_get_switch_config_resp *)aq_buf;
	/* the AQ hands back the config in chunks; next_seid is the
	 * resume cookie, zero once the whole config has been reported
	 */
	do {
		u16 num_reported, num_total;

		ret = i40e_aq_get_switch_config(&pf->hw, sw_config,
						I40E_AQ_LARGE_BUF,
						&next_seid, NULL);
		if (ret) {
			dev_info(&pf->pdev->dev,
				 "get switch config failed %d aq_err=%x\n",
				 ret, pf->hw.aq.asq_last_status);
			kfree(aq_buf);
			return -ENOENT;
		}

		num_reported = le16_to_cpu(sw_config->header.num_reported);
		num_total = le16_to_cpu(sw_config->header.num_total);

		if (printconfig)
			dev_info(&pf->pdev->dev,
				 "header: %d reported %d total\n",
				 num_reported, num_total);

		if (num_reported) {
			int sz = sizeof(*sw_config) * num_reported;

			/* NOTE(review): the cached copy is replaced on every
			 * loop iteration, so pf->sw_config keeps only the
			 * LAST chunk when the config spans multiple AQ
			 * responses -- confirm that is intended.  An
			 * allocation failure silently leaves it NULL.
			 */
			kfree(pf->sw_config);
			pf->sw_config = kzalloc(sz, GFP_KERNEL);
			if (pf->sw_config)
				memcpy(pf->sw_config, sw_config, sz);
		}

		for (i = 0; i < num_reported; i++) {
			struct i40e_aqc_switch_config_element_resp *ele =
				&sw_config->element[i];

			i40e_setup_pf_switch_element(pf, ele, num_reported,
						     printconfig);
		}
	} while (next_seid != 0);

	kfree(aq_buf);
	return ret;
}
+
/**
 * i40e_setup_pf_switch - Setup the HW switch on startup or after reset
 * @pf: board private structure
 *
 * Returns 0 on success, negative value on failure
 **/
static int i40e_setup_pf_switch(struct i40e_pf *pf)
{
	int ret;

	/* find out what's out there already */
	ret = i40e_fetch_switch_configuration(pf, false);
	if (ret) {
		dev_info(&pf->pdev->dev,
			 "couldn't fetch switch config, err %d, aq_err %d\n",
			 ret, pf->hw.aq.asq_last_status);
		return ret;
	}
	i40e_pf_reset_stats(pf);

	/* fdir VSI must happen first to be sure it gets queue 0, but only
	 * if there is enough room for the fdir VSI
	 */
	if (pf->num_lan_qps > 1)
		i40e_fdir_setup(pf);

	/* first time setup */
	if (pf->lan_vsi == I40E_NO_VSI) {
		struct i40e_vsi *vsi = NULL;
		u16 uplink_seid;

		/* Set up the PF VSI associated with the PF's main VSI
		 * that is already in the HW switch
		 */
		if (pf->lan_veb != I40E_NO_VEB && pf->veb[pf->lan_veb])
			uplink_seid = pf->veb[pf->lan_veb]->seid;
		else
			uplink_seid = pf->mac_seid;

		vsi = i40e_vsi_setup(pf, I40E_VSI_MAIN, uplink_seid, 0);
		if (!vsi) {
			dev_info(&pf->pdev->dev, "setup of MAIN VSI failed\n");
			/* undo the fdir VSI set up above */
			i40e_fdir_teardown(pf);
			return -EAGAIN;
		}
		/* accommodate kcompat by copying the main VSI queue count
		 * into the pf, since this newer code pushes the pf queue
		 * info down a level into a VSI
		 */
		pf->num_rx_queues = vsi->alloc_queue_pairs;
		pf->num_tx_queues = vsi->alloc_queue_pairs;
	} else {
		/* force a reset of TC and queue layout configurations */
		u8 enabled_tc = pf->vsi[pf->lan_vsi]->tc_config.enabled_tc;
		pf->vsi[pf->lan_vsi]->tc_config.enabled_tc = 0;
		pf->vsi[pf->lan_vsi]->seid = pf->main_vsi_seid;
		i40e_vsi_config_tc(pf->vsi[pf->lan_vsi], enabled_tc);
	}
	i40e_vlan_stripping_disable(pf->vsi[pf->lan_vsi]);

	/* Setup static PF queue filter control settings */
	ret = i40e_setup_pf_filter_control(pf);
	if (ret) {
		dev_info(&pf->pdev->dev, "setup_pf_filter_control failed: %d\n",
			 ret);
		/* Failure here should not stop continuing other steps */
	}

	/* enable RSS in the HW, even for only one queue, as the stack can use
	 * the hash
	 */
	if ((pf->flags & I40E_FLAG_RSS_ENABLED))
		i40e_config_rss(pf);

	/* fill in link information and enable LSE reporting */
	i40e_aq_get_link_info(&pf->hw, true, NULL, NULL);
	i40e_link_event(pf);

	/* Initialize user-specifics link properties */
	pf->fc_autoneg_status = ((pf->hw.phy.link_info.an_info &
				  I40E_AQ_AN_COMPLETED) ? true : false);
	pf->hw.fc.requested_mode = I40E_FC_DEFAULT;
	/* derive the current flow-control mode from the PHY pause bits */
	if (pf->hw.phy.link_info.an_info &
	   (I40E_AQ_LINK_PAUSE_TX | I40E_AQ_LINK_PAUSE_RX))
		pf->hw.fc.current_mode = I40E_FC_FULL;
	else if (pf->hw.phy.link_info.an_info & I40E_AQ_LINK_PAUSE_TX)
		pf->hw.fc.current_mode = I40E_FC_TX_PAUSE;
	else if (pf->hw.phy.link_info.an_info & I40E_AQ_LINK_PAUSE_RX)
		pf->hw.fc.current_mode = I40E_FC_RX_PAUSE;
	else
		pf->hw.fc.current_mode = I40E_FC_DEFAULT;

	return ret;
}
+
+/**
+ * i40e_set_rss_size - helper to set rss_size
+ * @pf: board private structure
+ * @queues_left: how many queues
+ */
+static u16 i40e_set_rss_size(struct i40e_pf *pf, int queues_left)
+{
+       int num_tc0;
+
+       num_tc0 = min_t(int, queues_left, pf->rss_size_max);
+       num_tc0 = min_t(int, num_tc0, nr_cpus_node(numa_node_id()));
+       num_tc0 = rounddown_pow_of_two(num_tc0);
+
+       return num_tc0;
+}
+
/**
 * i40e_determine_queue_usage - Work out queue distribution
 * @pf: board private structure
 *
 * Splits the HW queue-pair budget between the LAN/RSS path, DCB
 * traffic classes, Flow Director, VFs and VMDq, depending on which
 * feature flags are enabled.  Results land in pf->rss_size,
 * pf->num_lan_qps, pf->num_req_vfs and pf->num_vmdq_vsis.
 **/
static void i40e_determine_queue_usage(struct i40e_pf *pf)
{
	int accum_tc_size;
	int queues_left;

	pf->num_lan_qps = 0;
	pf->num_tc_qps = rounddown_pow_of_two(pf->num_tc_qps);
	/* queues reserved for TCs 1..7 (TC0 gets the RSS set instead) */
	accum_tc_size = (I40E_MAX_TRAFFIC_CLASS - 1) * pf->num_tc_qps;

	/* Find the max queues to be put into basic use.  We'll always be
	 * using TC0, whether or not DCB is running, and TC0 will get the
	 * big RSS set.
	 */
	queues_left = pf->hw.func_caps.num_tx_qp;

	if   (!((pf->flags & I40E_FLAG_MSIX_ENABLED)             &&
		(pf->flags & I40E_FLAG_MQ_ENABLED))              ||
		!(pf->flags & (I40E_FLAG_RSS_ENABLED |
		I40E_FLAG_FDIR_ENABLED | I40E_FLAG_DCB_ENABLED)) ||
		(queues_left == 1)) {

		/* one qp for PF, no queues for anything else */
		queues_left = 0;
		pf->rss_size = pf->num_lan_qps = 1;

		/* make sure all the fancies are disabled */
		pf->flags &= ~(I40E_FLAG_RSS_ENABLED       |
				I40E_FLAG_MQ_ENABLED       |
				I40E_FLAG_FDIR_ENABLED     |
				I40E_FLAG_FDIR_ATR_ENABLED |
				I40E_FLAG_DCB_ENABLED      |
				I40E_FLAG_SRIOV_ENABLED    |
				I40E_FLAG_VMDQ_ENABLED);

	} else if (pf->flags & I40E_FLAG_RSS_ENABLED      &&
		   !(pf->flags & I40E_FLAG_FDIR_ENABLED)  &&
		   !(pf->flags & I40E_FLAG_DCB_ENABLED)) {

		/* RSS only: everything goes to TC0 */
		pf->rss_size = i40e_set_rss_size(pf, queues_left);

		queues_left -= pf->rss_size;
		pf->num_lan_qps = pf->rss_size;

	} else if (pf->flags & I40E_FLAG_RSS_ENABLED      &&
		   !(pf->flags & I40E_FLAG_FDIR_ENABLED)  &&
		   (pf->flags & I40E_FLAG_DCB_ENABLED)) {

		/* save num_tc_qps queues for TCs 1 thru 7 and the rest
		 * are set up for RSS in TC0
		 */
		queues_left -= accum_tc_size;

		pf->rss_size = i40e_set_rss_size(pf, queues_left);

		queues_left -= pf->rss_size;
		if (queues_left < 0) {
			dev_info(&pf->pdev->dev, "not enough queues for DCB\n");
			return;
		}

		pf->num_lan_qps = pf->rss_size + accum_tc_size;

	} else if (pf->flags & I40E_FLAG_RSS_ENABLED   &&
		  (pf->flags & I40E_FLAG_FDIR_ENABLED) &&
		  !(pf->flags & I40E_FLAG_DCB_ENABLED)) {

		queues_left -= 1; /* save 1 queue for FD */

		pf->rss_size = i40e_set_rss_size(pf, queues_left);

		queues_left -= pf->rss_size;
		if (queues_left < 0) {
			dev_info(&pf->pdev->dev, "not enough queues for Flow Director\n");
			return;
		}

		pf->num_lan_qps = pf->rss_size;

	} else if (pf->flags & I40E_FLAG_RSS_ENABLED   &&
		  (pf->flags & I40E_FLAG_FDIR_ENABLED) &&
		  (pf->flags & I40E_FLAG_DCB_ENABLED)) {

		/* save 1 queue for TCs 1 thru 7,
		 * 1 queue for flow director,
		 * and the rest are set up for RSS in TC0
		 */
		queues_left -= 1;
		queues_left -= accum_tc_size;

		pf->rss_size = i40e_set_rss_size(pf, queues_left);
		queues_left -= pf->rss_size;
		if (queues_left < 0) {
			dev_info(&pf->pdev->dev, "not enough queues for DCB and Flow Director\n");
			return;
		}

		pf->num_lan_qps = pf->rss_size + accum_tc_size;

	} else {
		dev_info(&pf->pdev->dev,
			 "Invalid configuration, flags=0x%08llx\n", pf->flags);
		return;
	}

	/* whatever is left over goes first to VFs, then to VMDq */
	if ((pf->flags & I40E_FLAG_SRIOV_ENABLED) &&
	    pf->num_vf_qps && pf->num_req_vfs && queues_left) {
		pf->num_req_vfs = min_t(int, pf->num_req_vfs, (queues_left /
							       pf->num_vf_qps));
		queues_left -= (pf->num_req_vfs * pf->num_vf_qps);
	}

	if ((pf->flags & I40E_FLAG_VMDQ_ENABLED) &&
	    pf->num_vmdq_vsis && pf->num_vmdq_qps && queues_left) {
		pf->num_vmdq_vsis = min_t(int, pf->num_vmdq_vsis,
					  (queues_left / pf->num_vmdq_qps));
		queues_left -= (pf->num_vmdq_vsis * pf->num_vmdq_qps);
	}

	return;
}
+
+/**
+ * i40e_setup_pf_filter_control - Setup PF static filter control
+ * @pf: PF to be setup
+ *
+ * i40e_setup_pf_filter_control sets up a pf's initial filter control
+ * settings. If PE/FCoE are enabled then it will also set the per PF
+ * based filter sizes required for them. It also enables Flow director,
+ * ethertype and macvlan type filter settings for the pf.
+ *
+ * Returns 0 on success, negative on failure
+ **/
+static int i40e_setup_pf_filter_control(struct i40e_pf *pf)
+{
+       struct i40e_filter_control_settings *settings = &pf->filter_settings;
+
+       settings->hash_lut_size = I40E_HASH_LUT_SIZE_128;
+
+       /* Flow Director is enabled */
+       if (pf->flags & (I40E_FLAG_FDIR_ENABLED | I40E_FLAG_FDIR_ATR_ENABLED))
+               settings->enable_fdir = true;
+
+       /* Ethtype and MACVLAN filters enabled for PF */
+       settings->enable_ethtype = true;
+       settings->enable_macvlan = true;
+
+       if (i40e_set_filter_control(&pf->hw, settings))
+               return -ENOENT;
+
+       return 0;
+}
+
+/**
+ * i40e_probe - Device initialization routine
+ * @pdev: PCI device information struct
+ * @ent: entry in i40e_pci_tbl
+ *
+ * i40e_probe initializes a pf identified by a pci_dev structure.
+ * The OS initialization, configuring of the pf private structure,
+ * and a hardware reset occur.
+ *
+ * Returns 0 on success, negative on failure
+ **/
+static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+       struct i40e_driver_version dv;
+       struct i40e_pf *pf;
+       struct i40e_hw *hw;
+       int err = 0;
+       u32 len;
+
+       err = pci_enable_device_mem(pdev);
+       if (err)
+               return err;
+
+       /* set up for high or low dma */
+       err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
+       if (!err) {
+               /* coherent mask for the same size will always succeed if
+                * dma_set_mask does
+                */
+               dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
+       } else {
+               err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
+               if (err) {
+                       /* err now holds the actual dma_set_mask() failure
+                        * code; previously this printed a stale 0
+                        */
+                       dev_err(&pdev->dev, "DMA configuration failed: %d\n",
+                               err);
+                       err = -EIO;
+                       goto err_dma;
+               }
+               dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
+       }
+
+       /* set up pci connections */
+       err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
+                                          IORESOURCE_MEM), i40e_driver_name);
+       if (err) {
+               dev_info(&pdev->dev,
+                        "pci_request_selected_regions failed %d\n", err);
+               goto err_pci_reg;
+       }
+
+       pci_enable_pcie_error_reporting(pdev);
+       pci_set_master(pdev);
+
+       /* Now that we have a PCI connection, we need to do the
+        * low level device setup.  This is primarily setting up
+        * the Admin Queue structures and then querying for the
+        * device's current profile information.
+        */
+       pf = kzalloc(sizeof(*pf), GFP_KERNEL);
+       if (!pf) {
+               err = -ENOMEM;
+               goto err_pf_alloc;
+       }
+       pf->next_vsi = 0;
+       pf->pdev = pdev;
+       set_bit(__I40E_DOWN, &pf->state);
+
+       hw = &pf->hw;
+       hw->back = pf;
+       hw->hw_addr = ioremap(pci_resource_start(pdev, 0),
+                             pci_resource_len(pdev, 0));
+       if (!hw->hw_addr) {
+               err = -EIO;
+               dev_info(&pdev->dev, "ioremap(0x%04x, 0x%04x) failed: 0x%x\n",
+                        (unsigned int)pci_resource_start(pdev, 0),
+                        (unsigned int)pci_resource_len(pdev, 0), err);
+               goto err_ioremap;
+       }
+       hw->vendor_id = pdev->vendor;
+       hw->device_id = pdev->device;
+       pci_read_config_byte(pdev, PCI_REVISION_ID, &hw->revision_id);
+       hw->subsystem_vendor_id = pdev->subsystem_vendor;
+       hw->subsystem_device_id = pdev->subsystem_device;
+       hw->bus.device = PCI_SLOT(pdev->devfn);
+       hw->bus.func = PCI_FUNC(pdev->devfn);
+
+       /* Reset here to make sure all is clean and to define PF 'n' */
+       err = i40e_pf_reset(hw);
+       if (err) {
+               dev_info(&pdev->dev, "Initial pf_reset failed: %d\n", err);
+               goto err_pf_reset;
+       }
+       pf->pfr_count++;
+
+       hw->aq.num_arq_entries = I40E_AQ_LEN;
+       hw->aq.num_asq_entries = I40E_AQ_LEN;
+       hw->aq.arq_buf_size = I40E_MAX_AQ_BUF_SIZE;
+       hw->aq.asq_buf_size = I40E_MAX_AQ_BUF_SIZE;
+       pf->adminq_work_limit = I40E_AQ_WORK_LIMIT;
+       snprintf(pf->misc_int_name, sizeof(pf->misc_int_name) - 1,
+                "%s-pf%d:misc",
+                dev_driver_string(&pf->pdev->dev), pf->hw.pf_id);
+
+       err = i40e_init_shared_code(hw);
+       if (err) {
+               dev_info(&pdev->dev, "init_shared_code failed: %d\n", err);
+               goto err_pf_reset;
+       }
+
+       err = i40e_init_adminq(hw);
+       dev_info(&pdev->dev, "%s\n", i40e_fw_version_str(hw));
+       if (err) {
+               dev_info(&pdev->dev,
+                        "init_adminq failed: %d expecting API %02x.%02x\n",
+                        err,
+                        I40E_FW_API_VERSION_MAJOR, I40E_FW_API_VERSION_MINOR);
+               goto err_pf_reset;
+       }
+
+       err = i40e_get_capabilities(pf);
+       if (err)
+               goto err_adminq_setup;
+
+       err = i40e_sw_init(pf);
+       if (err) {
+               dev_info(&pdev->dev, "sw_init failed: %d\n", err);
+               goto err_sw_init;
+       }
+
+       err = i40e_init_lan_hmc(hw, hw->func_caps.num_tx_qp,
+                               hw->func_caps.num_rx_qp,
+                               pf->fcoe_hmc_cntx_num, pf->fcoe_hmc_filt_num);
+       if (err) {
+               dev_info(&pdev->dev, "init_lan_hmc failed: %d\n", err);
+               goto err_init_lan_hmc;
+       }
+
+       err = i40e_configure_lan_hmc(hw, I40E_HMC_MODEL_DIRECT_ONLY);
+       if (err) {
+               dev_info(&pdev->dev, "configure_lan_hmc failed: %d\n", err);
+               err = -ENOENT;
+               goto err_configure_lan_hmc;
+       }
+
+       i40e_get_mac_addr(hw, hw->mac.addr);
+       if (i40e_validate_mac_addr(hw->mac.addr)) {
+               dev_info(&pdev->dev, "invalid MAC address %pM\n", hw->mac.addr);
+               err = -EIO;
+               goto err_mac_addr;
+       }
+       dev_info(&pdev->dev, "MAC address: %pM\n", hw->mac.addr);
+       memcpy(hw->mac.perm_addr, hw->mac.addr, ETH_ALEN);
+
+       pci_set_drvdata(pdev, pf);
+       pci_save_state(pdev);
+
+       /* set up periodic task facility */
+       setup_timer(&pf->service_timer, i40e_service_timer, (unsigned long)pf);
+       pf->service_timer_period = HZ;
+
+       INIT_WORK(&pf->service_task, i40e_service_task);
+       clear_bit(__I40E_SERVICE_SCHED, &pf->state);
+       pf->flags |= I40E_FLAG_NEED_LINK_UPDATE;
+       pf->link_check_timeout = jiffies;
+
+       /* set up the main switch operations */
+       i40e_determine_queue_usage(pf);
+       i40e_init_interrupt_scheme(pf);
+
+       /* Set up the *vsi struct based on the number of VSIs in the HW,
+        * and set up our local tracking of the MAIN PF vsi.
+        */
+       len = sizeof(struct i40e_vsi *) * pf->hw.func_caps.num_vsis;
+       pf->vsi = kzalloc(len, GFP_KERNEL);
+       if (!pf->vsi) {
+               /* without this, err would still be 0 here and probe would
+                * incorrectly report success on allocation failure
+                */
+               err = -ENOMEM;
+               goto err_switch_setup;
+       }
+
+       err = i40e_setup_pf_switch(pf);
+       if (err) {
+               dev_info(&pdev->dev, "setup_pf_switch failed: %d\n", err);
+               goto err_vsis;
+       }
+
+       /* The main driver is (mostly) up and happy. We need to set this state
+        * before setting up the misc vector or we get a race and the vector
+        * ends up disabled forever.
+        */
+       clear_bit(__I40E_DOWN, &pf->state);
+
+       /* In case of MSIX we are going to setup the misc vector right here
+        * to handle admin queue events etc. In case of legacy and MSI
+        * the misc functionality and queue processing is combined in
+        * the same vector and that gets setup at open.
+        */
+       if (pf->flags & I40E_FLAG_MSIX_ENABLED) {
+               err = i40e_setup_misc_vector(pf);
+               if (err) {
+                       dev_info(&pdev->dev,
+                                "setup of misc vector failed: %d\n", err);
+                       goto err_vsis;
+               }
+       }
+
+       /* prep for VF support */
+       if ((pf->flags & I40E_FLAG_SRIOV_ENABLED) &&
+           (pf->flags & I40E_FLAG_MSIX_ENABLED)) {
+               u32 val;
+
+               /* disable link interrupts for VFs */
+               val = rd32(hw, I40E_PFGEN_PORTMDIO_NUM);
+               val &= ~I40E_PFGEN_PORTMDIO_NUM_VFLINK_STAT_ENA_MASK;
+               wr32(hw, I40E_PFGEN_PORTMDIO_NUM, val);
+               i40e_flush(hw);
+       }
+
+       i40e_dbg_pf_init(pf);
+
+       /* tell the firmware that we're starting */
+       dv.major_version = DRV_VERSION_MAJOR;
+       dv.minor_version = DRV_VERSION_MINOR;
+       dv.build_version = DRV_VERSION_BUILD;
+       dv.subbuild_version = 0;
+       i40e_aq_send_driver_version(&pf->hw, &dv, NULL);
+
+       /* since everything's happy, start the service_task timer */
+       mod_timer(&pf->service_timer,
+                 round_jiffies(jiffies + pf->service_timer_period));
+
+       return 0;
+
+       /* Unwind what we've done if something failed in the setup */
+err_vsis:
+       set_bit(__I40E_DOWN, &pf->state);
+err_switch_setup:
+       i40e_clear_interrupt_scheme(pf);
+       kfree(pf->vsi);
+       del_timer_sync(&pf->service_timer);
+err_mac_addr:
+err_configure_lan_hmc:
+       (void)i40e_shutdown_lan_hmc(hw);
+err_init_lan_hmc:
+       kfree(pf->qp_pile);
+       kfree(pf->irq_pile);
+err_sw_init:
+err_adminq_setup:
+       (void)i40e_shutdown_adminq(hw);
+err_pf_reset:
+       iounmap(hw->hw_addr);
+err_ioremap:
+       kfree(pf);
+err_pf_alloc:
+       pci_disable_pcie_error_reporting(pdev);
+       pci_release_selected_regions(pdev,
+                                    pci_select_bars(pdev, IORESOURCE_MEM));
+err_pci_reg:
+err_dma:
+       pci_disable_device(pdev);
+       return err;
+}
+
+/**
+ * i40e_remove - Device removal routine
+ * @pdev: PCI device information struct
+ *
+ * i40e_remove is called by the PCI subsystem to alert the driver
+ * that is should release a PCI device.  This could be caused by a
+ * Hot-Plug event, or because the driver is going to be removed from
+ * memory.
+ *
+ * Teardown runs roughly in reverse order of i40e_probe: stop SW
+ * scheduling first, then release switch/VSI resources, then HMC and
+ * AdminQ, and finally the PCI-level resources.
+ **/
+static void i40e_remove(struct pci_dev *pdev)
+{
+       struct i40e_pf *pf = pci_get_drvdata(pdev);
+       i40e_status ret_code;
+       u32 reg;
+       int i;
+
+       i40e_dbg_pf_exit(pf);
+
+       /* release VF resources before tearing the PF down */
+       if (pf->flags & I40E_FLAG_SRIOV_ENABLED) {
+               i40e_free_vfs(pf);
+               pf->flags &= ~I40E_FLAG_SRIOV_ENABLED;
+       }
+
+       /* no more scheduling of any task */
+       set_bit(__I40E_DOWN, &pf->state);
+       del_timer_sync(&pf->service_timer);
+       cancel_work_sync(&pf->service_task);
+
+       i40e_fdir_teardown(pf);
+
+       /* If there is a switch structure or any orphans, remove them.
+        * This will leave only the PF's VSI remaining.
+        */
+       for (i = 0; i < I40E_MAX_VEB; i++) {
+               if (!pf->veb[i])
+                       continue;
+
+               /* a VEB uplinked to the MAC (or to nothing) roots a branch */
+               if (pf->veb[i]->uplink_seid == pf->mac_seid ||
+                   pf->veb[i]->uplink_seid == 0)
+                       i40e_switch_branch_release(pf->veb[i]);
+       }
+
+       /* Now we can shutdown the PF's VSI, just before we kill
+        * adminq and hmc.
+        */
+       if (pf->vsi[pf->lan_vsi])
+               i40e_vsi_release(pf->vsi[pf->lan_vsi]);
+
+       i40e_stop_misc_vector(pf);
+       if (pf->flags & I40E_FLAG_MSIX_ENABLED) {
+               /* entry 0 is presumably the misc/adminq vector requested in
+                * probe via i40e_setup_misc_vector() — confirm if reworked
+                */
+               synchronize_irq(pf->msix_entries[0].vector);
+               free_irq(pf->msix_entries[0].vector, pf);
+       }
+
+       /* shutdown and destroy the HMC */
+       ret_code = i40e_shutdown_lan_hmc(&pf->hw);
+       if (ret_code)
+               dev_warn(&pdev->dev,
+                        "Failed to destroy the HMC resources: %d\n", ret_code);
+
+       /* shutdown the adminq */
+       i40e_aq_queue_shutdown(&pf->hw, true);
+       ret_code = i40e_shutdown_adminq(&pf->hw);
+       if (ret_code)
+               dev_warn(&pdev->dev,
+                        "Failed to destroy the Admin Queue resources: %d\n",
+                        ret_code);
+
+       /* Clear all dynamic memory lists of rings, q_vectors, and VSIs */
+       i40e_clear_interrupt_scheme(pf);
+       for (i = 0; i < pf->hw.func_caps.num_vsis; i++) {
+               if (pf->vsi[i]) {
+                       i40e_vsi_clear_rings(pf->vsi[i]);
+                       i40e_vsi_clear(pf->vsi[i]);
+                       pf->vsi[i] = NULL;
+               }
+       }
+
+       for (i = 0; i < I40E_MAX_VEB; i++) {
+               kfree(pf->veb[i]);
+               pf->veb[i] = NULL;
+       }
+
+       kfree(pf->qp_pile);
+       kfree(pf->irq_pile);
+       kfree(pf->sw_config);
+       kfree(pf->vsi);
+
+       /* force a PF reset to clean anything leftover */
+       reg = rd32(&pf->hw, I40E_PFGEN_CTRL);
+       wr32(&pf->hw, I40E_PFGEN_CTRL, (reg | I40E_PFGEN_CTRL_PFSWR_MASK));
+       i40e_flush(&pf->hw);
+
+       iounmap(pf->hw.hw_addr);
+       kfree(pf);
+       pci_release_selected_regions(pdev,
+                                    pci_select_bars(pdev, IORESOURCE_MEM));
+
+       pci_disable_pcie_error_reporting(pdev);
+       pci_disable_device(pdev);
+}
+
+/**
+ * i40e_pci_error_detected - warning that something funky happened in PCI land
+ * @pdev: PCI device information struct
+ * @error: channel state reported by the PCI error-recovery core
+ *
+ * Called to warn that something happened and the error handling steps
+ * are in progress.  Allows the driver to quiesce things, be ready for
+ * remediation.
+ **/
+static pci_ers_result_t i40e_pci_error_detected(struct pci_dev *pdev,
+                                               enum pci_channel_state error)
+{
+       struct i40e_pf *pf = pci_get_drvdata(pdev);
+
+       dev_info(&pdev->dev, "%s: error %d\n", __func__, error);
+
+       /* shutdown all operations */
+       i40e_pf_quiesce_all_vsi(pf);
+
+       /* Request a slot reset */
+       return PCI_ERS_RESULT_NEED_RESET;
+}
+
+/**
+ * i40e_pci_error_slot_reset - a PCI slot reset just happened
+ * @pdev: PCI device information struct
+ *
+ * Called to find if the driver can work with the device now that
+ * the pci slot has been reset.  If a basic connection seems good
+ * (registers are readable and have sane content) then return a
+ * happy little PCI_ERS_RESULT_xxx.
+ **/
+static pci_ers_result_t i40e_pci_error_slot_reset(struct pci_dev *pdev)
+{
+       struct i40e_pf *pf = pci_get_drvdata(pdev);
+       pci_ers_result_t result;
+       int err;
+       u32 reg;
+
+       dev_info(&pdev->dev, "%s\n", __func__);
+       if (pci_enable_device_mem(pdev)) {
+               dev_info(&pdev->dev,
+                        "Cannot re-enable PCI device after reset.\n");
+               result = PCI_ERS_RESULT_DISCONNECT;
+       } else {
+               pci_set_master(pdev);
+               pci_restore_state(pdev);
+               pci_save_state(pdev);
+               pci_wake_from_d3(pdev, false);
+
+               /* a zero read is taken as "device accessible again";
+                * NOTE(review): presumably a dead device would read back
+                * nonzero/all-ones here — confirm against HW spec
+                */
+               reg = rd32(&pf->hw, I40E_GLGEN_RTRIG);
+               if (reg == 0)
+                       result = PCI_ERS_RESULT_RECOVERED;
+               else
+                       result = PCI_ERS_RESULT_DISCONNECT;
+       }
+
+       err = pci_cleanup_aer_uncorrect_error_status(pdev);
+       if (err) {
+               dev_info(&pdev->dev,
+                        "pci_cleanup_aer_uncorrect_error_status failed 0x%0x\n",
+                        err);
+               /* non-fatal, continue */
+       }
+
+       return result;
+}
+
+/**
+ * i40e_pci_error_resume - restart operations after PCI error recovery
+ * @pdev: PCI device information struct
+ *
+ * Called to allow the driver to bring things back up after PCI error
+ * and/or reset recovery has finished.
+ **/
+static void i40e_pci_error_resume(struct pci_dev *pdev)
+{
+       struct i40e_pf *pf = pci_get_drvdata(pdev);
+
+       dev_info(&pdev->dev, "%s\n", __func__);
+       /* reuse the normal reset-recovery path to rebuild the PF */
+       i40e_handle_reset_warning(pf);
+}
+
+/* PCI AER error-recovery callbacks for this driver */
+static const struct pci_error_handlers i40e_err_handler = {
+       .error_detected = i40e_pci_error_detected,
+       .slot_reset = i40e_pci_error_slot_reset,
+       .resume = i40e_pci_error_resume,
+};
+
+/* PCI driver registration table: probe/remove entry points, supported
+ * device IDs, error handlers and SR-IOV configuration hook
+ */
+static struct pci_driver i40e_driver = {
+       .name     = i40e_driver_name,
+       .id_table = i40e_pci_tbl,
+       .probe    = i40e_probe,
+       .remove   = i40e_remove,
+       .err_handler = &i40e_err_handler,
+       .sriov_configure = i40e_pci_sriov_configure,
+};
+
+/**
+ * i40e_init_module - Driver registration routine
+ *
+ * i40e_init_module is the first routine called when the driver is
+ * loaded. All it does is register with the PCI subsystem.
+ *
+ * Returns the result of pci_register_driver(): 0 on success,
+ * negative errno on failure.
+ **/
+static int __init i40e_init_module(void)
+{
+       pr_info("%s: %s - version %s\n", i40e_driver_name,
+               i40e_driver_string, i40e_driver_version_str);
+       pr_info("%s: %s\n", i40e_driver_name, i40e_copyright);
+       /* debugfs support is set up before any device can probe */
+       i40e_dbg_init();
+       return pci_register_driver(&i40e_driver);
+}
+module_init(i40e_init_module);
+
+/**
+ * i40e_exit_module - Driver exit cleanup routine
+ *
+ * i40e_exit_module is called just before the driver is removed
+ * from memory.  Unregistration triggers i40e_remove() for every
+ * bound device before the debugfs entries are torn down.
+ **/
+static void __exit i40e_exit_module(void)
+{
+       pci_unregister_driver(&i40e_driver);
+       i40e_dbg_exit();
+}
+module_exit(i40e_exit_module);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_nvm.c b/drivers/net/ethernet/intel/i40e/i40e_nvm.c
new file mode 100644 (file)
index 0000000..97e1bb3
--- /dev/null
@@ -0,0 +1,391 @@
+/*******************************************************************************
+ *
+ * Intel Ethernet Controller XL710 Family Linux Driver
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Contact Information:
+ * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ *
+ ******************************************************************************/
+
+#include "i40e_prototype.h"
+
+/**
+ *  i40e_init_nvm - Initialize NVM function pointers.
+ *  @hw: pointer to the HW structure.
+ *
+ *  Setups the function pointers and the NVM info structure. Should be called
+ *  once per NVM initialization, e.g. inside the i40e_init_shared_code().
+ *  Please notice that the NVM term is used here (& in all methods covered
+ *  in this file) as an equivalent of the FLASH part mapped into the SR.
+ *  We are accessing FLASH always thru the Shadow RAM.
+ *
+ *  Returns 0 on success, I40E_ERR_NVM_BLANK_MODE when the part is in
+ *  (unsupported) blank programming mode.
+ **/
+i40e_status i40e_init_nvm(struct i40e_hw *hw)
+{
+       struct i40e_nvm_info *nvm = &hw->nvm;
+       i40e_status ret_code = 0;
+       u32 fla, gens;
+       u8 sr_size;
+
+       /* The SR size is stored regardless of the nvm programming mode
+        * as the blank mode may be used in the factory line.
+        */
+       gens = rd32(hw, I40E_GLNVM_GENS);
+       sr_size = ((gens & I40E_GLNVM_GENS_SR_SIZE_MASK) >>
+                          I40E_GLNVM_GENS_SR_SIZE_SHIFT);
+       /* Switching to words (sr_size contains power of 2KB). */
+       nvm->sr_size = (1 << sr_size) * I40E_SR_WORDS_IN_1KB;
+
+       /* Check if we are in the normal or blank NVM programming mode. */
+       fla = rd32(hw, I40E_GLNVM_FLA);
+       if (fla & I40E_GLNVM_FLA_LOCKED_MASK) { /* Normal programming mode. */
+               /* Max NVM timeout. */
+               nvm->timeout = I40E_MAX_NVM_TIMEOUT;
+               nvm->blank_nvm_mode = false;
+       } else { /* Blank programming mode. */
+               nvm->blank_nvm_mode = true;
+               ret_code = I40E_ERR_NVM_BLANK_MODE;
+               hw_dbg(hw, "NVM init error: unsupported blank mode.\n");
+       }
+
+       return ret_code;
+}
+
+/**
+ *  i40e_acquire_nvm - Generic request for acquiring the NVM ownership.
+ *  @hw: pointer to the HW structure.
+ *  @access: NVM access type (read or write).
+ *
+ *  This function will request NVM ownership for reading
+ *  via the proper Admin Command.  If the resource is busy, the request
+ *  is retried until the current owner's timeout expires.  In blank NVM
+ *  mode there is nothing to acquire and 0 is returned immediately.
+ **/
+i40e_status i40e_acquire_nvm(struct i40e_hw *hw,
+                                      enum i40e_aq_resource_access_type access)
+{
+       i40e_status ret_code = 0;
+       u64 gtime, timeout;
+       u64 time = 0;
+
+       if (hw->nvm.blank_nvm_mode)
+               goto i40e_i40e_acquire_nvm_exit;
+
+       /* time is filled in by firmware with the owner's timeout in ms */
+       ret_code = i40e_aq_request_resource(hw, I40E_NVM_RESOURCE_ID, access,
+                                           0, &time, NULL);
+       /* Reading the Global Device Timer. */
+       gtime = rd32(hw, I40E_GLVFGEN_TIMER);
+
+       /* Store the timeout. */
+       hw->nvm.hw_semaphore_timeout = I40E_MS_TO_GTIME(time) + gtime;
+
+       if (ret_code) {
+               /* Set the polling timeout. */
+               if (time > I40E_MAX_NVM_TIMEOUT)
+                       timeout = I40E_MS_TO_GTIME(I40E_MAX_NVM_TIMEOUT)
+                                 + gtime;
+               else
+                       timeout = hw->nvm.hw_semaphore_timeout;
+               /* Poll until the current NVM owner timeouts. */
+               while (gtime < timeout) {
+                       usleep_range(10000, 20000);
+                       ret_code = i40e_aq_request_resource(hw,
+                                                       I40E_NVM_RESOURCE_ID,
+                                                       access, 0, &time,
+                                                       NULL);
+                       if (!ret_code) {
+                               hw->nvm.hw_semaphore_timeout =
+                                               I40E_MS_TO_GTIME(time) + gtime;
+                               break;
+                       }
+                       gtime = rd32(hw, I40E_GLVFGEN_TIMER);
+               }
+               if (ret_code) {
+                       /* give the caller a hint of when to retry */
+                       hw->nvm.hw_semaphore_timeout = 0;
+                       hw->nvm.hw_semaphore_wait =
+                                               I40E_MS_TO_GTIME(time) + gtime;
+                       hw_dbg(hw, "NVM acquire timed out, wait %llu ms before trying again.\n",
+                                 time);
+               }
+       }
+
+i40e_i40e_acquire_nvm_exit:
+       return ret_code;
+}
+
+/**
+ *  i40e_release_nvm - Generic request for releasing the NVM ownership.
+ *  @hw: pointer to the HW structure.
+ *
+ *  This function will release NVM resource via the proper Admin Command.
+ *  No-op in blank NVM mode (nothing was acquired).
+ **/
+void i40e_release_nvm(struct i40e_hw *hw)
+{
+       if (!hw->nvm.blank_nvm_mode)
+               i40e_aq_release_resource(hw, I40E_NVM_RESOURCE_ID, 0, NULL);
+}
+
+/**
+ *  i40e_poll_sr_srctl_done_bit - Polls the GLNVM_SRCTL done bit.
+ *  @hw: pointer to the HW structure.
+ *
+ *  Polls the SRCTL Shadow RAM register done bit.  Returns 0 once the
+ *  bit is observed set, or I40E_ERR_TIMEOUT after
+ *  I40E_SRRD_SRCTL_ATTEMPTS reads (5us apart) without it.
+ **/
+static i40e_status i40e_poll_sr_srctl_done_bit(struct i40e_hw *hw)
+{
+       i40e_status ret_code = I40E_ERR_TIMEOUT;
+       u32 srctl, wait_cnt;
+
+       /* Poll the I40E_GLNVM_SRCTL until the done bit is set. */
+       for (wait_cnt = 0; wait_cnt < I40E_SRRD_SRCTL_ATTEMPTS; wait_cnt++) {
+               srctl = rd32(hw, I40E_GLNVM_SRCTL);
+               if (srctl & I40E_GLNVM_SRCTL_DONE_MASK) {
+                       ret_code = 0;
+                       break;
+               }
+               udelay(5);
+       }
+       if (ret_code == I40E_ERR_TIMEOUT)
+               hw_dbg(hw, "Done bit in GLNVM_SRCTL not set");
+       return ret_code;
+}
+
+/**
+ *  i40e_read_nvm_srctl - Reads Shadow RAM.
+ *  @hw: pointer to the HW structure.
+ *  @offset: offset of the Shadow RAM word to read (0x000000 - 0x001FFF).
+ *  @data: word read from the Shadow RAM.
+ *
+ *  Reads 16 bit word from the Shadow RAM using the GLNVM_SRCTL register.
+ *  Caller is expected to hold NVM ownership; this routine does not
+ *  acquire it.
+ **/
+static i40e_status i40e_read_nvm_srctl(struct i40e_hw *hw, u16 offset,
+                                                u16 *data)
+{
+       i40e_status ret_code = I40E_ERR_TIMEOUT;
+       u32 sr_reg;
+
+       if (offset >= hw->nvm.sr_size) {
+               hw_dbg(hw, "NVM read error: Offset beyond Shadow RAM limit.\n");
+               ret_code = I40E_ERR_PARAM;
+               goto read_nvm_exit;
+       }
+
+       /* Poll the done bit first. */
+       ret_code = i40e_poll_sr_srctl_done_bit(hw);
+       if (!ret_code) {
+               /* Write the address and start reading. */
+               sr_reg = (u32)(offset << I40E_GLNVM_SRCTL_ADDR_SHIFT) |
+                        (1 << I40E_GLNVM_SRCTL_START_SHIFT);
+               wr32(hw, I40E_GLNVM_SRCTL, sr_reg);
+
+               /* Poll I40E_GLNVM_SRCTL until the done bit is set. */
+               ret_code = i40e_poll_sr_srctl_done_bit(hw);
+               if (!ret_code) {
+                       /* extract the 16-bit payload from SRDATA */
+                       sr_reg = rd32(hw, I40E_GLNVM_SRDATA);
+                       *data = (u16)((sr_reg &
+                                      I40E_GLNVM_SRDATA_RDDATA_MASK)
+                                   >> I40E_GLNVM_SRDATA_RDDATA_SHIFT);
+               }
+       }
+       if (ret_code)
+               hw_dbg(hw, "NVM read error: Couldn't access Shadow RAM address: 0x%x\n",
+                         offset);
+
+read_nvm_exit:
+       return ret_code;
+}
+
+/**
+ *  i40e_read_nvm_word - Reads Shadow RAM word.
+ *  @hw: pointer to the HW structure.
+ *  @offset: offset of the Shadow RAM word to read (0x000000 - 0x001FFF).
+ *  @data: word read from the Shadow RAM.
+ *
+ *  Reads 16 bit word from the Shadow RAM. Each read is preceded
+ *  with the NVM ownership taking and followed by the release.
+ **/
+i40e_status i40e_read_nvm_word(struct i40e_hw *hw, u16 offset,
+                                        u16 *data)
+{
+       i40e_status ret_code = 0;
+
+       ret_code = i40e_acquire_nvm(hw, I40E_RESOURCE_READ);
+       if (!ret_code) {
+               /* ownership held across the single-word read only */
+               ret_code = i40e_read_nvm_srctl(hw, offset, data);
+               i40e_release_nvm(hw);
+       }
+
+       return ret_code;
+}
+
+/**
+ *  i40e_read_nvm_buffer - Reads Shadow RAM buffer.
+ *  @hw: pointer to the HW structure.
+ *  @offset: offset of the Shadow RAM word to read (0x000000 - 0x001FFF).
+ *  @words: number of words to read (in) &
+ *          number of words read before the NVM ownership timeout (out).
+ *  @data: words read from the Shadow RAM.
+ *
+ *  Reads 16 bit words (data buffer) from the SR using the
+ *  i40e_read_nvm_srctl() method. The buffer read is preceded by the NVM
+ *  ownership take and followed by the release.
+ **/
+i40e_status i40e_read_nvm_buffer(struct i40e_hw *hw, u16 offset,
+                                          u16 *words, u16 *data)
+{
+       i40e_status ret_code = 0;
+       u16 index, word;
+       u32 time;
+
+       ret_code = i40e_acquire_nvm(hw, I40E_RESOURCE_READ);
+       if (!ret_code) {
+               /* Loop thru the selected region. */
+               for (word = 0; word < *words; word++) {
+                       index = offset + word;
+                       ret_code = i40e_read_nvm_srctl(hw, index, &data[word]);
+                       if (ret_code)
+                               break;
+                       /* Check that we have not exceeded the semaphore
+                        * timeout granted on acquire.
+                        */
+                       time = rd32(hw, I40E_GLVFGEN_TIMER);
+                       if (time >= hw->nvm.hw_semaphore_timeout) {
+                               ret_code = I40E_ERR_TIMEOUT;
+                               hw_dbg(hw, "NVM read error: timeout.\n");
+                               break;
+                       }
+               }
+               /* Update the number of words read from the Shadow RAM. */
+               *words = word;
+               /* Release the NVM ownership. */
+               i40e_release_nvm(hw);
+       }
+
+       return ret_code;
+}
+
+/**
+ *  i40e_calc_nvm_checksum - Calculates and returns the checksum
+ *  @hw: pointer to hardware structure
+ *  @checksum: on success, the calculated SW checksum
+ *
+ *  This function calculate SW Checksum that covers the whole 64kB shadow RAM
+ *  except the VPD and PCIe ALT Auto-load modules. The structure and size of VPD
+ *  is customer specific and unknown. Therefore, this function skips all maximum
+ *  possible size of VPD (1kB).
+ *
+ *  Caller must hold NVM ownership (uses i40e_read_nvm_srctl() directly).
+ **/
+static i40e_status i40e_calc_nvm_checksum(struct i40e_hw *hw,
+                                                   u16 *checksum)
+{
+       i40e_status ret_code = 0;
+       u16 pcie_alt_module = 0;
+       u16 checksum_local = 0;
+       u16 vpd_module = 0;
+       u16 word = 0;
+       u32 i = 0;
+
+       /* read pointer to VPD area */
+       ret_code = i40e_read_nvm_srctl(hw, I40E_SR_VPD_PTR, &vpd_module);
+       if (ret_code) {
+               ret_code = I40E_ERR_NVM_CHECKSUM;
+               goto i40e_calc_nvm_checksum_exit;
+       }
+
+       /* read pointer to PCIe Alt Auto-load module */
+       ret_code = i40e_read_nvm_srctl(hw, I40E_SR_PCIE_ALT_AUTO_LOAD_PTR,
+                                      &pcie_alt_module);
+       if (ret_code) {
+               ret_code = I40E_ERR_NVM_CHECKSUM;
+               goto i40e_calc_nvm_checksum_exit;
+       }
+
+       /* Calculate SW checksum that covers the whole 64kB shadow RAM
+        * except the VPD and PCIe ALT Auto-load modules
+        */
+       for (i = 0; i < hw->nvm.sr_size; i++) {
+               /* Skip Checksum word */
+               if (i == I40E_SR_SW_CHECKSUM_WORD)
+                       i++;
+               /* Skip VPD module (convert byte size to word count) */
+               if (i == (u32)vpd_module) {
+                       i += (I40E_SR_VPD_MODULE_MAX_SIZE / 2);
+                       if (i >= hw->nvm.sr_size)
+                               break;
+               }
+               /* Skip PCIe ALT module (convert byte size to word count) */
+               if (i == (u32)pcie_alt_module) {
+                       i += (I40E_SR_PCIE_ALT_MODULE_MAX_SIZE / 2);
+                       if (i >= hw->nvm.sr_size)
+                               break;
+               }
+
+               ret_code = i40e_read_nvm_srctl(hw, (u16)i, &word);
+               if (ret_code) {
+                       ret_code = I40E_ERR_NVM_CHECKSUM;
+                       goto i40e_calc_nvm_checksum_exit;
+               }
+               /* u16 arithmetic wraps mod 2^16, matching the SR scheme */
+               checksum_local += word;
+       }
+
+       *checksum = (u16)I40E_SR_SW_CHECKSUM_BASE - checksum_local;
+
+i40e_calc_nvm_checksum_exit:
+       return ret_code;
+}
+
+/**
+ *  i40e_validate_nvm_checksum - Validate EEPROM checksum
+ *  @hw: pointer to hardware structure
+ *  @checksum: calculated checksum
+ *
+ *  Performs checksum calculation and validates the NVM SW checksum. If the
+ *  caller does not need checksum, the value can be NULL.
+ *
+ *  Returns 0 when stored and calculated checksums match,
+ *  I40E_ERR_NVM_CHECKSUM on mismatch, or the acquire/calc error code.
+ **/
+i40e_status i40e_validate_nvm_checksum(struct i40e_hw *hw,
+                                                u16 *checksum)
+{
+       i40e_status ret_code = 0;
+       u16 checksum_sr = 0;
+       u16 checksum_local;
+
+       ret_code = i40e_acquire_nvm(hw, I40E_RESOURCE_READ);
+       if (ret_code)
+               goto i40e_validate_nvm_checksum_exit;
+
+       ret_code = i40e_calc_nvm_checksum(hw, &checksum_local);
+       if (ret_code)
+               goto i40e_validate_nvm_checksum_free;
+
+       /* Do not use i40e_read_nvm_word() because we do not want to take
+        * the synchronization semaphores twice here.
+        */
+       i40e_read_nvm_srctl(hw, I40E_SR_SW_CHECKSUM_WORD, &checksum_sr);
+
+       /* Verify read checksum from EEPROM is the same as
+        * calculated checksum
+        */
+       if (checksum_local != checksum_sr)
+               ret_code = I40E_ERR_NVM_CHECKSUM;
+
+       /* If the user cares, return the calculated checksum */
+       if (checksum)
+               *checksum = checksum_local;
+
+i40e_validate_nvm_checksum_free:
+       i40e_release_nvm(hw);
+
+i40e_validate_nvm_checksum_exit:
+       return ret_code;
+}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_osdep.h b/drivers/net/ethernet/intel/i40e/i40e_osdep.h
new file mode 100644 (file)
index 0000000..702c81b
--- /dev/null
@@ -0,0 +1,82 @@
+/*******************************************************************************
+ *
+ * Intel Ethernet Controller XL710 Family Linux Driver
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Contact Information:
+ * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ *
+ ******************************************************************************/
+
+#ifndef _I40E_OSDEP_H_
+#define _I40E_OSDEP_H_
+
+#include <linux/types.h>
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+#include <linux/tcp.h>
+#include <linux/pci.h>
+#include <linux/highuid.h>
+
+/* get readq/writeq support for 32 bit kernels, use the low-first version */
+#include <asm-generic/io-64-nonatomic-lo-hi.h>
+
+/* File to be the magic between shared code and
+ * actual OS primitives
+ */
+
+/* compiled out; shared code's debug hook is a no-op in this driver */
+#define hw_dbg(hw, S, A...)    do {} while (0)
+
+/* 32/64-bit MMIO register accessors; reg is a byte offset into BAR 0 */
+#define wr32(a, reg, value)    writel((value), ((a)->hw_addr + (reg)))
+#define rd32(a, reg)           readl((a)->hw_addr + (reg))
+
+#define wr64(a, reg, value)    writeq((value), ((a)->hw_addr + (reg)))
+#define rd64(a, reg)           readq((a)->hw_addr + (reg))
+/* posted-write flush: read a harmless register to force writes out */
+#define i40e_flush(a)          readl((a)->hw_addr + I40E_GLGEN_STAT)
+
+/* memory allocation tracking */
+/* DMA-coherent buffer descriptor used by the shared code.
+ * NOTE(review): __packed here presumably mirrors the shared-code ABI —
+ * confirm before changing the layout.
+ */
+struct i40e_dma_mem {
+       void *va;
+       dma_addr_t pa;
+       u32 size;
+} __packed;
+
+/* map shared-code allocation hooks onto the Linux implementations;
+ * the third (alignment-type) argument is unused on Linux
+ */
+#define i40e_allocate_dma_mem(h, m, unused, s, a) \
+                       i40e_allocate_dma_mem_d(h, m, s, a)
+#define i40e_free_dma_mem(h, m) i40e_free_dma_mem_d(h, m)
+
+/* plain (virtual) memory descriptor used by the shared code */
+struct i40e_virt_mem {
+       void *va;
+       u32 size;
+} __packed;
+
+#define i40e_allocate_virt_mem(h, m, s) i40e_allocate_virt_mem_d(h, m, s)
+#define i40e_free_virt_mem(h, m) i40e_free_virt_mem_d(h, m)
+
+/* gated debug printk: emitted only when the mask bit m is set in
+ * the hw debug_mask
+ */
+#define i40e_debug(h, m, s, ...)                                \
+do {                                                            \
+       if (((m) & (h)->debug_mask))                            \
+               pr_info("i40e %02x.%x " s,                      \
+                       (h)->bus.device, (h)->bus.func,         \
+                       ##__VA_ARGS__);                         \
+} while (0)
+
+typedef enum i40e_status_code i40e_status;
+#endif /* _I40E_OSDEP_H_ */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h
new file mode 100644 (file)
index 0000000..f75bb9c
--- /dev/null
@@ -0,0 +1,239 @@
+/*******************************************************************************
+ *
+ * Intel Ethernet Controller XL710 Family Linux Driver
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Contact Information:
+ * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ *
+ ******************************************************************************/
+
+#ifndef _I40E_PROTOTYPE_H_
+#define _I40E_PROTOTYPE_H_
+
+#include "i40e_type.h"
+#include "i40e_alloc.h"
+#include "i40e_virtchnl.h"
+
+/* Prototypes for shared code functions that are not in
+ * the standard function pointer structures.  These are
+ * mostly because they are needed even before the init
+ * has happened and will assist in the early SW and FW
+ * setup.
+ */
+
+/* adminq functions */
+i40e_status i40e_init_adminq(struct i40e_hw *hw);
+i40e_status i40e_shutdown_adminq(struct i40e_hw *hw);
+void i40e_adminq_init_ring_data(struct i40e_hw *hw);
+i40e_status i40e_clean_arq_element(struct i40e_hw *hw,
+                                            struct i40e_arq_event_info *e,
+                                            u16 *events_pending);
+i40e_status i40e_asq_send_command(struct i40e_hw *hw,
+                               struct i40e_aq_desc *desc,
+                               void *buff, /* can be NULL */
+                               u16  buff_size,
+                               struct i40e_asq_cmd_details *cmd_details);
+bool i40e_asq_done(struct i40e_hw *hw);
+
+/* debug function for adminq */
+void i40e_debug_aq(struct i40e_hw *hw,
+                  enum i40e_debug_mask mask,
+                  void *desc,
+                  void *buffer);
+
+void i40e_idle_aq(struct i40e_hw *hw);
+void i40e_resume_aq(struct i40e_hw *hw);
+
+u32 i40e_led_get(struct i40e_hw *hw);
+void i40e_led_set(struct i40e_hw *hw, u32 mode);
+
+/* admin send queue commands */
+
+i40e_status i40e_aq_get_firmware_version(struct i40e_hw *hw,
+                               u16 *fw_major_version, u16 *fw_minor_version,
+                               u16 *api_major_version, u16 *api_minor_version,
+                               struct i40e_asq_cmd_details *cmd_details);
+i40e_status i40e_aq_queue_shutdown(struct i40e_hw *hw,
+                                            bool unloading);
+i40e_status i40e_aq_set_phy_reset(struct i40e_hw *hw,
+                               struct i40e_asq_cmd_details *cmd_details);
+i40e_status i40e_aq_set_default_vsi(struct i40e_hw *hw, u16 vsi_id,
+                               struct i40e_asq_cmd_details *cmd_details);
+i40e_status i40e_aq_set_link_restart_an(struct i40e_hw *hw,
+                               struct i40e_asq_cmd_details *cmd_details);
+i40e_status i40e_aq_get_link_info(struct i40e_hw *hw,
+                               bool enable_lse, struct i40e_link_status *link,
+                               struct i40e_asq_cmd_details *cmd_details);
+i40e_status i40e_aq_set_local_advt_reg(struct i40e_hw *hw,
+                               u64 advt_reg,
+                               struct i40e_asq_cmd_details *cmd_details);
+i40e_status i40e_aq_send_driver_version(struct i40e_hw *hw,
+                               struct i40e_driver_version *dv,
+                               struct i40e_asq_cmd_details *cmd_details);
+i40e_status i40e_aq_add_vsi(struct i40e_hw *hw,
+                               struct i40e_vsi_context *vsi_ctx,
+                               struct i40e_asq_cmd_details *cmd_details);
+i40e_status i40e_aq_set_vsi_broadcast(struct i40e_hw *hw,
+                               u16 vsi_id, bool set_filter,
+                               struct i40e_asq_cmd_details *cmd_details);
+i40e_status i40e_aq_set_vsi_unicast_promiscuous(struct i40e_hw *hw,
+                               u16 vsi_id, bool set, struct i40e_asq_cmd_details *cmd_details);
+i40e_status i40e_aq_set_vsi_multicast_promiscuous(struct i40e_hw *hw,
+                               u16 vsi_id, bool set, struct i40e_asq_cmd_details *cmd_details);
+i40e_status i40e_aq_get_vsi_params(struct i40e_hw *hw,
+                               struct i40e_vsi_context *vsi_ctx,
+                               struct i40e_asq_cmd_details *cmd_details);
+i40e_status i40e_aq_update_vsi_params(struct i40e_hw *hw,
+                               struct i40e_vsi_context *vsi_ctx,
+                               struct i40e_asq_cmd_details *cmd_details);
+i40e_status i40e_aq_add_veb(struct i40e_hw *hw, u16 uplink_seid,
+                               u16 downlink_seid, u8 enabled_tc,
+                               bool default_port, u16 *pveb_seid,
+                               struct i40e_asq_cmd_details *cmd_details);
+i40e_status i40e_aq_get_veb_parameters(struct i40e_hw *hw,
+                               u16 veb_seid, u16 *switch_id, bool *floating,
+                               u16 *statistic_index, u16 *vebs_used,
+                               u16 *vebs_free,
+                               struct i40e_asq_cmd_details *cmd_details);
+i40e_status i40e_aq_add_macvlan(struct i40e_hw *hw, u16 vsi_id,
+                       struct i40e_aqc_add_macvlan_element_data *mv_list,
+                       u16 count, struct i40e_asq_cmd_details *cmd_details);
+i40e_status i40e_aq_remove_macvlan(struct i40e_hw *hw, u16 vsi_id,
+                       struct i40e_aqc_remove_macvlan_element_data *mv_list,
+                       u16 count, struct i40e_asq_cmd_details *cmd_details);
+i40e_status i40e_aq_add_vlan(struct i40e_hw *hw, u16 vsi_id,
+                       struct i40e_aqc_add_remove_vlan_element_data *v_list,
+                       u8 count, struct i40e_asq_cmd_details *cmd_details);
+i40e_status i40e_aq_remove_vlan(struct i40e_hw *hw, u16 vsi_id,
+                       struct i40e_aqc_add_remove_vlan_element_data *v_list,
+                       u8 count, struct i40e_asq_cmd_details *cmd_details);
+i40e_status i40e_aq_send_msg_to_vf(struct i40e_hw *hw, u16 vfid,
+                               u32 v_opcode, u32 v_retval, u8 *msg, u16 msglen,
+                               struct i40e_asq_cmd_details *cmd_details);
+i40e_status i40e_aq_get_switch_config(struct i40e_hw *hw,
+                               struct i40e_aqc_get_switch_config_resp *buf,
+                               u16 buf_size, u16 *start_seid,
+                               struct i40e_asq_cmd_details *cmd_details);
+i40e_status i40e_aq_request_resource(struct i40e_hw *hw,
+                               enum i40e_aq_resources_ids resource,
+                               enum i40e_aq_resource_access_type access,
+                               u8 sdp_number, u64 *timeout,
+                               struct i40e_asq_cmd_details *cmd_details);
+i40e_status i40e_aq_release_resource(struct i40e_hw *hw,
+                               enum i40e_aq_resources_ids resource,
+                               u8 sdp_number,
+                               struct i40e_asq_cmd_details *cmd_details);
+i40e_status i40e_aq_read_nvm(struct i40e_hw *hw, u8 module_pointer,
+                               u32 offset, u16 length, void *data,
+                               bool last_command,
+                               struct i40e_asq_cmd_details *cmd_details);
+i40e_status i40e_aq_discover_capabilities(struct i40e_hw *hw,
+                               void *buff, u16 buff_size, u16 *data_size,
+                               enum i40e_admin_queue_opc list_type_opc,
+                               struct i40e_asq_cmd_details *cmd_details);
+i40e_status i40e_aq_update_nvm(struct i40e_hw *hw, u8 module_pointer,
+                               u32 offset, u16 length, void *data,
+                               bool last_command,
+                               struct i40e_asq_cmd_details *cmd_details);
+i40e_status i40e_aq_get_lldp_mib(struct i40e_hw *hw, u8 bridge_type,
+                               u8 mib_type, void *buff, u16 buff_size,
+                               u16 *local_len, u16 *remote_len,
+                               struct i40e_asq_cmd_details *cmd_details);
+i40e_status i40e_aq_cfg_lldp_mib_change_event(struct i40e_hw *hw,
+                               bool enable_update,
+                               struct i40e_asq_cmd_details *cmd_details);
+i40e_status i40e_aq_stop_lldp(struct i40e_hw *hw, bool shutdown_agent,
+                               struct i40e_asq_cmd_details *cmd_details);
+i40e_status i40e_aq_start_lldp(struct i40e_hw *hw,
+                               struct i40e_asq_cmd_details *cmd_details);
+i40e_status i40e_aq_delete_element(struct i40e_hw *hw, u16 seid,
+                               struct i40e_asq_cmd_details *cmd_details);
+i40e_status i40e_aq_mac_address_write(struct i40e_hw *hw,
+                                   u16 flags, u8 *mac_addr,
+                                   struct i40e_asq_cmd_details *cmd_details);
+i40e_status i40e_aq_set_hmc_resource_profile(struct i40e_hw *hw,
+                               enum i40e_aq_hmc_profile profile,
+                               u8 pe_vf_enabled_count,
+                               struct i40e_asq_cmd_details *cmd_details);
+i40e_status i40e_aq_config_switch_comp_bw_limit(struct i40e_hw *hw,
+                               u16 seid, u16 credit, u8 max_bw,
+                               struct i40e_asq_cmd_details *cmd_details);
+i40e_status i40e_aq_config_vsi_tc_bw(struct i40e_hw *hw, u16 seid,
+                       struct i40e_aqc_configure_vsi_tc_bw_data *bw_data,
+                       struct i40e_asq_cmd_details *cmd_details);
+i40e_status i40e_aq_query_vsi_bw_config(struct i40e_hw *hw,
+                       u16 seid,
+                       struct i40e_aqc_query_vsi_bw_config_resp *bw_data,
+                       struct i40e_asq_cmd_details *cmd_details);
+i40e_status i40e_aq_query_vsi_ets_sla_config(struct i40e_hw *hw,
+                       u16 seid,
+                       struct i40e_aqc_query_vsi_ets_sla_config_resp *bw_data,
+                       struct i40e_asq_cmd_details *cmd_details);
+i40e_status i40e_aq_query_switch_comp_ets_config(struct i40e_hw *hw,
+               u16 seid,
+               struct i40e_aqc_query_switching_comp_ets_config_resp *bw_data,
+               struct i40e_asq_cmd_details *cmd_details);
+i40e_status i40e_aq_query_port_ets_config(struct i40e_hw *hw,
+               u16 seid,
+               struct i40e_aqc_query_port_ets_config_resp *bw_data,
+               struct i40e_asq_cmd_details *cmd_details);
+i40e_status i40e_aq_query_switch_comp_bw_config(struct i40e_hw *hw,
+               u16 seid,
+               struct i40e_aqc_query_switching_comp_bw_config_resp *bw_data,
+               struct i40e_asq_cmd_details *cmd_details);
+/* i40e_common */
+i40e_status i40e_init_shared_code(struct i40e_hw *hw);
+i40e_status i40e_pf_reset(struct i40e_hw *hw);
+void i40e_clear_pxe_mode(struct i40e_hw *hw);
+bool i40e_get_link_status(struct i40e_hw *hw);
+i40e_status i40e_get_mac_addr(struct i40e_hw *hw,
+                                               u8 *mac_addr);
+i40e_status i40e_validate_mac_addr(u8 *mac_addr);
+i40e_status i40e_read_lldp_cfg(struct i40e_hw *hw,
+                                       struct i40e_lldp_variables *lldp_cfg);
+/* prototypes for functions used for NVM access */
+i40e_status i40e_init_nvm(struct i40e_hw *hw);
+i40e_status i40e_acquire_nvm(struct i40e_hw *hw,
+                                     enum i40e_aq_resource_access_type access);
+void i40e_release_nvm(struct i40e_hw *hw);
+i40e_status i40e_read_nvm_srrd(struct i40e_hw *hw, u16 offset,
+                                        u16 *data);
+i40e_status i40e_read_nvm_word(struct i40e_hw *hw, u16 offset,
+                                        u16 *data);
+i40e_status i40e_read_nvm_buffer(struct i40e_hw *hw, u16 offset,
+                                          u16 *words, u16 *data);
+i40e_status i40e_validate_nvm_checksum(struct i40e_hw *hw,
+                                                u16 *checksum);
+
+/* prototypes for functions used for SW locks */
+
+/* i40e_common for VF drivers */
+void i40e_vf_parse_hw_config(struct i40e_hw *hw,
+                            struct i40e_virtchnl_vf_resource *msg);
+i40e_status i40e_vf_reset(struct i40e_hw *hw);
+i40e_status i40e_aq_send_msg_to_pf(struct i40e_hw *hw,
+                               enum i40e_virtchnl_ops v_opcode,
+                               i40e_status v_retval,
+                               u8 *msg, u16 msglen,
+                               struct i40e_asq_cmd_details *cmd_details);
+i40e_status i40e_set_filter_control(struct i40e_hw *hw,
+                               struct i40e_filter_control_settings *settings);
+#endif /* _I40E_PROTOTYPE_H_ */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_register.h b/drivers/net/ethernet/intel/i40e/i40e_register.h
new file mode 100644 (file)
index 0000000..6bd333c
--- /dev/null
@@ -0,0 +1,4688 @@
+/*******************************************************************************
+ *
+ * Intel Ethernet Controller XL710 Family Linux Driver
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Contact Information:
+ * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ *
+ ******************************************************************************/
+
+#ifndef _I40E_REGISTER_H_
+#define _I40E_REGISTER_H_
+
+#define I40E_GLPCI_PM_MUX_NPQ 0x0009C4F4
+#define I40E_GLPCI_PM_MUX_NPQ_NPQ_NUM_PORT_SEL_SHIFT 0
+#define I40E_GLPCI_PM_MUX_NPQ_NPQ_NUM_PORT_SEL_MASK (0x7 << I40E_GLPCI_PM_MUX_NPQ_NPQ_NUM_PORT_SEL_SHIFT)
+#define I40E_GLPCI_PM_MUX_NPQ_INNER_NPQ_SEL_SHIFT 16
+#define I40E_GLPCI_PM_MUX_NPQ_INNER_NPQ_SEL_MASK (0x1F << I40E_GLPCI_PM_MUX_NPQ_INNER_NPQ_SEL_SHIFT)
+#define I40E_GLPCI_PM_MUX_PFB 0x0009C4F0
+#define I40E_GLPCI_PM_MUX_PFB_PFB_PORT_SEL_SHIFT 0
+#define I40E_GLPCI_PM_MUX_PFB_PFB_PORT_SEL_MASK (0x1F << I40E_GLPCI_PM_MUX_PFB_PFB_PORT_SEL_SHIFT)
+#define I40E_GLPCI_PM_MUX_PFB_INNER_PORT_SEL_SHIFT 16
+#define I40E_GLPCI_PM_MUX_PFB_INNER_PORT_SEL_MASK (0x7 << I40E_GLPCI_PM_MUX_PFB_INNER_PORT_SEL_SHIFT)
+#define I40E_GLPCI_SPARE_BITS_0 0x0009C4F8
+#define I40E_GLPCI_SPARE_BITS_0_SPARE_BITS_SHIFT 0
+#define I40E_GLPCI_SPARE_BITS_0_SPARE_BITS_MASK (0xFFFFFFFF << I40E_GLPCI_SPARE_BITS_0_SPARE_BITS_SHIFT)
+#define I40E_GLPCI_SPARE_BITS_1 0x0009C4FC
+#define I40E_GLPCI_SPARE_BITS_1_SPARE_BITS_SHIFT 0
+#define I40E_GLPCI_SPARE_BITS_1_SPARE_BITS_MASK (0xFFFFFFFF << I40E_GLPCI_SPARE_BITS_1_SPARE_BITS_SHIFT)
+#define I40E_PFPCI_PF_FLUSH_DONE 0x0009C800
+#define I40E_PFPCI_PF_FLUSH_DONE_FLUSH_DONE_SHIFT 0
+#define I40E_PFPCI_PF_FLUSH_DONE_FLUSH_DONE_MASK (0x1 << I40E_PFPCI_PF_FLUSH_DONE_FLUSH_DONE_SHIFT)
+#define I40E_PFPCI_VF_FLUSH_DONE 0x0009C600
+#define I40E_PFPCI_VF_FLUSH_DONE_FLUSH_DONE_SHIFT 0
+#define I40E_PFPCI_VF_FLUSH_DONE_FLUSH_DONE_MASK (0x1 << I40E_PFPCI_VF_FLUSH_DONE_FLUSH_DONE_SHIFT)
+#define I40E_PFPCI_VM_FLUSH_DONE 0x0009C880
+#define I40E_PFPCI_VM_FLUSH_DONE_FLUSH_DONE_SHIFT 0
+#define I40E_PFPCI_VM_FLUSH_DONE_FLUSH_DONE_MASK (0x1 << I40E_PFPCI_VM_FLUSH_DONE_FLUSH_DONE_SHIFT)
+#define I40E_PF_ARQBAH 0x00080180
+#define I40E_PF_ARQBAH_ARQBAH_SHIFT 0
+#define I40E_PF_ARQBAH_ARQBAH_MASK (0xFFFFFFFF << I40E_PF_ARQBAH_ARQBAH_SHIFT)
+#define I40E_PF_ARQBAL 0x00080080
+#define I40E_PF_ARQBAL_ARQBAL_SHIFT 0
+#define I40E_PF_ARQBAL_ARQBAL_MASK (0xFFFFFFFF << I40E_PF_ARQBAL_ARQBAL_SHIFT)
+#define I40E_PF_ARQH 0x00080380
+#define I40E_PF_ARQH_ARQH_SHIFT 0
+#define I40E_PF_ARQH_ARQH_MASK (0x3FF << I40E_PF_ARQH_ARQH_SHIFT)
+#define I40E_PF_ARQLEN 0x00080280
+#define I40E_PF_ARQLEN_ARQLEN_SHIFT 0
+#define I40E_PF_ARQLEN_ARQLEN_MASK (0x3FF << I40E_PF_ARQLEN_ARQLEN_SHIFT)
+#define I40E_PF_ARQLEN_ARQVFE_SHIFT 28
+#define I40E_PF_ARQLEN_ARQVFE_MASK (0x1 << I40E_PF_ARQLEN_ARQVFE_SHIFT)
+#define I40E_PF_ARQLEN_ARQOVFL_SHIFT 29
+#define I40E_PF_ARQLEN_ARQOVFL_MASK (0x1 << I40E_PF_ARQLEN_ARQOVFL_SHIFT)
+#define I40E_PF_ARQLEN_ARQCRIT_SHIFT 30
+#define I40E_PF_ARQLEN_ARQCRIT_MASK (0x1 << I40E_PF_ARQLEN_ARQCRIT_SHIFT)
+#define I40E_PF_ARQLEN_ARQENABLE_SHIFT 31
+#define I40E_PF_ARQLEN_ARQENABLE_MASK (0x1 << I40E_PF_ARQLEN_ARQENABLE_SHIFT)
+#define I40E_PF_ARQT 0x00080480
+#define I40E_PF_ARQT_ARQT_SHIFT 0
+#define I40E_PF_ARQT_ARQT_MASK (0x3FF << I40E_PF_ARQT_ARQT_SHIFT)
+#define I40E_PF_ATQBAH 0x00080100
+#define I40E_PF_ATQBAH_ATQBAH_SHIFT 0
+#define I40E_PF_ATQBAH_ATQBAH_MASK (0xFFFFFFFF << I40E_PF_ATQBAH_ATQBAH_SHIFT)
+#define I40E_PF_ATQBAL 0x00080000
+#define I40E_PF_ATQBAL_ATQBAL_SHIFT 0
+#define I40E_PF_ATQBAL_ATQBAL_MASK (0xFFFFFFFF << I40E_PF_ATQBAL_ATQBAL_SHIFT)
+#define I40E_PF_ATQH 0x00080300
+#define I40E_PF_ATQH_ATQH_SHIFT 0
+#define I40E_PF_ATQH_ATQH_MASK (0x3FF << I40E_PF_ATQH_ATQH_SHIFT)
+#define I40E_PF_ATQLEN 0x00080200
+#define I40E_PF_ATQLEN_ATQLEN_SHIFT 0
+#define I40E_PF_ATQLEN_ATQLEN_MASK (0x3FF << I40E_PF_ATQLEN_ATQLEN_SHIFT)
+#define I40E_PF_ATQLEN_ATQVFE_SHIFT 28
+#define I40E_PF_ATQLEN_ATQVFE_MASK (0x1 << I40E_PF_ATQLEN_ATQVFE_SHIFT)
+#define I40E_PF_ATQLEN_ATQOVFL_SHIFT 29
+#define I40E_PF_ATQLEN_ATQOVFL_MASK (0x1 << I40E_PF_ATQLEN_ATQOVFL_SHIFT)
+#define I40E_PF_ATQLEN_ATQCRIT_SHIFT 30
+#define I40E_PF_ATQLEN_ATQCRIT_MASK (0x1 << I40E_PF_ATQLEN_ATQCRIT_SHIFT)
+#define I40E_PF_ATQLEN_ATQENABLE_SHIFT 31
+#define I40E_PF_ATQLEN_ATQENABLE_MASK (0x1 << I40E_PF_ATQLEN_ATQENABLE_SHIFT)
+#define I40E_PF_ATQT 0x00080400
+#define I40E_PF_ATQT_ATQT_SHIFT 0
+#define I40E_PF_ATQT_ATQT_MASK (0x3FF << I40E_PF_ATQT_ATQT_SHIFT)
+#define I40E_VF_ARQBAH(_VF) (0x00081400 + ((_VF) * 4)) /* _i=0...127 */
+#define I40E_VF_ARQBAH_MAX_INDEX 127
+#define I40E_VF_ARQBAH_ARQBAH_SHIFT 0
+#define I40E_VF_ARQBAH_ARQBAH_MASK (0xFFFFFFFF << I40E_VF_ARQBAH_ARQBAH_SHIFT)
+#define I40E_VF_ARQBAL(_VF) (0x00080C00 + ((_VF) * 4)) /* _i=0...127 */
+#define I40E_VF_ARQBAL_MAX_INDEX 127
+#define I40E_VF_ARQBAL_ARQBAL_SHIFT 0
+#define I40E_VF_ARQBAL_ARQBAL_MASK (0xFFFFFFFF << I40E_VF_ARQBAL_ARQBAL_SHIFT)
+#define I40E_VF_ARQH(_VF) (0x00082400 + ((_VF) * 4)) /* _i=0...127 */
+#define I40E_VF_ARQH_MAX_INDEX 127
+#define I40E_VF_ARQH_ARQH_SHIFT 0
+#define I40E_VF_ARQH_ARQH_MASK (0x3FF << I40E_VF_ARQH_ARQH_SHIFT)
+#define I40E_VF_ARQLEN(_VF) (0x00081C00 + ((_VF) * 4)) /* _i=0...127 */
+#define I40E_VF_ARQLEN_MAX_INDEX 127
+#define I40E_VF_ARQLEN_ARQLEN_SHIFT 0
+#define I40E_VF_ARQLEN_ARQLEN_MASK (0x3FF << I40E_VF_ARQLEN_ARQLEN_SHIFT)
+#define I40E_VF_ARQLEN_ARQVFE_SHIFT 28
+#define I40E_VF_ARQLEN_ARQVFE_MASK (0x1 << I40E_VF_ARQLEN_ARQVFE_SHIFT)
+#define I40E_VF_ARQLEN_ARQOVFL_SHIFT 29
+#define I40E_VF_ARQLEN_ARQOVFL_MASK (0x1 << I40E_VF_ARQLEN_ARQOVFL_SHIFT)
+#define I40E_VF_ARQLEN_ARQCRIT_SHIFT 30
+#define I40E_VF_ARQLEN_ARQCRIT_MASK (0x1 << I40E_VF_ARQLEN_ARQCRIT_SHIFT)
+#define I40E_VF_ARQLEN_ARQENABLE_SHIFT 31
+#define I40E_VF_ARQLEN_ARQENABLE_MASK (0x1 << I40E_VF_ARQLEN_ARQENABLE_SHIFT)
+#define I40E_VF_ARQT(_VF) (0x00082C00 + ((_VF) * 4)) /* _i=0...127 */
+#define I40E_VF_ARQT_MAX_INDEX 127
+#define I40E_VF_ARQT_ARQT_SHIFT 0
+#define I40E_VF_ARQT_ARQT_MASK (0x3FF << I40E_VF_ARQT_ARQT_SHIFT)
+#define I40E_VF_ATQBAH(_VF) (0x00081000 + ((_VF) * 4)) /* _i=0...127 */
+#define I40E_VF_ATQBAH_MAX_INDEX 127
+#define I40E_VF_ATQBAH_ATQBAH_SHIFT 0
+#define I40E_VF_ATQBAH_ATQBAH_MASK (0xFFFFFFFF << I40E_VF_ATQBAH_ATQBAH_SHIFT)
+#define I40E_VF_ATQBAL(_VF) (0x00080800 + ((_VF) * 4)) /* _i=0...127 */
+#define I40E_VF_ATQBAL_MAX_INDEX 127
+#define I40E_VF_ATQBAL_ATQBAL_SHIFT 0
+#define I40E_VF_ATQBAL_ATQBAL_MASK (0xFFFFFFFF << I40E_VF_ATQBAL_ATQBAL_SHIFT)
+#define I40E_VF_ATQH(_VF) (0x00082000 + ((_VF) * 4)) /* _i=0...127 */
+#define I40E_VF_ATQH_MAX_INDEX 127
+#define I40E_VF_ATQH_ATQH_SHIFT 0
+#define I40E_VF_ATQH_ATQH_MASK (0x3FF << I40E_VF_ATQH_ATQH_SHIFT)
+#define I40E_VF_ATQLEN(_VF) (0x00081800 + ((_VF) * 4)) /* _i=0...127 */
+#define I40E_VF_ATQLEN_MAX_INDEX 127
+#define I40E_VF_ATQLEN_ATQLEN_SHIFT 0
+#define I40E_VF_ATQLEN_ATQLEN_MASK (0x3FF << I40E_VF_ATQLEN_ATQLEN_SHIFT)
+#define I40E_VF_ATQLEN_ATQVFE_SHIFT 28
+#define I40E_VF_ATQLEN_ATQVFE_MASK (0x1 << I40E_VF_ATQLEN_ATQVFE_SHIFT)
+#define I40E_VF_ATQLEN_ATQOVFL_SHIFT 29
+#define I40E_VF_ATQLEN_ATQOVFL_MASK (0x1 << I40E_VF_ATQLEN_ATQOVFL_SHIFT)
+#define I40E_VF_ATQLEN_ATQCRIT_SHIFT 30
+#define I40E_VF_ATQLEN_ATQCRIT_MASK (0x1 << I40E_VF_ATQLEN_ATQCRIT_SHIFT)
+#define I40E_VF_ATQLEN_ATQENABLE_SHIFT 31
+#define I40E_VF_ATQLEN_ATQENABLE_MASK (0x1 << I40E_VF_ATQLEN_ATQENABLE_SHIFT)
+#define I40E_VF_ATQT(_VF) (0x00082800 + ((_VF) * 4)) /* _i=0...127 */
+#define I40E_VF_ATQT_MAX_INDEX 127
+#define I40E_VF_ATQT_ATQT_SHIFT 0
+#define I40E_VF_ATQT_ATQT_MASK (0x3FF << I40E_VF_ATQT_ATQT_SHIFT)
+#define I40E_PRT_L2TAGSEN 0x001C0B20
+#define I40E_PRT_L2TAGSEN_ENABLE_SHIFT 0
+#define I40E_PRT_L2TAGSEN_ENABLE_MASK (0xFF << I40E_PRT_L2TAGSEN_ENABLE_SHIFT)
+#define I40E_PFCM_LAN_ERRDATA 0x0010C080
+#define I40E_PFCM_LAN_ERRDATA_ERROR_CODE_SHIFT 0
+#define I40E_PFCM_LAN_ERRDATA_ERROR_CODE_MASK (0xF << I40E_PFCM_LAN_ERRDATA_ERROR_CODE_SHIFT)
+#define I40E_PFCM_LAN_ERRDATA_Q_TYPE_SHIFT 4
+#define I40E_PFCM_LAN_ERRDATA_Q_TYPE_MASK (0x7 << I40E_PFCM_LAN_ERRDATA_Q_TYPE_SHIFT)
+#define I40E_PFCM_LAN_ERRDATA_Q_NUM_SHIFT 8
+#define I40E_PFCM_LAN_ERRDATA_Q_NUM_MASK (0xFFF << I40E_PFCM_LAN_ERRDATA_Q_NUM_SHIFT)
+#define I40E_PFCM_LAN_ERRINFO 0x0010C000
+#define I40E_PFCM_LAN_ERRINFO_ERROR_VALID_SHIFT 0
+#define I40E_PFCM_LAN_ERRINFO_ERROR_VALID_MASK (0x1 << I40E_PFCM_LAN_ERRINFO_ERROR_VALID_SHIFT)
+#define I40E_PFCM_LAN_ERRINFO_ERROR_INST_SHIFT 4
+#define I40E_PFCM_LAN_ERRINFO_ERROR_INST_MASK (0x7 << I40E_PFCM_LAN_ERRINFO_ERROR_INST_SHIFT)
+#define I40E_PFCM_LAN_ERRINFO_DBL_ERROR_CNT_SHIFT 8
+#define I40E_PFCM_LAN_ERRINFO_DBL_ERROR_CNT_MASK (0xFF << I40E_PFCM_LAN_ERRINFO_DBL_ERROR_CNT_SHIFT)
+#define I40E_PFCM_LAN_ERRINFO_RLU_ERROR_CNT_SHIFT 16
+#define I40E_PFCM_LAN_ERRINFO_RLU_ERROR_CNT_MASK (0xFF << I40E_PFCM_LAN_ERRINFO_RLU_ERROR_CNT_SHIFT)
+#define I40E_PFCM_LAN_ERRINFO_RLS_ERROR_CNT_SHIFT 24
+#define I40E_PFCM_LAN_ERRINFO_RLS_ERROR_CNT_MASK (0xFF << I40E_PFCM_LAN_ERRINFO_RLS_ERROR_CNT_SHIFT)
+#define I40E_PFCM_LANCTXCTL 0x0010C300
+#define I40E_PFCM_LANCTXCTL_QUEUE_NUM_SHIFT 0
+#define I40E_PFCM_LANCTXCTL_QUEUE_NUM_MASK (0xFFF << I40E_PFCM_LANCTXCTL_QUEUE_NUM_SHIFT)
+#define I40E_PFCM_LANCTXCTL_SUB_LINE_SHIFT 12
+#define I40E_PFCM_LANCTXCTL_SUB_LINE_MASK (0x7 << I40E_PFCM_LANCTXCTL_SUB_LINE_SHIFT)
+#define I40E_PFCM_LANCTXCTL_QUEUE_TYPE_SHIFT 15
+#define I40E_PFCM_LANCTXCTL_QUEUE_TYPE_MASK (0x3 << I40E_PFCM_LANCTXCTL_QUEUE_TYPE_SHIFT)
+#define I40E_PFCM_LANCTXCTL_OP_CODE_SHIFT 17
+#define I40E_PFCM_LANCTXCTL_OP_CODE_MASK (0x3 << I40E_PFCM_LANCTXCTL_OP_CODE_SHIFT)
+#define I40E_PFCM_LANCTXDATA(_i) (0x0010C100 + ((_i) * 128)) /* _i=0...3 */
+#define I40E_PFCM_LANCTXDATA_MAX_INDEX 3
+#define I40E_PFCM_LANCTXDATA_DATA_SHIFT 0
+#define I40E_PFCM_LANCTXDATA_DATA_MASK (0xFFFFFFFF << I40E_PFCM_LANCTXDATA_DATA_SHIFT)
+#define I40E_PFCM_LANCTXSTAT 0x0010C380
+#define I40E_PFCM_LANCTXSTAT_CTX_DONE_SHIFT 0
+#define I40E_PFCM_LANCTXSTAT_CTX_DONE_MASK (0x1 << I40E_PFCM_LANCTXSTAT_CTX_DONE_SHIFT)
+#define I40E_PFCM_LANCTXSTAT_CTX_MISS_SHIFT 1
+#define I40E_PFCM_LANCTXSTAT_CTX_MISS_MASK (0x1 << I40E_PFCM_LANCTXSTAT_CTX_MISS_SHIFT)
+#define I40E_PFCM_PE_ERRDATA 0x00138D00
+#define I40E_PFCM_PE_ERRDATA_ERROR_CODE_SHIFT 0
+#define I40E_PFCM_PE_ERRDATA_ERROR_CODE_MASK (0xF << I40E_PFCM_PE_ERRDATA_ERROR_CODE_SHIFT)
+#define I40E_PFCM_PE_ERRDATA_Q_TYPE_SHIFT 4
+#define I40E_PFCM_PE_ERRDATA_Q_TYPE_MASK (0x7 << I40E_PFCM_PE_ERRDATA_Q_TYPE_SHIFT)
+#define I40E_PFCM_PE_ERRDATA_Q_NUM_SHIFT 8
+#define I40E_PFCM_PE_ERRDATA_Q_NUM_MASK (0x3FFFF << I40E_PFCM_PE_ERRDATA_Q_NUM_SHIFT)
+#define I40E_PFCM_PE_ERRINFO 0x00138C80
+#define I40E_PFCM_PE_ERRINFO_ERROR_VALID_SHIFT 0
+#define I40E_PFCM_PE_ERRINFO_ERROR_VALID_MASK (0x1 << I40E_PFCM_PE_ERRINFO_ERROR_VALID_SHIFT)
+#define I40E_PFCM_PE_ERRINFO_ERROR_INST_SHIFT 4
+#define I40E_PFCM_PE_ERRINFO_ERROR_INST_MASK (0x7 << I40E_PFCM_PE_ERRINFO_ERROR_INST_SHIFT)
+#define I40E_PFCM_PE_ERRINFO_DBL_ERROR_CNT_SHIFT 8
+#define I40E_PFCM_PE_ERRINFO_DBL_ERROR_CNT_MASK (0xFF << I40E_PFCM_PE_ERRINFO_DBL_ERROR_CNT_SHIFT)
+#define I40E_PFCM_PE_ERRINFO_RLU_ERROR_CNT_SHIFT 16
+#define I40E_PFCM_PE_ERRINFO_RLU_ERROR_CNT_MASK (0xFF << I40E_PFCM_PE_ERRINFO_RLU_ERROR_CNT_SHIFT)
+#define I40E_PFCM_PE_ERRINFO_RLS_ERROR_CNT_SHIFT 24
+#define I40E_PFCM_PE_ERRINFO_RLS_ERROR_CNT_MASK (0xFF << I40E_PFCM_PE_ERRINFO_RLS_ERROR_CNT_SHIFT)
+#define I40E_VFCM_PE_ERRDATA1(_VF) (0x00138800 + ((_VF) * 4)) /* _i=0...127 */
+#define I40E_VFCM_PE_ERRDATA1_MAX_INDEX 127
+#define I40E_VFCM_PE_ERRDATA1_ERROR_CODE_SHIFT 0
+#define I40E_VFCM_PE_ERRDATA1_ERROR_CODE_MASK (0xF << I40E_VFCM_PE_ERRDATA1_ERROR_CODE_SHIFT)
+#define I40E_VFCM_PE_ERRDATA1_Q_TYPE_SHIFT 4
+#define I40E_VFCM_PE_ERRDATA1_Q_TYPE_MASK (0x7 << I40E_VFCM_PE_ERRDATA1_Q_TYPE_SHIFT)
+#define I40E_VFCM_PE_ERRDATA1_Q_NUM_SHIFT 8
+#define I40E_VFCM_PE_ERRDATA1_Q_NUM_MASK (0x3FFFF << I40E_VFCM_PE_ERRDATA1_Q_NUM_SHIFT)
+#define I40E_VFCM_PE_ERRINFO1(_VF) (0x00138400 + ((_VF) * 4)) /* _i=0...127 */
+#define I40E_VFCM_PE_ERRINFO1_MAX_INDEX 127
+#define I40E_VFCM_PE_ERRINFO1_ERROR_VALID_SHIFT 0
+#define I40E_VFCM_PE_ERRINFO1_ERROR_VALID_MASK (0x1 << I40E_VFCM_PE_ERRINFO1_ERROR_VALID_SHIFT)
+#define I40E_VFCM_PE_ERRINFO1_ERROR_INST_SHIFT 4
+#define I40E_VFCM_PE_ERRINFO1_ERROR_INST_MASK (0x7 << I40E_VFCM_PE_ERRINFO1_ERROR_INST_SHIFT)
+#define I40E_VFCM_PE_ERRINFO1_DBL_ERROR_CNT_SHIFT 8
+#define I40E_VFCM_PE_ERRINFO1_DBL_ERROR_CNT_MASK (0xFF << I40E_VFCM_PE_ERRINFO1_DBL_ERROR_CNT_SHIFT)
+#define I40E_VFCM_PE_ERRINFO1_RLU_ERROR_CNT_SHIFT 16
+#define I40E_VFCM_PE_ERRINFO1_RLU_ERROR_CNT_MASK (0xFF << I40E_VFCM_PE_ERRINFO1_RLU_ERROR_CNT_SHIFT)
+#define I40E_VFCM_PE_ERRINFO1_RLS_ERROR_CNT_SHIFT 24
+#define I40E_VFCM_PE_ERRINFO1_RLS_ERROR_CNT_MASK (0xFF << I40E_VFCM_PE_ERRINFO1_RLS_ERROR_CNT_SHIFT)
+#define I40E_GLDCB_GENC 0x00083044
+#define I40E_GLDCB_GENC_PCIRTT_SHIFT 0
+#define I40E_GLDCB_GENC_PCIRTT_MASK (0xFFFF << I40E_GLDCB_GENC_PCIRTT_SHIFT)
+#define I40E_GLDCB_RUPTI 0x00122618
+#define I40E_GLDCB_RUPTI_PFCTIMEOUT_UP_SHIFT 0
+#define I40E_GLDCB_RUPTI_PFCTIMEOUT_UP_MASK (0xFFFFFFFF << I40E_GLDCB_RUPTI_PFCTIMEOUT_UP_SHIFT)
+#define I40E_PRTDCB_FCCFG 0x001E4640
+#define I40E_PRTDCB_FCCFG_TFCE_SHIFT 3
+#define I40E_PRTDCB_FCCFG_TFCE_MASK (0x3 << I40E_PRTDCB_FCCFG_TFCE_SHIFT)
+#define I40E_PRTDCB_FCRTV 0x001E4600
+#define I40E_PRTDCB_FCRTV_FC_REFRESH_TH_SHIFT 0
+#define I40E_PRTDCB_FCRTV_FC_REFRESH_TH_MASK (0xFFFF << I40E_PRTDCB_FCRTV_FC_REFRESH_TH_SHIFT)
+#define I40E_PRTDCB_FCTTVN(_i) (0x001E4580 + ((_i) * 32)) /* _i=0...3 */
+#define I40E_PRTDCB_FCTTVN_MAX_INDEX 3
+#define I40E_PRTDCB_FCTTVN_TTV_2N_SHIFT 0
+#define I40E_PRTDCB_FCTTVN_TTV_2N_MASK (0xFFFF << I40E_PRTDCB_FCTTVN_TTV_2N_SHIFT)
+#define I40E_PRTDCB_FCTTVN_TTV_2N_P1_SHIFT 16
+#define I40E_PRTDCB_FCTTVN_TTV_2N_P1_MASK (0xFFFF << I40E_PRTDCB_FCTTVN_TTV_2N_P1_SHIFT)
+#define I40E_PRTDCB_GENC 0x00083000
+#define I40E_PRTDCB_GENC_RESERVED_1_SHIFT 0
+#define I40E_PRTDCB_GENC_RESERVED_1_MASK (0x3 << I40E_PRTDCB_GENC_RESERVED_1_SHIFT)
+#define I40E_PRTDCB_GENC_NUMTC_SHIFT 2
+#define I40E_PRTDCB_GENC_NUMTC_MASK (0xF << I40E_PRTDCB_GENC_NUMTC_SHIFT)
+#define I40E_PRTDCB_GENC_FCOEUP_SHIFT 6
+#define I40E_PRTDCB_GENC_FCOEUP_MASK (0x7 << I40E_PRTDCB_GENC_FCOEUP_SHIFT)
+#define I40E_PRTDCB_GENC_FCOEUP_VALID_SHIFT 9
+#define I40E_PRTDCB_GENC_FCOEUP_VALID_MASK (0x1 << I40E_PRTDCB_GENC_FCOEUP_VALID_SHIFT)
+#define I40E_PRTDCB_GENC_PFCLDA_SHIFT 16
+#define I40E_PRTDCB_GENC_PFCLDA_MASK (0xFFFF << I40E_PRTDCB_GENC_PFCLDA_SHIFT)
+#define I40E_PRTDCB_GENS 0x00083020
+#define I40E_PRTDCB_GENS_DCBX_STATUS_SHIFT 0
+#define I40E_PRTDCB_GENS_DCBX_STATUS_MASK (0x7 << I40E_PRTDCB_GENS_DCBX_STATUS_SHIFT)
+#define I40E_PRTDCB_MFLCN 0x001E2400
+#define I40E_PRTDCB_MFLCN_PMCF_SHIFT 0
+#define I40E_PRTDCB_MFLCN_PMCF_MASK (0x1 << I40E_PRTDCB_MFLCN_PMCF_SHIFT)
+#define I40E_PRTDCB_MFLCN_DPF_SHIFT 1
+#define I40E_PRTDCB_MFLCN_DPF_MASK (0x1 << I40E_PRTDCB_MFLCN_DPF_SHIFT)
+#define I40E_PRTDCB_MFLCN_RPFCM_SHIFT 2
+#define I40E_PRTDCB_MFLCN_RPFCM_MASK (0x1 << I40E_PRTDCB_MFLCN_RPFCM_SHIFT)
+#define I40E_PRTDCB_MFLCN_RFCE_SHIFT 3
+#define I40E_PRTDCB_MFLCN_RFCE_MASK (0x1 << I40E_PRTDCB_MFLCN_RFCE_SHIFT)
+#define I40E_PRTDCB_MFLCN_RPFCE_SHIFT 4
+#define I40E_PRTDCB_MFLCN_RPFCE_MASK (0xFF << I40E_PRTDCB_MFLCN_RPFCE_SHIFT)
+#define I40E_PRTDCB_RETSC 0x001223E0
+#define I40E_PRTDCB_RETSC_ETS_MODE_SHIFT 0
+#define I40E_PRTDCB_RETSC_ETS_MODE_MASK (0x1 << I40E_PRTDCB_RETSC_ETS_MODE_SHIFT)
+#define I40E_PRTDCB_RETSC_NON_ETS_MODE_SHIFT 1
+#define I40E_PRTDCB_RETSC_NON_ETS_MODE_MASK (0x1 << I40E_PRTDCB_RETSC_NON_ETS_MODE_SHIFT)
+#define I40E_PRTDCB_RETSC_ETS_MAX_EXP_SHIFT 2
+#define I40E_PRTDCB_RETSC_ETS_MAX_EXP_MASK (0xF << I40E_PRTDCB_RETSC_ETS_MAX_EXP_SHIFT)
+#define I40E_PRTDCB_RETSC_LLTC_SHIFT 8
+#define I40E_PRTDCB_RETSC_LLTC_MASK (0xFF << I40E_PRTDCB_RETSC_LLTC_SHIFT)
+#define I40E_PRTDCB_RETSTCC(_i) (0x00122180 + ((_i) * 32)) /* _i=0...7 */
+#define I40E_PRTDCB_RETSTCC_MAX_INDEX 7
+#define I40E_PRTDCB_RETSTCC_BWSHARE_SHIFT 0
+#define I40E_PRTDCB_RETSTCC_BWSHARE_MASK (0x7F << I40E_PRTDCB_RETSTCC_BWSHARE_SHIFT)
+#define I40E_PRTDCB_RETSTCC_UPINTC_MODE_SHIFT 30
+#define I40E_PRTDCB_RETSTCC_UPINTC_MODE_MASK (0x1 << I40E_PRTDCB_RETSTCC_UPINTC_MODE_SHIFT)
+#define I40E_PRTDCB_RETSTCC_ETSTC_SHIFT 31
+#define I40E_PRTDCB_RETSTCC_ETSTC_MASK (0x1 << I40E_PRTDCB_RETSTCC_ETSTC_SHIFT)
+#define I40E_PRTDCB_RPPMC 0x001223A0
+#define I40E_PRTDCB_RPPMC_LANRPPM_SHIFT 0
+#define I40E_PRTDCB_RPPMC_LANRPPM_MASK (0xFF << I40E_PRTDCB_RPPMC_LANRPPM_SHIFT)
+#define I40E_PRTDCB_RPPMC_RDMARPPM_SHIFT 8
+#define I40E_PRTDCB_RPPMC_RDMARPPM_MASK (0xFF << I40E_PRTDCB_RPPMC_RDMARPPM_SHIFT)
+#define I40E_PRTDCB_RPPMC_RX_FIFO_SIZE_SHIFT 16
+#define I40E_PRTDCB_RPPMC_RX_FIFO_SIZE_MASK (0xFF << I40E_PRTDCB_RPPMC_RX_FIFO_SIZE_SHIFT)
+#define I40E_PRTDCB_RUP 0x001C0B00
+#define I40E_PRTDCB_RUP_NOVLANUP_SHIFT 0
+#define I40E_PRTDCB_RUP_NOVLANUP_MASK (0x7 << I40E_PRTDCB_RUP_NOVLANUP_SHIFT)
+#define I40E_PRTDCB_RUP2TC 0x001C09A0
+#define I40E_PRTDCB_RUP2TC_UP0TC_SHIFT 0
+#define I40E_PRTDCB_RUP2TC_UP0TC_MASK (0x7 << I40E_PRTDCB_RUP2TC_UP0TC_SHIFT)
+#define I40E_PRTDCB_RUP2TC_UP1TC_SHIFT 3
+#define I40E_PRTDCB_RUP2TC_UP1TC_MASK (0x7 << I40E_PRTDCB_RUP2TC_UP1TC_SHIFT)
+#define I40E_PRTDCB_RUP2TC_UP2TC_SHIFT 6
+#define I40E_PRTDCB_RUP2TC_UP2TC_MASK (0x7 << I40E_PRTDCB_RUP2TC_UP2TC_SHIFT)
+#define I40E_PRTDCB_RUP2TC_UP3TC_SHIFT 9
+#define I40E_PRTDCB_RUP2TC_UP3TC_MASK (0x7 << I40E_PRTDCB_RUP2TC_UP3TC_SHIFT)
+#define I40E_PRTDCB_RUP2TC_UP4TC_SHIFT 12
+#define I40E_PRTDCB_RUP2TC_UP4TC_MASK (0x7 << I40E_PRTDCB_RUP2TC_UP4TC_SHIFT)
+#define I40E_PRTDCB_RUP2TC_UP5TC_SHIFT 15
+#define I40E_PRTDCB_RUP2TC_UP5TC_MASK (0x7 << I40E_PRTDCB_RUP2TC_UP5TC_SHIFT)
+#define I40E_PRTDCB_RUP2TC_UP6TC_SHIFT 18
+#define I40E_PRTDCB_RUP2TC_UP6TC_MASK (0x7 << I40E_PRTDCB_RUP2TC_UP6TC_SHIFT)
+#define I40E_PRTDCB_RUP2TC_UP7TC_SHIFT 21
+#define I40E_PRTDCB_RUP2TC_UP7TC_MASK (0x7 << I40E_PRTDCB_RUP2TC_UP7TC_SHIFT)
+#define I40E_PRTDCB_TC2PFC 0x001C0980
+#define I40E_PRTDCB_TC2PFC_TC2PFC_SHIFT 0
+#define I40E_PRTDCB_TC2PFC_TC2PFC_MASK (0xFF << I40E_PRTDCB_TC2PFC_TC2PFC_SHIFT)
+#define I40E_PRTDCB_TCPMC 0x000A21A0
+#define I40E_PRTDCB_TCPMC_CPM_SHIFT 0
+#define I40E_PRTDCB_TCPMC_CPM_MASK (0x1FFF << I40E_PRTDCB_TCPMC_CPM_SHIFT)
+#define I40E_PRTDCB_TCPMC_LLTC_SHIFT 13
+#define I40E_PRTDCB_TCPMC_LLTC_MASK (0xFF << I40E_PRTDCB_TCPMC_LLTC_SHIFT)
+#define I40E_PRTDCB_TCPMC_TCPM_MODE_SHIFT 30
+#define I40E_PRTDCB_TCPMC_TCPM_MODE_MASK (0x1 << I40E_PRTDCB_TCPMC_TCPM_MODE_SHIFT)
+#define I40E_PRTDCB_TCWSTC(_i) (0x000A2040 + ((_i) * 32)) /* _i=0...7 */
+#define I40E_PRTDCB_TCWSTC_MAX_INDEX 7
+#define I40E_PRTDCB_TCWSTC_MSTC_SHIFT 0
+#define I40E_PRTDCB_TCWSTC_MSTC_MASK (0xFFFFF << I40E_PRTDCB_TCWSTC_MSTC_SHIFT)
+#define I40E_PRTDCB_TDPMC 0x000A0180
+#define I40E_PRTDCB_TDPMC_DPM_SHIFT 0
+#define I40E_PRTDCB_TDPMC_DPM_MASK (0xFF << I40E_PRTDCB_TDPMC_DPM_SHIFT)
+#define I40E_PRTDCB_TDPMC_TCPM_MODE_SHIFT 30
+#define I40E_PRTDCB_TDPMC_TCPM_MODE_MASK (0x1 << I40E_PRTDCB_TDPMC_TCPM_MODE_SHIFT)
+#define I40E_PRTDCB_TDPUC 0x00044100
+#define I40E_PRTDCB_TDPUC_MAX_TXFRAME_SHIFT 0
+#define I40E_PRTDCB_TDPUC_MAX_TXFRAME_MASK (0xFFFF << I40E_PRTDCB_TDPUC_MAX_TXFRAME_SHIFT)
+#define I40E_PRTDCB_TETSC_TCB 0x000AE060
+#define I40E_PRTDCB_TETSC_TCB_EN_LL_STRICT_PRIORITY_SHIFT 0
+#define I40E_PRTDCB_TETSC_TCB_EN_LL_STRICT_PRIORITY_MASK (0x1 << I40E_PRTDCB_TETSC_TCB_EN_LL_STRICT_PRIORITY_SHIFT)
+#define I40E_PRTDCB_TETSC_TCB_LLTC_SHIFT 8
+#define I40E_PRTDCB_TETSC_TCB_LLTC_MASK (0xFF << I40E_PRTDCB_TETSC_TCB_LLTC_SHIFT)
+#define I40E_PRTDCB_TETSC_TPB 0x00098060
+#define I40E_PRTDCB_TETSC_TPB_EN_LL_STRICT_PRIORITY_SHIFT 0
+#define I40E_PRTDCB_TETSC_TPB_EN_LL_STRICT_PRIORITY_MASK (0x1 << I40E_PRTDCB_TETSC_TPB_EN_LL_STRICT_PRIORITY_SHIFT)
+#define I40E_PRTDCB_TETSC_TPB_LLTC_SHIFT 8
+#define I40E_PRTDCB_TETSC_TPB_LLTC_MASK (0xFF << I40E_PRTDCB_TETSC_TPB_LLTC_SHIFT)
+#define I40E_PRTDCB_TFCS 0x001E4560
+#define I40E_PRTDCB_TFCS_TXOFF_SHIFT 0
+#define I40E_PRTDCB_TFCS_TXOFF_MASK (0x1 << I40E_PRTDCB_TFCS_TXOFF_SHIFT)
+#define I40E_PRTDCB_TFCS_TXOFF0_SHIFT 8
+#define I40E_PRTDCB_TFCS_TXOFF0_MASK (0x1 << I40E_PRTDCB_TFCS_TXOFF0_SHIFT)
+#define I40E_PRTDCB_TFCS_TXOFF1_SHIFT 9
+#define I40E_PRTDCB_TFCS_TXOFF1_MASK (0x1 << I40E_PRTDCB_TFCS_TXOFF1_SHIFT)
+#define I40E_PRTDCB_TFCS_TXOFF2_SHIFT 10
+#define I40E_PRTDCB_TFCS_TXOFF2_MASK (0x1 << I40E_PRTDCB_TFCS_TXOFF2_SHIFT)
+#define I40E_PRTDCB_TFCS_TXOFF3_SHIFT 11
+#define I40E_PRTDCB_TFCS_TXOFF3_MASK (0x1 << I40E_PRTDCB_TFCS_TXOFF3_SHIFT)
+#define I40E_PRTDCB_TFCS_TXOFF4_SHIFT 12
+#define I40E_PRTDCB_TFCS_TXOFF4_MASK (0x1 << I40E_PRTDCB_TFCS_TXOFF4_SHIFT)
+#define I40E_PRTDCB_TFCS_TXOFF5_SHIFT 13
+#define I40E_PRTDCB_TFCS_TXOFF5_MASK (0x1 << I40E_PRTDCB_TFCS_TXOFF5_SHIFT)
+#define I40E_PRTDCB_TFCS_TXOFF6_SHIFT 14
+#define I40E_PRTDCB_TFCS_TXOFF6_MASK (0x1 << I40E_PRTDCB_TFCS_TXOFF6_SHIFT)
+#define I40E_PRTDCB_TFCS_TXOFF7_SHIFT 15
+#define I40E_PRTDCB_TFCS_TXOFF7_MASK (0x1 << I40E_PRTDCB_TFCS_TXOFF7_SHIFT)
+#define I40E_PRTDCB_TFWSTC(_i) (0x000A0040 + ((_i) * 32)) /* _i=0...7 */
+#define I40E_PRTDCB_TFWSTC_MAX_INDEX 7
+#define I40E_PRTDCB_TFWSTC_MSTC_SHIFT 0
+#define I40E_PRTDCB_TFWSTC_MSTC_MASK (0xFFFFF << I40E_PRTDCB_TFWSTC_MSTC_SHIFT)
+#define I40E_PRTDCB_TPFCTS(_i) (0x001E4660 + ((_i) * 32)) /* _i=0...7 */
+#define I40E_PRTDCB_TPFCTS_MAX_INDEX 7
+#define I40E_PRTDCB_TPFCTS_PFCTIMER_SHIFT 0
+#define I40E_PRTDCB_TPFCTS_PFCTIMER_MASK (0x3FFF << I40E_PRTDCB_TPFCTS_PFCTIMER_SHIFT)
+#define I40E_GLFCOE_RCTL 0x00269B94
+#define I40E_GLFCOE_RCTL_FCOEVER_SHIFT 0
+#define I40E_GLFCOE_RCTL_FCOEVER_MASK (0xF << I40E_GLFCOE_RCTL_FCOEVER_SHIFT)
+#define I40E_GLFCOE_RCTL_SAVBAD_SHIFT 4
+#define I40E_GLFCOE_RCTL_SAVBAD_MASK (0x1 << I40E_GLFCOE_RCTL_SAVBAD_SHIFT)
+#define I40E_GLFCOE_RCTL_ICRC_SHIFT 5
+#define I40E_GLFCOE_RCTL_ICRC_MASK (0x1 << I40E_GLFCOE_RCTL_ICRC_SHIFT)
+#define I40E_GLFCOE_RCTL_MAX_SIZE_SHIFT 16
+#define I40E_GLFCOE_RCTL_MAX_SIZE_MASK (0x3FFF << I40E_GLFCOE_RCTL_MAX_SIZE_SHIFT)
+#define I40E_GL_FWSTS 0x00083048
+#define I40E_GL_FWSTS_FWS0B_SHIFT 0
+#define I40E_GL_FWSTS_FWS0B_MASK (0xFF << I40E_GL_FWSTS_FWS0B_SHIFT)
+#define I40E_GL_FWSTS_FWRI_SHIFT 9
+#define I40E_GL_FWSTS_FWRI_MASK (0x1 << I40E_GL_FWSTS_FWRI_SHIFT)
+#define I40E_GL_FWSTS_FWS1B_SHIFT 16
+#define I40E_GL_FWSTS_FWS1B_MASK (0xFF << I40E_GL_FWSTS_FWS1B_SHIFT)
+#define I40E_GLGEN_CLKSTAT 0x000B8184
+#define I40E_GLGEN_CLKSTAT_CLKMODE_SHIFT 0
+#define I40E_GLGEN_CLKSTAT_CLKMODE_MASK (0x1 << I40E_GLGEN_CLKSTAT_CLKMODE_SHIFT)
+#define I40E_GLGEN_CLKSTAT_U_CLK_SPEED_SHIFT 4
+#define I40E_GLGEN_CLKSTAT_U_CLK_SPEED_MASK (0x3 << I40E_GLGEN_CLKSTAT_U_CLK_SPEED_SHIFT)
+#define I40E_GLGEN_CLKSTAT_P0_CLK_SPEED_SHIFT 8
+#define I40E_GLGEN_CLKSTAT_P0_CLK_SPEED_MASK (0x7 << I40E_GLGEN_CLKSTAT_P0_CLK_SPEED_SHIFT)
+#define I40E_GLGEN_CLKSTAT_P1_CLK_SPEED_SHIFT 12
+#define I40E_GLGEN_CLKSTAT_P1_CLK_SPEED_MASK (0x7 << I40E_GLGEN_CLKSTAT_P1_CLK_SPEED_SHIFT)
+#define I40E_GLGEN_CLKSTAT_P2_CLK_SPEED_SHIFT 16
+#define I40E_GLGEN_CLKSTAT_P2_CLK_SPEED_MASK (0x7 << I40E_GLGEN_CLKSTAT_P2_CLK_SPEED_SHIFT)
+#define I40E_GLGEN_CLKSTAT_P3_CLK_SPEED_SHIFT 20
+#define I40E_GLGEN_CLKSTAT_P3_CLK_SPEED_MASK (0x7 << I40E_GLGEN_CLKSTAT_P3_CLK_SPEED_SHIFT)
+#define I40E_GLGEN_GPIO_CTL(_i) (0x00088100 + ((_i) * 4)) /* _i=0...29 */
+#define I40E_GLGEN_GPIO_CTL_MAX_INDEX 29
+#define I40E_GLGEN_GPIO_CTL_PRT_NUM_SHIFT 0
+#define I40E_GLGEN_GPIO_CTL_PRT_NUM_MASK (0x3 << I40E_GLGEN_GPIO_CTL_PRT_NUM_SHIFT)
+#define I40E_GLGEN_GPIO_CTL_PRT_NUM_NA_SHIFT 3
+#define I40E_GLGEN_GPIO_CTL_PRT_NUM_NA_MASK (0x1 << I40E_GLGEN_GPIO_CTL_PRT_NUM_NA_SHIFT)
+#define I40E_GLGEN_GPIO_CTL_PIN_DIR_SHIFT 4
+#define I40E_GLGEN_GPIO_CTL_PIN_DIR_MASK (0x1 << I40E_GLGEN_GPIO_CTL_PIN_DIR_SHIFT)
+#define I40E_GLGEN_GPIO_CTL_TRI_CTL_SHIFT 5
+#define I40E_GLGEN_GPIO_CTL_TRI_CTL_MASK (0x1 << I40E_GLGEN_GPIO_CTL_TRI_CTL_SHIFT)
+#define I40E_GLGEN_GPIO_CTL_OUT_CTL_SHIFT 6
+#define I40E_GLGEN_GPIO_CTL_OUT_CTL_MASK (0x1 << I40E_GLGEN_GPIO_CTL_OUT_CTL_SHIFT)
+#define I40E_GLGEN_GPIO_CTL_PIN_FUNC_SHIFT 7
+#define I40E_GLGEN_GPIO_CTL_PIN_FUNC_MASK (0x7 << I40E_GLGEN_GPIO_CTL_PIN_FUNC_SHIFT)
+#define I40E_GLGEN_GPIO_CTL_LED_INVRT_SHIFT 10
+#define I40E_GLGEN_GPIO_CTL_LED_INVRT_MASK (0x1 << I40E_GLGEN_GPIO_CTL_LED_INVRT_SHIFT)
+#define I40E_GLGEN_GPIO_CTL_LED_BLINK_SHIFT 11
+#define I40E_GLGEN_GPIO_CTL_LED_BLINK_MASK (0x1 << I40E_GLGEN_GPIO_CTL_LED_BLINK_SHIFT)
+#define I40E_GLGEN_GPIO_CTL_LED_MODE_SHIFT 12
+#define I40E_GLGEN_GPIO_CTL_LED_MODE_MASK (0xF << I40E_GLGEN_GPIO_CTL_LED_MODE_SHIFT)
+#define I40E_GLGEN_GPIO_CTL_INT_MODE_SHIFT 17
+#define I40E_GLGEN_GPIO_CTL_INT_MODE_MASK (0x3 << I40E_GLGEN_GPIO_CTL_INT_MODE_SHIFT)
+#define I40E_GLGEN_GPIO_CTL_OUT_DEFAULT_SHIFT 19
+#define I40E_GLGEN_GPIO_CTL_OUT_DEFAULT_MASK (0x1 << I40E_GLGEN_GPIO_CTL_OUT_DEFAULT_SHIFT)
+#define I40E_GLGEN_GPIO_CTL_PHY_PIN_NAME_SHIFT 20
+#define I40E_GLGEN_GPIO_CTL_PHY_PIN_NAME_MASK (0x3F << I40E_GLGEN_GPIO_CTL_PHY_PIN_NAME_SHIFT)
+#define I40E_GLGEN_GPIO_SET 0x00088184
+#define I40E_GLGEN_GPIO_SET_GPIO_INDX_SHIFT 0
+#define I40E_GLGEN_GPIO_SET_GPIO_INDX_MASK (0x1F << I40E_GLGEN_GPIO_SET_GPIO_INDX_SHIFT)
+#define I40E_GLGEN_GPIO_SET_SDP_DATA_SHIFT 5
+#define I40E_GLGEN_GPIO_SET_SDP_DATA_MASK (0x1 << I40E_GLGEN_GPIO_SET_SDP_DATA_SHIFT)
+#define I40E_GLGEN_GPIO_SET_DRIVE_SDP_SHIFT 6
+#define I40E_GLGEN_GPIO_SET_DRIVE_SDP_MASK (0x1 << I40E_GLGEN_GPIO_SET_DRIVE_SDP_SHIFT)
+#define I40E_GLGEN_GPIO_STAT 0x0008817C
+#define I40E_GLGEN_GPIO_STAT_GPIO_VALUE_SHIFT 0
+#define I40E_GLGEN_GPIO_STAT_GPIO_VALUE_MASK (0x3FFFFFFF << I40E_GLGEN_GPIO_STAT_GPIO_VALUE_SHIFT)
+#define I40E_GLGEN_GPIO_TRANSIT 0x00088180
+#define I40E_GLGEN_GPIO_TRANSIT_GPIO_TRANSITION_SHIFT 0
+#define I40E_GLGEN_GPIO_TRANSIT_GPIO_TRANSITION_MASK (0x3FFFFFFF << I40E_GLGEN_GPIO_TRANSIT_GPIO_TRANSITION_SHIFT)
+#define I40E_GLGEN_I2CCMD(_i) (0x000881E0 + ((_i) * 4)) /* _i=0...3 */
+#define I40E_GLGEN_I2CCMD_MAX_INDEX 3
+#define I40E_GLGEN_I2CCMD_DATA_SHIFT 0
+#define I40E_GLGEN_I2CCMD_DATA_MASK (0xFFFF << I40E_GLGEN_I2CCMD_DATA_SHIFT)
+#define I40E_GLGEN_I2CCMD_REGADD_SHIFT 16
+#define I40E_GLGEN_I2CCMD_REGADD_MASK (0xFF << I40E_GLGEN_I2CCMD_REGADD_SHIFT)
+#define I40E_GLGEN_I2CCMD_PHYADD_SHIFT 24
+#define I40E_GLGEN_I2CCMD_PHYADD_MASK (0x7 << I40E_GLGEN_I2CCMD_PHYADD_SHIFT)
+#define I40E_GLGEN_I2CCMD_OP_SHIFT 27
+#define I40E_GLGEN_I2CCMD_OP_MASK (0x1 << I40E_GLGEN_I2CCMD_OP_SHIFT)
+#define I40E_GLGEN_I2CCMD_RESET_SHIFT 28
+#define I40E_GLGEN_I2CCMD_RESET_MASK (0x1 << I40E_GLGEN_I2CCMD_RESET_SHIFT)
+#define I40E_GLGEN_I2CCMD_R_SHIFT 29
+#define I40E_GLGEN_I2CCMD_R_MASK (0x1 << I40E_GLGEN_I2CCMD_R_SHIFT)
+#define I40E_GLGEN_I2CCMD_E_SHIFT 31
+#define I40E_GLGEN_I2CCMD_E_MASK (0x1u << I40E_GLGEN_I2CCMD_E_SHIFT) /* unsigned: 1 << 31 on signed int is UB (C11 6.5.7) */
+#define I40E_GLGEN_I2CPARAMS(_i) (0x000881AC + ((_i) * 4)) /* _i=0...3 */
+#define I40E_GLGEN_I2CPARAMS_MAX_INDEX 3
+#define I40E_GLGEN_I2CPARAMS_WRITE_TIME_SHIFT 0
+#define I40E_GLGEN_I2CPARAMS_WRITE_TIME_MASK (0x1F << I40E_GLGEN_I2CPARAMS_WRITE_TIME_SHIFT)
+#define I40E_GLGEN_I2CPARAMS_READ_TIME_SHIFT 5
+#define I40E_GLGEN_I2CPARAMS_READ_TIME_MASK (0x7 << I40E_GLGEN_I2CPARAMS_READ_TIME_SHIFT)
+#define I40E_GLGEN_I2CPARAMS_I2CBB_EN_SHIFT 8
+#define I40E_GLGEN_I2CPARAMS_I2CBB_EN_MASK (0x1 << I40E_GLGEN_I2CPARAMS_I2CBB_EN_SHIFT)
+#define I40E_GLGEN_I2CPARAMS_CLK_SHIFT 9
+#define I40E_GLGEN_I2CPARAMS_CLK_MASK (0x1 << I40E_GLGEN_I2CPARAMS_CLK_SHIFT)
+#define I40E_GLGEN_I2CPARAMS_DATA_OUT_SHIFT 10
+#define I40E_GLGEN_I2CPARAMS_DATA_OUT_MASK (0x1 << I40E_GLGEN_I2CPARAMS_DATA_OUT_SHIFT)
+#define I40E_GLGEN_I2CPARAMS_DATA_OE_N_SHIFT 11
+#define I40E_GLGEN_I2CPARAMS_DATA_OE_N_MASK (0x1 << I40E_GLGEN_I2CPARAMS_DATA_OE_N_SHIFT)
+#define I40E_GLGEN_I2CPARAMS_DATA_IN_SHIFT 12
+#define I40E_GLGEN_I2CPARAMS_DATA_IN_MASK (0x1 << I40E_GLGEN_I2CPARAMS_DATA_IN_SHIFT)
+#define I40E_GLGEN_I2CPARAMS_CLK_OE_N_SHIFT 13
+#define I40E_GLGEN_I2CPARAMS_CLK_OE_N_MASK (0x1 << I40E_GLGEN_I2CPARAMS_CLK_OE_N_SHIFT)
+#define I40E_GLGEN_I2CPARAMS_CLK_IN_SHIFT 14
+#define I40E_GLGEN_I2CPARAMS_CLK_IN_MASK (0x1 << I40E_GLGEN_I2CPARAMS_CLK_IN_SHIFT)
+#define I40E_GLGEN_I2CPARAMS_CLK_STRETCH_DIS_SHIFT 15
+#define I40E_GLGEN_I2CPARAMS_CLK_STRETCH_DIS_MASK (0x1 << I40E_GLGEN_I2CPARAMS_CLK_STRETCH_DIS_SHIFT)
+#define I40E_GLGEN_I2CPARAMS_I2C_DATA_ORDER_SHIFT 31
+#define I40E_GLGEN_I2CPARAMS_I2C_DATA_ORDER_MASK (0x1u << I40E_GLGEN_I2CPARAMS_I2C_DATA_ORDER_SHIFT) /* unsigned: 1 << 31 on signed int is UB (C11 6.5.7) */
+#define I40E_GLGEN_LED_CTL 0x00088178
+#define I40E_GLGEN_LED_CTL_GLOBAL_BLINK_MODE_SHIFT 0
+#define I40E_GLGEN_LED_CTL_GLOBAL_BLINK_MODE_MASK (0x1 << I40E_GLGEN_LED_CTL_GLOBAL_BLINK_MODE_SHIFT)
+#define I40E_GLGEN_MDIO_CTRL(_i) (0x000881D0 + ((_i) * 4)) /* _i=0...3 */
+#define I40E_GLGEN_MDIO_CTRL_MAX_INDEX 3
+#define I40E_GLGEN_MDIO_CTRL_LEGACY_RSVD2_SHIFT 0
+#define I40E_GLGEN_MDIO_CTRL_LEGACY_RSVD2_MASK (0x1FFFF << I40E_GLGEN_MDIO_CTRL_LEGACY_RSVD2_SHIFT)
+#define I40E_GLGEN_MDIO_CTRL_CONTMDC_SHIFT 17
+#define I40E_GLGEN_MDIO_CTRL_CONTMDC_MASK (0x1 << I40E_GLGEN_MDIO_CTRL_CONTMDC_SHIFT)
+#define I40E_GLGEN_MDIO_CTRL_LEGACY_RSVD1_SHIFT 18
+#define I40E_GLGEN_MDIO_CTRL_LEGACY_RSVD1_MASK (0x3FFF << I40E_GLGEN_MDIO_CTRL_LEGACY_RSVD1_SHIFT)
+#define I40E_GLGEN_MDIO_I2C_SEL(_i) (0x000881C0 + ((_i) * 4)) /* _i=0...3 */
+#define I40E_GLGEN_MDIO_I2C_SEL_MAX_INDEX 3
+#define I40E_GLGEN_MDIO_I2C_SEL_MDIO_I2C_SEL_SHIFT 0
+#define I40E_GLGEN_MDIO_I2C_SEL_MDIO_I2C_SEL_MASK (0x1 << I40E_GLGEN_MDIO_I2C_SEL_MDIO_I2C_SEL_SHIFT)
+#define I40E_GLGEN_MDIO_I2C_SEL_PHY_PORT_NUM_SHIFT 1
+#define I40E_GLGEN_MDIO_I2C_SEL_PHY_PORT_NUM_MASK (0xF << I40E_GLGEN_MDIO_I2C_SEL_PHY_PORT_NUM_SHIFT)
+#define I40E_GLGEN_MDIO_I2C_SEL_PHY0_ADDRESS_SHIFT 5
+#define I40E_GLGEN_MDIO_I2C_SEL_PHY0_ADDRESS_MASK (0x1F << I40E_GLGEN_MDIO_I2C_SEL_PHY0_ADDRESS_SHIFT)
+#define I40E_GLGEN_MDIO_I2C_SEL_PHY1_ADDRESS_SHIFT 10
+#define I40E_GLGEN_MDIO_I2C_SEL_PHY1_ADDRESS_MASK (0x1F << I40E_GLGEN_MDIO_I2C_SEL_PHY1_ADDRESS_SHIFT)
+#define I40E_GLGEN_MDIO_I2C_SEL_PHY2_ADDRESS_SHIFT 15
+#define I40E_GLGEN_MDIO_I2C_SEL_PHY2_ADDRESS_MASK (0x1F << I40E_GLGEN_MDIO_I2C_SEL_PHY2_ADDRESS_SHIFT)
+#define I40E_GLGEN_MDIO_I2C_SEL_PHY3_ADDRESS_SHIFT 20
+#define I40E_GLGEN_MDIO_I2C_SEL_PHY3_ADDRESS_MASK (0x1F << I40E_GLGEN_MDIO_I2C_SEL_PHY3_ADDRESS_SHIFT)
+#define I40E_GLGEN_MDIO_I2C_SEL_MDIO_IF_MODE_SHIFT 25
+#define I40E_GLGEN_MDIO_I2C_SEL_MDIO_IF_MODE_MASK (0xF << I40E_GLGEN_MDIO_I2C_SEL_MDIO_IF_MODE_SHIFT)
+#define I40E_GLGEN_MDIO_I2C_SEL_EN_FAST_MODE_SHIFT 31
+#define I40E_GLGEN_MDIO_I2C_SEL_EN_FAST_MODE_MASK (0x1u << I40E_GLGEN_MDIO_I2C_SEL_EN_FAST_MODE_SHIFT) /* unsigned: 1 << 31 on signed int is UB (C11 6.5.7) */
+#define I40E_GLGEN_MSCA(_i) (0x0008818C + ((_i) * 4)) /* _i=0...3 */
+#define I40E_GLGEN_MSCA_MAX_INDEX 3
+#define I40E_GLGEN_MSCA_MDIADD_SHIFT 0
+#define I40E_GLGEN_MSCA_MDIADD_MASK (0xFFFF << I40E_GLGEN_MSCA_MDIADD_SHIFT)
+#define I40E_GLGEN_MSCA_DEVADD_SHIFT 16
+#define I40E_GLGEN_MSCA_DEVADD_MASK (0x1F << I40E_GLGEN_MSCA_DEVADD_SHIFT)
+#define I40E_GLGEN_MSCA_PHYADD_SHIFT 21
+#define I40E_GLGEN_MSCA_PHYADD_MASK (0x1F << I40E_GLGEN_MSCA_PHYADD_SHIFT)
+#define I40E_GLGEN_MSCA_OPCODE_SHIFT 26
+#define I40E_GLGEN_MSCA_OPCODE_MASK (0x3 << I40E_GLGEN_MSCA_OPCODE_SHIFT)
+#define I40E_GLGEN_MSCA_STCODE_SHIFT 28
+#define I40E_GLGEN_MSCA_STCODE_MASK (0x3 << I40E_GLGEN_MSCA_STCODE_SHIFT)
+#define I40E_GLGEN_MSCA_MDICMD_SHIFT 30
+#define I40E_GLGEN_MSCA_MDICMD_MASK (0x1 << I40E_GLGEN_MSCA_MDICMD_SHIFT)
+#define I40E_GLGEN_MSCA_MDIINPROGEN_SHIFT 31
+#define I40E_GLGEN_MSCA_MDIINPROGEN_MASK (0x1u << I40E_GLGEN_MSCA_MDIINPROGEN_SHIFT) /* unsigned: 1 << 31 on signed int is UB (C11 6.5.7) */
+#define I40E_GLGEN_MSRWD(_i) (0x0008819C + ((_i) * 4)) /* _i=0...3 */
+#define I40E_GLGEN_MSRWD_MAX_INDEX 3
+#define I40E_GLGEN_MSRWD_MDIWRDATA_SHIFT 0
+#define I40E_GLGEN_MSRWD_MDIWRDATA_MASK (0xFFFF << I40E_GLGEN_MSRWD_MDIWRDATA_SHIFT)
+#define I40E_GLGEN_MSRWD_MDIRDDATA_SHIFT 16
+#define I40E_GLGEN_MSRWD_MDIRDDATA_MASK (0xFFFFu << I40E_GLGEN_MSRWD_MDIRDDATA_SHIFT) /* unsigned: 0xFFFF << 16 sets bit 31 of signed int — UB */
+#define I40E_GLGEN_PCIFCNCNT 0x001C0AB4
+#define I40E_GLGEN_PCIFCNCNT_PCIPFCNT_SHIFT 0
+#define I40E_GLGEN_PCIFCNCNT_PCIPFCNT_MASK (0x1F << I40E_GLGEN_PCIFCNCNT_PCIPFCNT_SHIFT)
+#define I40E_GLGEN_PCIFCNCNT_PCIVFCNT_SHIFT 16
+#define I40E_GLGEN_PCIFCNCNT_PCIVFCNT_MASK (0xFF << I40E_GLGEN_PCIFCNCNT_PCIVFCNT_SHIFT)
+#define I40E_GLGEN_PE_ENA 0x000B81A0
+#define I40E_GLGEN_PE_ENA_PE_ENA_SHIFT 0
+#define I40E_GLGEN_PE_ENA_PE_ENA_MASK (0x1 << I40E_GLGEN_PE_ENA_PE_ENA_SHIFT)
+#define I40E_GLGEN_PE_ENA_PE_CLK_SRC_SEL_SHIFT 1
+#define I40E_GLGEN_PE_ENA_PE_CLK_SRC_SEL_MASK (0x3 << I40E_GLGEN_PE_ENA_PE_CLK_SRC_SEL_SHIFT)
+#define I40E_GLGEN_RSTAT 0x000B8188
+#define I40E_GLGEN_RSTAT_DEVSTATE_SHIFT 0
+#define I40E_GLGEN_RSTAT_DEVSTATE_MASK (0x3 << I40E_GLGEN_RSTAT_DEVSTATE_SHIFT)
+#define I40E_GLGEN_RSTAT_RESET_TYPE_SHIFT 2
+#define I40E_GLGEN_RSTAT_RESET_TYPE_MASK (0x3 << I40E_GLGEN_RSTAT_RESET_TYPE_SHIFT)
+#define I40E_GLGEN_RSTAT_CORERCNT_SHIFT 4
+#define I40E_GLGEN_RSTAT_CORERCNT_MASK (0x3 << I40E_GLGEN_RSTAT_CORERCNT_SHIFT)
+#define I40E_GLGEN_RSTAT_GLOBRCNT_SHIFT 6
+#define I40E_GLGEN_RSTAT_GLOBRCNT_MASK (0x3 << I40E_GLGEN_RSTAT_GLOBRCNT_SHIFT)
+#define I40E_GLGEN_RSTAT_EMPRCNT_SHIFT 8
+#define I40E_GLGEN_RSTAT_EMPRCNT_MASK (0x3 << I40E_GLGEN_RSTAT_EMPRCNT_SHIFT)
+#define I40E_GLGEN_RSTAT_TIME_TO_RST_SHIFT 10
+#define I40E_GLGEN_RSTAT_TIME_TO_RST_MASK (0x3F << I40E_GLGEN_RSTAT_TIME_TO_RST_SHIFT)
+#define I40E_GLGEN_RSTCTL 0x000B8180
+#define I40E_GLGEN_RSTCTL_GRSTDEL_SHIFT 0
+#define I40E_GLGEN_RSTCTL_GRSTDEL_MASK (0x3F << I40E_GLGEN_RSTCTL_GRSTDEL_SHIFT)
+#define I40E_GLGEN_RSTCTL_ECC_RST_ENA_SHIFT 8
+#define I40E_GLGEN_RSTCTL_ECC_RST_ENA_MASK (0x1 << I40E_GLGEN_RSTCTL_ECC_RST_ENA_SHIFT)
+#define I40E_GLGEN_RSTENA_EMP 0x000B818C
+#define I40E_GLGEN_RSTENA_EMP_EMP_RST_ENA_SHIFT 0
+#define I40E_GLGEN_RSTENA_EMP_EMP_RST_ENA_MASK (0x1 << I40E_GLGEN_RSTENA_EMP_EMP_RST_ENA_SHIFT)
+#define I40E_GLGEN_RTRIG 0x000B8190
+#define I40E_GLGEN_RTRIG_CORER_SHIFT 0
+#define I40E_GLGEN_RTRIG_CORER_MASK (0x1 << I40E_GLGEN_RTRIG_CORER_SHIFT)
+#define I40E_GLGEN_RTRIG_GLOBR_SHIFT 1
+#define I40E_GLGEN_RTRIG_GLOBR_MASK (0x1 << I40E_GLGEN_RTRIG_GLOBR_SHIFT)
+#define I40E_GLGEN_RTRIG_EMPFWR_SHIFT 2
+#define I40E_GLGEN_RTRIG_EMPFWR_MASK (0x1 << I40E_GLGEN_RTRIG_EMPFWR_SHIFT)
+#define I40E_GLGEN_STAT 0x000B612C
+#define I40E_GLGEN_STAT_HWRSVD0_SHIFT 0
+#define I40E_GLGEN_STAT_HWRSVD0_MASK (0x3 << I40E_GLGEN_STAT_HWRSVD0_SHIFT)
+#define I40E_GLGEN_STAT_DCBEN_SHIFT 2
+#define I40E_GLGEN_STAT_DCBEN_MASK (0x1 << I40E_GLGEN_STAT_DCBEN_SHIFT)
+#define I40E_GLGEN_STAT_VTEN_SHIFT 3
+#define I40E_GLGEN_STAT_VTEN_MASK (0x1 << I40E_GLGEN_STAT_VTEN_SHIFT)
+#define I40E_GLGEN_STAT_FCOEN_SHIFT 4
+#define I40E_GLGEN_STAT_FCOEN_MASK (0x1 << I40E_GLGEN_STAT_FCOEN_SHIFT)
+#define I40E_GLGEN_STAT_EVBEN_SHIFT 5
+#define I40E_GLGEN_STAT_EVBEN_MASK (0x1 << I40E_GLGEN_STAT_EVBEN_SHIFT)
+#define I40E_GLGEN_STAT_HWRSVD1_SHIFT 6
+#define I40E_GLGEN_STAT_HWRSVD1_MASK (0x3 << I40E_GLGEN_STAT_HWRSVD1_SHIFT)
+#define I40E_GLGEN_VFLRSTAT(_i) (0x00092600 + ((_i) * 4)) /* _i=0...3 */
+#define I40E_GLGEN_VFLRSTAT_MAX_INDEX 3
+#define I40E_GLGEN_VFLRSTAT_VFLRE_SHIFT 0
+#define I40E_GLGEN_VFLRSTAT_VFLRE_MASK (0xFFFFFFFF << I40E_GLGEN_VFLRSTAT_VFLRE_SHIFT)
+#define I40E_GLVFGEN_TIMER 0x000881BC
+#define I40E_GLVFGEN_TIMER_GTIME_SHIFT 0
+#define I40E_GLVFGEN_TIMER_GTIME_MASK (0xFFFFFFFF << I40E_GLVFGEN_TIMER_GTIME_SHIFT)
+#define I40E_PFGEN_CTRL 0x00092400
+#define I40E_PFGEN_CTRL_PFSWR_SHIFT 0
+#define I40E_PFGEN_CTRL_PFSWR_MASK (0x1 << I40E_PFGEN_CTRL_PFSWR_SHIFT)
+#define I40E_PFGEN_DRUN 0x00092500
+#define I40E_PFGEN_DRUN_DRVUNLD_SHIFT 0
+#define I40E_PFGEN_DRUN_DRVUNLD_MASK (0x1 << I40E_PFGEN_DRUN_DRVUNLD_SHIFT)
+#define I40E_PFGEN_PORTNUM 0x001C0480
+#define I40E_PFGEN_PORTNUM_PORT_NUM_SHIFT 0
+#define I40E_PFGEN_PORTNUM_PORT_NUM_MASK (0x3 << I40E_PFGEN_PORTNUM_PORT_NUM_SHIFT)
+#define I40E_PFGEN_STATE 0x00088000
+#define I40E_PFGEN_STATE_PFPEEN_SHIFT 0
+#define I40E_PFGEN_STATE_PFPEEN_MASK (0x1 << I40E_PFGEN_STATE_PFPEEN_SHIFT)
+#define I40E_PFGEN_STATE_PFFCEN_SHIFT 1
+#define I40E_PFGEN_STATE_PFFCEN_MASK (0x1 << I40E_PFGEN_STATE_PFFCEN_SHIFT)
+#define I40E_PFGEN_STATE_PFLINKEN_SHIFT 2
+#define I40E_PFGEN_STATE_PFLINKEN_MASK (0x1 << I40E_PFGEN_STATE_PFLINKEN_SHIFT)
+#define I40E_PFGEN_STATE_PFSCEN_SHIFT 3
+#define I40E_PFGEN_STATE_PFSCEN_MASK (0x1 << I40E_PFGEN_STATE_PFSCEN_SHIFT)
+#define I40E_PRTGEN_CNF 0x000B8120
+#define I40E_PRTGEN_CNF_PORT_DIS_SHIFT 0
+#define I40E_PRTGEN_CNF_PORT_DIS_MASK (0x1 << I40E_PRTGEN_CNF_PORT_DIS_SHIFT)
+#define I40E_PRTGEN_CNF_ALLOW_PORT_DIS_SHIFT 1
+#define I40E_PRTGEN_CNF_ALLOW_PORT_DIS_MASK (0x1 << I40E_PRTGEN_CNF_ALLOW_PORT_DIS_SHIFT)
+#define I40E_PRTGEN_CNF_EMP_PORT_DIS_SHIFT 2
+#define I40E_PRTGEN_CNF_EMP_PORT_DIS_MASK (0x1 << I40E_PRTGEN_CNF_EMP_PORT_DIS_SHIFT)
+#define I40E_PRTGEN_CNF2 0x000B8160
+#define I40E_PRTGEN_CNF2_ACTIVATE_PORT_LINK_SHIFT 0
+#define I40E_PRTGEN_CNF2_ACTIVATE_PORT_LINK_MASK (0x1 << I40E_PRTGEN_CNF2_ACTIVATE_PORT_LINK_SHIFT)
+#define I40E_PRTGEN_STATUS 0x000B8100
+#define I40E_PRTGEN_STATUS_PORT_VALID_SHIFT 0
+#define I40E_PRTGEN_STATUS_PORT_VALID_MASK (0x1 << I40E_PRTGEN_STATUS_PORT_VALID_SHIFT)
+#define I40E_PRTGEN_STATUS_PORT_ACTIVE_SHIFT 1
+#define I40E_PRTGEN_STATUS_PORT_ACTIVE_MASK (0x1 << I40E_PRTGEN_STATUS_PORT_ACTIVE_SHIFT)
+#define I40E_VFGEN_RSTAT1(_VF) (0x00074400 + ((_VF) * 4)) /* _i=0...127 */
+#define I40E_VFGEN_RSTAT1_MAX_INDEX 127
+#define I40E_VFGEN_RSTAT1_VFR_STATE_SHIFT 0
+#define I40E_VFGEN_RSTAT1_VFR_STATE_MASK (0x3 << I40E_VFGEN_RSTAT1_VFR_STATE_SHIFT)
+#define I40E_VPGEN_VFRSTAT(_VF) (0x00091C00 + ((_VF) * 4)) /* _i=0...127 */
+#define I40E_VPGEN_VFRSTAT_MAX_INDEX 127
+#define I40E_VPGEN_VFRSTAT_VFRD_SHIFT 0
+#define I40E_VPGEN_VFRSTAT_VFRD_MASK (0x1 << I40E_VPGEN_VFRSTAT_VFRD_SHIFT)
+#define I40E_VPGEN_VFRTRIG(_VF) (0x00091800 + ((_VF) * 4)) /* _i=0...127 */
+#define I40E_VPGEN_VFRTRIG_MAX_INDEX 127
+#define I40E_VPGEN_VFRTRIG_VFSWR_SHIFT 0
+#define I40E_VPGEN_VFRTRIG_VFSWR_MASK (0x1 << I40E_VPGEN_VFRTRIG_VFSWR_SHIFT)
+#define I40E_VSIGEN_RSTAT(_VSI) (0x00090800 + ((_VSI) * 4)) /* _i=0...383 */
+#define I40E_VSIGEN_RSTAT_MAX_INDEX 383
+#define I40E_VSIGEN_RSTAT_VMRD_SHIFT 0
+#define I40E_VSIGEN_RSTAT_VMRD_MASK (0x1 << I40E_VSIGEN_RSTAT_VMRD_SHIFT)
+#define I40E_VSIGEN_RTRIG(_VSI) (0x00090000 + ((_VSI) * 4)) /* _i=0...383 */
+#define I40E_VSIGEN_RTRIG_MAX_INDEX 383
+#define I40E_VSIGEN_RTRIG_VMSWR_SHIFT 0
+#define I40E_VSIGEN_RTRIG_VMSWR_MASK (0x1 << I40E_VSIGEN_RTRIG_VMSWR_SHIFT)
+#define I40E_GLHMC_APBVTINUSEBASE(_i) (0x000C4a00 + ((_i) * 4)) /* _i=0...15 */
+#define I40E_GLHMC_APBVTINUSEBASE_MAX_INDEX 15
+#define I40E_GLHMC_APBVTINUSEBASE_FPMAPBINUSEBASE_SHIFT 0
+#define I40E_GLHMC_APBVTINUSEBASE_FPMAPBINUSEBASE_MASK (0xFFFFFF << I40E_GLHMC_APBVTINUSEBASE_FPMAPBINUSEBASE_SHIFT)
+#define I40E_GLHMC_CEQPART(_i) (0x001312C0 + ((_i) * 4)) /* _i=0...15 */
+#define I40E_GLHMC_CEQPART_MAX_INDEX 15
+#define I40E_GLHMC_CEQPART_PMCEQBASE_SHIFT 0
+#define I40E_GLHMC_CEQPART_PMCEQBASE_MASK (0xFF << I40E_GLHMC_CEQPART_PMCEQBASE_SHIFT)
+#define I40E_GLHMC_CEQPART_PMCEQSIZE_SHIFT 16
+#define I40E_GLHMC_CEQPART_PMCEQSIZE_MASK (0x1FF << I40E_GLHMC_CEQPART_PMCEQSIZE_SHIFT)
+#define I40E_GLHMC_DBCQPART(_i) (0x00131240 + ((_i) * 4)) /* _i=0...15 */
+#define I40E_GLHMC_DBCQPART_MAX_INDEX 15
+#define I40E_GLHMC_DBCQPART_PMDBCQBASE_SHIFT 0
+#define I40E_GLHMC_DBCQPART_PMDBCQBASE_MASK (0x3FFF << I40E_GLHMC_DBCQPART_PMDBCQBASE_SHIFT)
+#define I40E_GLHMC_DBCQPART_PMDBCQSIZE_SHIFT 16
+#define I40E_GLHMC_DBCQPART_PMDBCQSIZE_MASK (0x7FFF << I40E_GLHMC_DBCQPART_PMDBCQSIZE_SHIFT)
+#define I40E_GLHMC_DBQPPART(_i) (0x00138D80 + ((_i) * 4)) /* _i=0...15 */
+#define I40E_GLHMC_DBQPPART_MAX_INDEX 15
+#define I40E_GLHMC_DBQPPART_PMDBQPBASE_SHIFT 0
+#define I40E_GLHMC_DBQPPART_PMDBQPBASE_MASK (0x3FFF << I40E_GLHMC_DBQPPART_PMDBQPBASE_SHIFT)
+#define I40E_GLHMC_DBQPPART_PMDBQPSIZE_SHIFT 16
+#define I40E_GLHMC_DBQPPART_PMDBQPSIZE_MASK (0x7FFF << I40E_GLHMC_DBQPPART_PMDBQPSIZE_SHIFT)
+#define I40E_GLHMC_FCOEDDPBASE(_i) (0x000C6600 + ((_i) * 4)) /* _i=0...15 */
+#define I40E_GLHMC_FCOEDDPBASE_MAX_INDEX 15
+#define I40E_GLHMC_FCOEDDPBASE_FPMFCOEDDPBASE_SHIFT 0
+#define I40E_GLHMC_FCOEDDPBASE_FPMFCOEDDPBASE_MASK (0xFFFFFF << I40E_GLHMC_FCOEDDPBASE_FPMFCOEDDPBASE_SHIFT)
+#define I40E_GLHMC_FCOEDDPCNT(_i) (0x000C6700 + ((_i) * 4)) /* _i=0...15 */
+#define I40E_GLHMC_FCOEDDPCNT_MAX_INDEX 15
+#define I40E_GLHMC_FCOEDDPCNT_FPMFCOEDDPCNT_SHIFT 0
+#define I40E_GLHMC_FCOEDDPCNT_FPMFCOEDDPCNT_MASK (0xFFFFF << I40E_GLHMC_FCOEDDPCNT_FPMFCOEDDPCNT_SHIFT)
+#define I40E_GLHMC_FCOEDDPOBJSZ 0x000C2010
+#define I40E_GLHMC_FCOEDDPOBJSZ_PMFCOEDDPOBJSZ_SHIFT 0
+#define I40E_GLHMC_FCOEDDPOBJSZ_PMFCOEDDPOBJSZ_MASK (0xF << I40E_GLHMC_FCOEDDPOBJSZ_PMFCOEDDPOBJSZ_SHIFT)
+#define I40E_GLHMC_FCOEFBASE(_i) (0x000C6800 + ((_i) * 4)) /* _i=0...15 */
+#define I40E_GLHMC_FCOEFBASE_MAX_INDEX 15
+#define I40E_GLHMC_FCOEFBASE_FPMFCOEFBASE_SHIFT 0
+#define I40E_GLHMC_FCOEFBASE_FPMFCOEFBASE_MASK (0xFFFFFF << I40E_GLHMC_FCOEFBASE_FPMFCOEFBASE_SHIFT)
+#define I40E_GLHMC_FCOEFCNT(_i) (0x000C6900 + ((_i) * 4)) /* _i=0...15 */
+#define I40E_GLHMC_FCOEFCNT_MAX_INDEX 15
+#define I40E_GLHMC_FCOEFCNT_FPMFCOEFCNT_SHIFT 0
+#define I40E_GLHMC_FCOEFCNT_FPMFCOEFCNT_MASK (0x7FFFFF << I40E_GLHMC_FCOEFCNT_FPMFCOEFCNT_SHIFT)
+#define I40E_GLHMC_FCOEFMAX 0x000C20D0
+#define I40E_GLHMC_FCOEFMAX_PMFCOEFMAX_SHIFT 0
+#define I40E_GLHMC_FCOEFMAX_PMFCOEFMAX_MASK (0xFFFF << I40E_GLHMC_FCOEFMAX_PMFCOEFMAX_SHIFT)
+#define I40E_GLHMC_FCOEFOBJSZ 0x000C2018
+#define I40E_GLHMC_FCOEFOBJSZ_PMFCOEFOBJSZ_SHIFT 0
+#define I40E_GLHMC_FCOEFOBJSZ_PMFCOEFOBJSZ_MASK (0xF << I40E_GLHMC_FCOEFOBJSZ_PMFCOEFOBJSZ_SHIFT)
+#define I40E_GLHMC_FCOEMAX 0x000C2014
+#define I40E_GLHMC_FCOEMAX_PMFCOEMAX_SHIFT 0
+#define I40E_GLHMC_FCOEMAX_PMFCOEMAX_MASK (0x1FFF << I40E_GLHMC_FCOEMAX_PMFCOEMAX_SHIFT)
+#define I40E_GLHMC_FSIAVBASE(_i) (0x000C5600 + ((_i) * 4)) /* _i=0...15 */
+#define I40E_GLHMC_FSIAVBASE_MAX_INDEX 15
+#define I40E_GLHMC_FSIAVBASE_FPMFSIAVBASE_SHIFT 0
+#define I40E_GLHMC_FSIAVBASE_FPMFSIAVBASE_MASK (0xFFFFFF << I40E_GLHMC_FSIAVBASE_FPMFSIAVBASE_SHIFT)
+#define I40E_GLHMC_FSIAVCNT(_i) (0x000C5700 + ((_i) * 4)) /* _i=0...15 */
+#define I40E_GLHMC_FSIAVCNT_MAX_INDEX 15
+#define I40E_GLHMC_FSIAVCNT_FPMFSIAVCNT_SHIFT 0
+#define I40E_GLHMC_FSIAVCNT_FPMFSIAVCNT_MASK (0x1FFFFFFF << I40E_GLHMC_FSIAVCNT_FPMFSIAVCNT_SHIFT)
+#define I40E_GLHMC_FSIAVCNT_RSVD_SHIFT 29
+#define I40E_GLHMC_FSIAVCNT_RSVD_MASK (0x7 << I40E_GLHMC_FSIAVCNT_RSVD_SHIFT)
+#define I40E_GLHMC_FSIAVMAX 0x000C2068
+#define I40E_GLHMC_FSIAVMAX_PMFSIAVMAX_SHIFT 0
+#define I40E_GLHMC_FSIAVMAX_PMFSIAVMAX_MASK (0x1FFFF << I40E_GLHMC_FSIAVMAX_PMFSIAVMAX_SHIFT)
+#define I40E_GLHMC_FSIAVOBJSZ 0x000C2064
+#define I40E_GLHMC_FSIAVOBJSZ_PMFSIAVOBJSZ_SHIFT 0
+#define I40E_GLHMC_FSIAVOBJSZ_PMFSIAVOBJSZ_MASK (0xF << I40E_GLHMC_FSIAVOBJSZ_PMFSIAVOBJSZ_SHIFT)
+#define I40E_GLHMC_FSIMCBASE(_i) (0x000C6000 + ((_i) * 4)) /* _i=0...15 */
+#define I40E_GLHMC_FSIMCBASE_MAX_INDEX 15
+#define I40E_GLHMC_FSIMCBASE_FPMFSIMCBASE_SHIFT 0
+#define I40E_GLHMC_FSIMCBASE_FPMFSIMCBASE_MASK (0xFFFFFF << I40E_GLHMC_FSIMCBASE_FPMFSIMCBASE_SHIFT)
+#define I40E_GLHMC_FSIMCCNT(_i) (0x000C6100 + ((_i) * 4)) /* _i=0...15 */
+#define I40E_GLHMC_FSIMCCNT_MAX_INDEX 15
+#define I40E_GLHMC_FSIMCCNT_FPMFSIMCSZ_SHIFT 0
+#define I40E_GLHMC_FSIMCCNT_FPMFSIMCSZ_MASK (0x1FFFFFFF << I40E_GLHMC_FSIMCCNT_FPMFSIMCSZ_SHIFT)
+#define I40E_GLHMC_FSIMCMAX 0x000C2060
+#define I40E_GLHMC_FSIMCMAX_PMFSIMCMAX_SHIFT 0
+#define I40E_GLHMC_FSIMCMAX_PMFSIMCMAX_MASK (0x3FFF << I40E_GLHMC_FSIMCMAX_PMFSIMCMAX_SHIFT)
+#define I40E_GLHMC_FSIMCOBJSZ 0x000C205c
+#define I40E_GLHMC_FSIMCOBJSZ_PMFSIMCOBJSZ_SHIFT 0
+#define I40E_GLHMC_FSIMCOBJSZ_PMFSIMCOBJSZ_MASK (0xF << I40E_GLHMC_FSIMCOBJSZ_PMFSIMCOBJSZ_SHIFT)
+#define I40E_GLHMC_LANQMAX 0x000C2008
+#define I40E_GLHMC_LANQMAX_PMLANQMAX_SHIFT 0
+#define I40E_GLHMC_LANQMAX_PMLANQMAX_MASK (0x7FF << I40E_GLHMC_LANQMAX_PMLANQMAX_SHIFT)
+#define I40E_GLHMC_LANRXBASE(_i) (0x000C6400 + ((_i) * 4)) /* _i=0...15 */
+#define I40E_GLHMC_LANRXBASE_MAX_INDEX 15
+#define I40E_GLHMC_LANRXBASE_FPMLANRXBASE_SHIFT 0
+#define I40E_GLHMC_LANRXBASE_FPMLANRXBASE_MASK (0xFFFFFF << I40E_GLHMC_LANRXBASE_FPMLANRXBASE_SHIFT)
+#define I40E_GLHMC_LANRXCNT(_i) (0x000C6500 + ((_i) * 4)) /* _i=0...15 */
+#define I40E_GLHMC_LANRXCNT_MAX_INDEX 15
+#define I40E_GLHMC_LANRXCNT_FPMLANRXCNT_SHIFT 0
+#define I40E_GLHMC_LANRXCNT_FPMLANRXCNT_MASK (0x7FF << I40E_GLHMC_LANRXCNT_FPMLANRXCNT_SHIFT)
+#define I40E_GLHMC_LANRXOBJSZ 0x000C200c
+#define I40E_GLHMC_LANRXOBJSZ_PMLANRXOBJSZ_SHIFT 0
+#define I40E_GLHMC_LANRXOBJSZ_PMLANRXOBJSZ_MASK (0xF << I40E_GLHMC_LANRXOBJSZ_PMLANRXOBJSZ_SHIFT)
+#define I40E_GLHMC_LANTXBASE(_i) (0x000C6200 + ((_i) * 4)) /* _i=0...15 */
+#define I40E_GLHMC_LANTXBASE_MAX_INDEX 15
+#define I40E_GLHMC_LANTXBASE_FPMLANTXBASE_SHIFT 0
+#define I40E_GLHMC_LANTXBASE_FPMLANTXBASE_MASK (0xFFFFFF << I40E_GLHMC_LANTXBASE_FPMLANTXBASE_SHIFT)
+#define I40E_GLHMC_LANTXBASE_RSVD_SHIFT 24
+#define I40E_GLHMC_LANTXBASE_RSVD_MASK (0xFF << I40E_GLHMC_LANTXBASE_RSVD_SHIFT)
+#define I40E_GLHMC_LANTXCNT(_i) (0x000C6300 + ((_i) * 4)) /* _i=0...15 */
+#define I40E_GLHMC_LANTXCNT_MAX_INDEX 15
+#define I40E_GLHMC_LANTXCNT_FPMLANTXCNT_SHIFT 0
+#define I40E_GLHMC_LANTXCNT_FPMLANTXCNT_MASK (0x7FF << I40E_GLHMC_LANTXCNT_FPMLANTXCNT_SHIFT)
+#define I40E_GLHMC_LANTXOBJSZ 0x000C2004
+#define I40E_GLHMC_LANTXOBJSZ_PMLANTXOBJSZ_SHIFT 0
+#define I40E_GLHMC_LANTXOBJSZ_PMLANTXOBJSZ_MASK (0xF << I40E_GLHMC_LANTXOBJSZ_PMLANTXOBJSZ_SHIFT)
+#define I40E_GLHMC_PEARPBASE(_i) (0x000C4800 + ((_i) * 4)) /* _i=0...15 */
+#define I40E_GLHMC_PEARPBASE_MAX_INDEX 15
+#define I40E_GLHMC_PEARPBASE_FPMPEARPBASE_SHIFT 0
+#define I40E_GLHMC_PEARPBASE_FPMPEARPBASE_MASK (0xFFFFFF << I40E_GLHMC_PEARPBASE_FPMPEARPBASE_SHIFT)
+#define I40E_GLHMC_PEARPCNT(_i) (0x000C4900 + ((_i) * 4)) /* _i=0...15 */
+#define I40E_GLHMC_PEARPCNT_MAX_INDEX 15
+#define I40E_GLHMC_PEARPCNT_FPMPEARPCNT_SHIFT 0
+#define I40E_GLHMC_PEARPCNT_FPMPEARPCNT_MASK (0x1FFFFFFF << I40E_GLHMC_PEARPCNT_FPMPEARPCNT_SHIFT)
+#define I40E_GLHMC_PEARPMAX 0x000C2038
+#define I40E_GLHMC_PEARPMAX_PMPEARPMAX_SHIFT 0
+#define I40E_GLHMC_PEARPMAX_PMPEARPMAX_MASK (0x1FFFF << I40E_GLHMC_PEARPMAX_PMPEARPMAX_SHIFT)
+#define I40E_GLHMC_PEARPOBJSZ 0x000C2034
+#define I40E_GLHMC_PEARPOBJSZ_PMPEARPOBJSZ_SHIFT 0
+#define I40E_GLHMC_PEARPOBJSZ_PMPEARPOBJSZ_MASK (0x7 << I40E_GLHMC_PEARPOBJSZ_PMPEARPOBJSZ_SHIFT)
+#define I40E_GLHMC_PECQBASE(_i) (0x000C4200 + ((_i) * 4)) /* _i=0...15 */
+#define I40E_GLHMC_PECQBASE_MAX_INDEX 15
+#define I40E_GLHMC_PECQBASE_FPMPECQBASE_SHIFT 0
+#define I40E_GLHMC_PECQBASE_FPMPECQBASE_MASK (0xFFFFFF << I40E_GLHMC_PECQBASE_FPMPECQBASE_SHIFT)
+#define I40E_GLHMC_PECQCNT(_i) (0x000C4300 + ((_i) * 4)) /* _i=0...15 */
+#define I40E_GLHMC_PECQCNT_MAX_INDEX 15
+#define I40E_GLHMC_PECQCNT_FPMPECQCNT_SHIFT 0
+#define I40E_GLHMC_PECQCNT_FPMPECQCNT_MASK (0x1FFFFFFF << I40E_GLHMC_PECQCNT_FPMPECQCNT_SHIFT)
+#define I40E_GLHMC_PECQOBJSZ 0x000C2020
+#define I40E_GLHMC_PECQOBJSZ_PMPECQOBJSZ_SHIFT 0
+#define I40E_GLHMC_PECQOBJSZ_PMPECQOBJSZ_MASK (0xF << I40E_GLHMC_PECQOBJSZ_PMPECQOBJSZ_SHIFT)
+#define I40E_GLHMC_PEHTCNT(_i) (0x000C4700 + ((_i) * 4)) /* _i=0...15 */
+#define I40E_GLHMC_PEHTCNT_MAX_INDEX 15
+#define I40E_GLHMC_PEHTCNT_FPMPEHTCNT_SHIFT 0
+#define I40E_GLHMC_PEHTCNT_FPMPEHTCNT_MASK (0x1FFFFFFF << I40E_GLHMC_PEHTCNT_FPMPEHTCNT_SHIFT)
+#define I40E_GLHMC_PEHTEBASE(_i) (0x000C4600 + ((_i) * 4)) /* _i=0...15 */
+#define I40E_GLHMC_PEHTEBASE_MAX_INDEX 15
+#define I40E_GLHMC_PEHTEBASE_FPMPEHTEBASE_SHIFT 0
+#define I40E_GLHMC_PEHTEBASE_FPMPEHTEBASE_MASK (0xFFFFFF << I40E_GLHMC_PEHTEBASE_FPMPEHTEBASE_SHIFT)
+#define I40E_GLHMC_PEHTEOBJSZ 0x000C202c
+#define I40E_GLHMC_PEHTEOBJSZ_PMPEHTEOBJSZ_SHIFT 0
+#define I40E_GLHMC_PEHTEOBJSZ_PMPEHTEOBJSZ_MASK (0xF << I40E_GLHMC_PEHTEOBJSZ_PMPEHTEOBJSZ_SHIFT)
+#define I40E_GLHMC_PEHTMAX 0x000C2030
+#define I40E_GLHMC_PEHTMAX_PMPEHTMAX_SHIFT 0
+#define I40E_GLHMC_PEHTMAX_PMPEHTMAX_MASK (0x1FFFFF << I40E_GLHMC_PEHTMAX_PMPEHTMAX_SHIFT)
+#define I40E_GLHMC_PEMRBASE(_i) (0x000C4c00 + ((_i) * 4)) /* _i=0...15 */
+#define I40E_GLHMC_PEMRBASE_MAX_INDEX 15
+#define I40E_GLHMC_PEMRBASE_FPMPEMRBASE_SHIFT 0
+#define I40E_GLHMC_PEMRBASE_FPMPEMRBASE_MASK (0xFFFFFF << I40E_GLHMC_PEMRBASE_FPMPEMRBASE_SHIFT)
+#define I40E_GLHMC_PEMRCNT(_i) (0x000C4d00 + ((_i) * 4)) /* _i=0...15 */
+#define I40E_GLHMC_PEMRCNT_MAX_INDEX 15
+#define I40E_GLHMC_PEMRCNT_FPMPEMRSZ_SHIFT 0
+#define I40E_GLHMC_PEMRCNT_FPMPEMRSZ_MASK (0x1FFFFFFF << I40E_GLHMC_PEMRCNT_FPMPEMRSZ_SHIFT)
+#define I40E_GLHMC_PEMRMAX 0x000C2040
+#define I40E_GLHMC_PEMRMAX_PMPEMRMAX_SHIFT 0
+#define I40E_GLHMC_PEMRMAX_PMPEMRMAX_MASK (0x7FFFFF << I40E_GLHMC_PEMRMAX_PMPEMRMAX_SHIFT)
+#define I40E_GLHMC_PEMROBJSZ 0x000C203c
+#define I40E_GLHMC_PEMROBJSZ_PMPEMROBJSZ_SHIFT 0
+#define I40E_GLHMC_PEMROBJSZ_PMPEMROBJSZ_MASK (0xF << I40E_GLHMC_PEMROBJSZ_PMPEMROBJSZ_SHIFT)
+#define I40E_GLHMC_PEPBLBASE(_i) (0x000C5800 + ((_i) * 4)) /* _i=0...15 */
+#define I40E_GLHMC_PEPBLBASE_MAX_INDEX 15
+#define I40E_GLHMC_PEPBLBASE_FPMPEPBLBASE_SHIFT 0
+#define I40E_GLHMC_PEPBLBASE_FPMPEPBLBASE_MASK (0xFFFFFF << I40E_GLHMC_PEPBLBASE_FPMPEPBLBASE_SHIFT)
+#define I40E_GLHMC_PEPBLCNT(_i) (0x000C5900 + ((_i) * 4)) /* _i=0...15 */
+#define I40E_GLHMC_PEPBLCNT_MAX_INDEX 15
+#define I40E_GLHMC_PEPBLCNT_FPMPEPBLCNT_SHIFT 0
+#define I40E_GLHMC_PEPBLCNT_FPMPEPBLCNT_MASK (0x1FFFFFFF << I40E_GLHMC_PEPBLCNT_FPMPEPBLCNT_SHIFT)
+#define I40E_GLHMC_PEPBLMAX 0x000C206c
+#define I40E_GLHMC_PEPBLMAX_PMPEPBLMAX_SHIFT 0
+#define I40E_GLHMC_PEPBLMAX_PMPEPBLMAX_MASK (0x1FFFFFFF << I40E_GLHMC_PEPBLMAX_PMPEPBLMAX_SHIFT)
+#define I40E_GLHMC_PEQ1BASE(_i) (0x000C5200 + ((_i) * 4)) /* _i=0...15 */
+#define I40E_GLHMC_PEQ1BASE_MAX_INDEX 15
+#define I40E_GLHMC_PEQ1BASE_FPMPEQ1BASE_SHIFT 0
+#define I40E_GLHMC_PEQ1BASE_FPMPEQ1BASE_MASK (0xFFFFFF << I40E_GLHMC_PEQ1BASE_FPMPEQ1BASE_SHIFT)
+#define I40E_GLHMC_PEQ1CNT(_i) (0x000C5300 + ((_i) * 4)) /* _i=0...15 */
+#define I40E_GLHMC_PEQ1CNT_MAX_INDEX 15
+#define I40E_GLHMC_PEQ1CNT_FPMPEQ1CNT_SHIFT 0
+#define I40E_GLHMC_PEQ1CNT_FPMPEQ1CNT_MASK (0x1FFFFFFF << I40E_GLHMC_PEQ1CNT_FPMPEQ1CNT_SHIFT)
+#define I40E_GLHMC_PEQ1FLBASE(_i) (0x000C5400 + ((_i) * 4)) /* _i=0...15 */
+#define I40E_GLHMC_PEQ1FLBASE_MAX_INDEX 15
+#define I40E_GLHMC_PEQ1FLBASE_FPMPEQ1FLBASE_SHIFT 0
+#define I40E_GLHMC_PEQ1FLBASE_FPMPEQ1FLBASE_MASK (0xFFFFFF << I40E_GLHMC_PEQ1FLBASE_FPMPEQ1FLBASE_SHIFT)
+#define I40E_GLHMC_PEQ1FLCNT(_i) (0x000C5500 + ((_i) * 4)) /* _i=0...15 */
+#define I40E_GLHMC_PEQ1FLCNT_MAX_INDEX 15
+#define I40E_GLHMC_PEQ1FLCNT_FPMPEQ1FLCNT_SHIFT 0
+#define I40E_GLHMC_PEQ1FLCNT_FPMPEQ1FLCNT_MASK (0x1FFFFFFF << I40E_GLHMC_PEQ1FLCNT_FPMPEQ1FLCNT_SHIFT)
+#define I40E_GLHMC_PEQ1FLMAX 0x000C2058
+#define I40E_GLHMC_PEQ1FLMAX_PMPEQ1FLMAX_SHIFT 0
+#define I40E_GLHMC_PEQ1FLMAX_PMPEQ1FLMAX_MASK (0x3FFFFF << I40E_GLHMC_PEQ1FLMAX_PMPEQ1FLMAX_SHIFT)
+#define I40E_GLHMC_PEQ1MAX 0x000C2054
+#define I40E_GLHMC_PEQ1MAX_PMPEQ1MAX_SHIFT 0
+#define I40E_GLHMC_PEQ1MAX_PMPEQ1MAX_MASK (0x3FFFFFF << I40E_GLHMC_PEQ1MAX_PMPEQ1MAX_SHIFT)
+#define I40E_GLHMC_PEQ1OBJSZ 0x000C2050
+#define I40E_GLHMC_PEQ1OBJSZ_PMPEQ1OBJSZ_SHIFT 0
+#define I40E_GLHMC_PEQ1OBJSZ_PMPEQ1OBJSZ_MASK (0xF << I40E_GLHMC_PEQ1OBJSZ_PMPEQ1OBJSZ_SHIFT)
+#define I40E_GLHMC_PEQPBASE(_i) (0x000C4000 + ((_i) * 4)) /* _i=0...15 */
+#define I40E_GLHMC_PEQPBASE_MAX_INDEX 15
+#define I40E_GLHMC_PEQPBASE_FPMPEQPBASE_SHIFT 0
+#define I40E_GLHMC_PEQPBASE_FPMPEQPBASE_MASK (0xFFFFFF << I40E_GLHMC_PEQPBASE_FPMPEQPBASE_SHIFT)
+#define I40E_GLHMC_PEQPCNT(_i) (0x000C4100 + ((_i) * 4)) /* _i=0...15 */
+#define I40E_GLHMC_PEQPCNT_MAX_INDEX 15
+#define I40E_GLHMC_PEQPCNT_FPMPEQPCNT_SHIFT 0
+#define I40E_GLHMC_PEQPCNT_FPMPEQPCNT_MASK (0x1FFFFFFF << I40E_GLHMC_PEQPCNT_FPMPEQPCNT_SHIFT)
+#define I40E_GLHMC_PEQPOBJSZ 0x000C201c
+#define I40E_GLHMC_PEQPOBJSZ_PMPEQPOBJSZ_SHIFT 0
+#define I40E_GLHMC_PEQPOBJSZ_PMPEQPOBJSZ_MASK (0xF << I40E_GLHMC_PEQPOBJSZ_PMPEQPOBJSZ_SHIFT)
+#define I40E_GLHMC_PESRQBASE(_i) (0x000C4400 + ((_i) * 4)) /* _i=0...15 */
+#define I40E_GLHMC_PESRQBASE_MAX_INDEX 15
+#define I40E_GLHMC_PESRQBASE_FPMPESRQBASE_SHIFT 0
+#define I40E_GLHMC_PESRQBASE_FPMPESRQBASE_MASK (0xFFFFFF << I40E_GLHMC_PESRQBASE_FPMPESRQBASE_SHIFT)
+#define I40E_GLHMC_PESRQCNT(_i) (0x000C4500 + ((_i) * 4)) /* _i=0...15 */
+#define I40E_GLHMC_PESRQCNT_MAX_INDEX 15
+#define I40E_GLHMC_PESRQCNT_FPMPESRQCNT_SHIFT 0
+#define I40E_GLHMC_PESRQCNT_FPMPESRQCNT_MASK (0x1FFFFFFF << I40E_GLHMC_PESRQCNT_FPMPESRQCNT_SHIFT)
+#define I40E_GLHMC_PESRQMAX 0x000C2028
+#define I40E_GLHMC_PESRQMAX_PMPESRQMAX_SHIFT 0
+#define I40E_GLHMC_PESRQMAX_PMPESRQMAX_MASK (0xFFFF << I40E_GLHMC_PESRQMAX_PMPESRQMAX_SHIFT)
+#define I40E_GLHMC_PESRQOBJSZ 0x000C2024
+#define I40E_GLHMC_PESRQOBJSZ_PMPESRQOBJSZ_SHIFT 0
+#define I40E_GLHMC_PESRQOBJSZ_PMPESRQOBJSZ_MASK (0xF << I40E_GLHMC_PESRQOBJSZ_PMPESRQOBJSZ_SHIFT)
+#define I40E_GLHMC_PESRQOBJSZ_RSVD_SHIFT 4
+#define I40E_GLHMC_PESRQOBJSZ_RSVD_MASK (0xFFFFFFF << I40E_GLHMC_PESRQOBJSZ_RSVD_SHIFT)
+#define I40E_GLHMC_PETIMERBASE(_i) (0x000C5A00 + ((_i) * 4)) /* _i=0...15 */
+#define I40E_GLHMC_PETIMERBASE_MAX_INDEX 15
+#define I40E_GLHMC_PETIMERBASE_FPMPETIMERBASE_SHIFT 0
+#define I40E_GLHMC_PETIMERBASE_FPMPETIMERBASE_MASK (0xFFFFFF << I40E_GLHMC_PETIMERBASE_FPMPETIMERBASE_SHIFT)
+#define I40E_GLHMC_PETIMERCNT(_i) (0x000C5B00 + ((_i) * 4)) /* _i=0...15 */
+#define I40E_GLHMC_PETIMERCNT_MAX_INDEX 15
+#define I40E_GLHMC_PETIMERCNT_FPMPETIMERCNT_SHIFT 0
+#define I40E_GLHMC_PETIMERCNT_FPMPETIMERCNT_MASK (0x1FFFFFFF << I40E_GLHMC_PETIMERCNT_FPMPETIMERCNT_SHIFT)
+#define I40E_GLHMC_PETIMERMAX 0x000C2084
+#define I40E_GLHMC_PETIMERMAX_PMPETIMERMAX_SHIFT 0
+#define I40E_GLHMC_PETIMERMAX_PMPETIMERMAX_MASK (0x1FFFFFFF << I40E_GLHMC_PETIMERMAX_PMPETIMERMAX_SHIFT)
+#define I40E_GLHMC_PETIMEROBJSZ 0x000C2080
+#define I40E_GLHMC_PETIMEROBJSZ_PMPETIMEROBJSZ_SHIFT 0
+#define I40E_GLHMC_PETIMEROBJSZ_PMPETIMEROBJSZ_MASK (0xF << I40E_GLHMC_PETIMEROBJSZ_PMPETIMEROBJSZ_SHIFT)
+#define I40E_GLHMC_PEXFBASE(_i) (0x000C4e00 + ((_i) * 4)) /* _i=0...15 */
+#define I40E_GLHMC_PEXFBASE_MAX_INDEX 15
+#define I40E_GLHMC_PEXFBASE_FPMPEXFBASE_SHIFT 0
+#define I40E_GLHMC_PEXFBASE_FPMPEXFBASE_MASK (0xFFFFFF << I40E_GLHMC_PEXFBASE_FPMPEXFBASE_SHIFT)
+#define I40E_GLHMC_PEXFCNT(_i) (0x000C4f00 + ((_i) * 4)) /* _i=0...15 */
+#define I40E_GLHMC_PEXFCNT_MAX_INDEX 15
+#define I40E_GLHMC_PEXFCNT_FPMPEXFCNT_SHIFT 0
+#define I40E_GLHMC_PEXFCNT_FPMPEXFCNT_MASK (0x1FFFFFFF << I40E_GLHMC_PEXFCNT_FPMPEXFCNT_SHIFT)
+#define I40E_GLHMC_PEXFFLBASE(_i) (0x000C5000 + ((_i) * 4)) /* _i=0...15 */
+#define I40E_GLHMC_PEXFFLBASE_MAX_INDEX 15
+#define I40E_GLHMC_PEXFFLBASE_FPMPEXFFLBASE_SHIFT 0
+#define I40E_GLHMC_PEXFFLBASE_FPMPEXFFLBASE_MASK (0xFFFFFF << I40E_GLHMC_PEXFFLBASE_FPMPEXFFLBASE_SHIFT)
+#define I40E_GLHMC_PEXFFLCNT(_i) (0x000C5100 + ((_i) * 4)) /* _i=0...15 */
+#define I40E_GLHMC_PEXFFLCNT_MAX_INDEX 15
+#define I40E_GLHMC_PEXFFLCNT_FPMPEXFFLCNT_SHIFT 0
+#define I40E_GLHMC_PEXFFLCNT_FPMPEXFFLCNT_MASK (0x1FFFFFFF << I40E_GLHMC_PEXFFLCNT_FPMPEXFFLCNT_SHIFT)
+#define I40E_GLHMC_PEXFFLMAX 0x000C204c
+#define I40E_GLHMC_PEXFFLMAX_PMPEXFFLMAX_SHIFT 0
+#define I40E_GLHMC_PEXFFLMAX_PMPEXFFLMAX_MASK (0x3FFFFF << I40E_GLHMC_PEXFFLMAX_PMPEXFFLMAX_SHIFT)
+#define I40E_GLHMC_PEXFMAX 0x000C2048
+#define I40E_GLHMC_PEXFMAX_PMPEXFMAX_SHIFT 0
+#define I40E_GLHMC_PEXFMAX_PMPEXFMAX_MASK (0x3FFFFFF << I40E_GLHMC_PEXFMAX_PMPEXFMAX_SHIFT)
+#define I40E_GLHMC_PEXFOBJSZ 0x000C2044
+#define I40E_GLHMC_PEXFOBJSZ_PMPEXFOBJSZ_SHIFT 0
+#define I40E_GLHMC_PEXFOBJSZ_PMPEXFOBJSZ_MASK (0xF << I40E_GLHMC_PEXFOBJSZ_PMPEXFOBJSZ_SHIFT)
+#define I40E_GLHMC_PEXFOBJSZ_RSVD_SHIFT 4
+#define I40E_GLHMC_PEXFOBJSZ_RSVD_MASK (0xFFFFFFF << I40E_GLHMC_PEXFOBJSZ_RSVD_SHIFT)
+#define I40E_GLHMC_PFASSIGN(_i) (0x000C0c00 + ((_i) * 4)) /* _i=0...15 */
+#define I40E_GLHMC_PFASSIGN_MAX_INDEX 15
+#define I40E_GLHMC_PFASSIGN_PMFCNPFASSIGN_SHIFT 0
+#define I40E_GLHMC_PFASSIGN_PMFCNPFASSIGN_MASK (0xF << I40E_GLHMC_PFASSIGN_PMFCNPFASSIGN_SHIFT)
+#define I40E_GLHMC_SDPART(_i) (0x000C0800 + ((_i) * 4)) /* _i=0...15 */
+#define I40E_GLHMC_SDPART_MAX_INDEX 15
+#define I40E_GLHMC_SDPART_PMSDBASE_SHIFT 0
+#define I40E_GLHMC_SDPART_PMSDBASE_MASK (0xFFF << I40E_GLHMC_SDPART_PMSDBASE_SHIFT)
+#define I40E_GLHMC_SDPART_PMSDSIZE_SHIFT 16
+#define I40E_GLHMC_SDPART_PMSDSIZE_MASK (0x1FFF << I40E_GLHMC_SDPART_PMSDSIZE_SHIFT)
+#define I40E_GLHMC_VFAPBVTINUSEBASE(_i) (0x000Cca00 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLHMC_VFAPBVTINUSEBASE_MAX_INDEX 31
+#define I40E_GLHMC_VFAPBVTINUSEBASE_FPMAPBINUSEBASE_SHIFT 0
+#define I40E_GLHMC_VFAPBVTINUSEBASE_FPMAPBINUSEBASE_MASK (0xFFFFFF << I40E_GLHMC_VFAPBVTINUSEBASE_FPMAPBINUSEBASE_SHIFT)
+#define I40E_GLHMC_VFCEQPART(_i) (0x00132240 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLHMC_VFCEQPART_MAX_INDEX 31
+#define I40E_GLHMC_VFCEQPART_PMCEQBASE_SHIFT 0
+#define I40E_GLHMC_VFCEQPART_PMCEQBASE_MASK (0xFF << I40E_GLHMC_VFCEQPART_PMCEQBASE_SHIFT)
+#define I40E_GLHMC_VFCEQPART_PMCEQSIZE_SHIFT 16
+#define I40E_GLHMC_VFCEQPART_PMCEQSIZE_MASK (0x1FF << I40E_GLHMC_VFCEQPART_PMCEQSIZE_SHIFT)
+#define I40E_GLHMC_VFDBCQPART(_i) (0x00132140 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLHMC_VFDBCQPART_MAX_INDEX 31
+#define I40E_GLHMC_VFDBCQPART_PMDBCQBASE_SHIFT 0
+#define I40E_GLHMC_VFDBCQPART_PMDBCQBASE_MASK (0x3FFF << I40E_GLHMC_VFDBCQPART_PMDBCQBASE_SHIFT)
+#define I40E_GLHMC_VFDBCQPART_PMDBCQSIZE_SHIFT 16
+#define I40E_GLHMC_VFDBCQPART_PMDBCQSIZE_MASK (0x7FFF << I40E_GLHMC_VFDBCQPART_PMDBCQSIZE_SHIFT)
+#define I40E_GLHMC_VFDBQPPART(_i) (0x00138E00 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLHMC_VFDBQPPART_MAX_INDEX 31
+#define I40E_GLHMC_VFDBQPPART_PMDBQPBASE_SHIFT 0
+#define I40E_GLHMC_VFDBQPPART_PMDBQPBASE_MASK (0x3FFF << I40E_GLHMC_VFDBQPPART_PMDBQPBASE_SHIFT)
+#define I40E_GLHMC_VFDBQPPART_PMDBQPSIZE_SHIFT 16
+#define I40E_GLHMC_VFDBQPPART_PMDBQPSIZE_MASK (0x7FFF << I40E_GLHMC_VFDBQPPART_PMDBQPSIZE_SHIFT)
+#define I40E_GLHMC_VFFSIAVBASE(_i) (0x000Cd600 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLHMC_VFFSIAVBASE_MAX_INDEX 31
+#define I40E_GLHMC_VFFSIAVBASE_FPMFSIAVBASE_SHIFT 0
+#define I40E_GLHMC_VFFSIAVBASE_FPMFSIAVBASE_MASK (0xFFFFFF << I40E_GLHMC_VFFSIAVBASE_FPMFSIAVBASE_SHIFT)
+#define I40E_GLHMC_VFFSIAVCNT(_i) (0x000Cd700 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLHMC_VFFSIAVCNT_MAX_INDEX 31
+#define I40E_GLHMC_VFFSIAVCNT_FPMFSIAVCNT_SHIFT 0
+#define I40E_GLHMC_VFFSIAVCNT_FPMFSIAVCNT_MASK (0x1FFFFFFF << I40E_GLHMC_VFFSIAVCNT_FPMFSIAVCNT_SHIFT)
+#define I40E_GLHMC_VFFSIAVCNT_RSVD_SHIFT 29
+#define I40E_GLHMC_VFFSIAVCNT_RSVD_MASK (0x7 << I40E_GLHMC_VFFSIAVCNT_RSVD_SHIFT)
+#define I40E_GLHMC_VFPDINV(_i) (0x000C8300 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLHMC_VFPDINV_MAX_INDEX 31
+#define I40E_GLHMC_VFPDINV_PMSDIDX_SHIFT 0
+#define I40E_GLHMC_VFPDINV_PMSDIDX_MASK (0xFFF << I40E_GLHMC_VFPDINV_PMSDIDX_SHIFT)
+#define I40E_GLHMC_VFPDINV_PMPDIDX_SHIFT 16
+#define I40E_GLHMC_VFPDINV_PMPDIDX_MASK (0x1FF << I40E_GLHMC_VFPDINV_PMPDIDX_SHIFT)
+#define I40E_GLHMC_VFPEARPBASE(_i) (0x000Cc800 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLHMC_VFPEARPBASE_MAX_INDEX 31
+#define I40E_GLHMC_VFPEARPBASE_FPMPEARPBASE_SHIFT 0
+#define I40E_GLHMC_VFPEARPBASE_FPMPEARPBASE_MASK (0xFFFFFF << I40E_GLHMC_VFPEARPBASE_FPMPEARPBASE_SHIFT)
+#define I40E_GLHMC_VFPEARPCNT(_i) (0x000Cc900 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLHMC_VFPEARPCNT_MAX_INDEX 31
+#define I40E_GLHMC_VFPEARPCNT_FPMPEARPCNT_SHIFT 0
+#define I40E_GLHMC_VFPEARPCNT_FPMPEARPCNT_MASK (0x1FFFFFFF << I40E_GLHMC_VFPEARPCNT_FPMPEARPCNT_SHIFT)
+#define I40E_GLHMC_VFPECQBASE(_i) (0x000Cc200 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLHMC_VFPECQBASE_MAX_INDEX 31
+#define I40E_GLHMC_VFPECQBASE_FPMPECQBASE_SHIFT 0
+#define I40E_GLHMC_VFPECQBASE_FPMPECQBASE_MASK (0xFFFFFF << I40E_GLHMC_VFPECQBASE_FPMPECQBASE_SHIFT)
+#define I40E_GLHMC_VFPECQCNT(_i) (0x000Cc300 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLHMC_VFPECQCNT_MAX_INDEX 31
+#define I40E_GLHMC_VFPECQCNT_FPMPECQCNT_SHIFT 0
+#define I40E_GLHMC_VFPECQCNT_FPMPECQCNT_MASK (0x1FFFFFFF << I40E_GLHMC_VFPECQCNT_FPMPECQCNT_SHIFT)
+#define I40E_GLHMC_VFPEHTCNT(_i) (0x000Cc700 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLHMC_VFPEHTCNT_MAX_INDEX 31
+#define I40E_GLHMC_VFPEHTCNT_FPMPEHTCNT_SHIFT 0
+#define I40E_GLHMC_VFPEHTCNT_FPMPEHTCNT_MASK (0x1FFFFFFF << I40E_GLHMC_VFPEHTCNT_FPMPEHTCNT_SHIFT)
+#define I40E_GLHMC_VFPEHTEBASE(_i) (0x000Cc600 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLHMC_VFPEHTEBASE_MAX_INDEX 31
+#define I40E_GLHMC_VFPEHTEBASE_FPMPEHTEBASE_SHIFT 0
+#define I40E_GLHMC_VFPEHTEBASE_FPMPEHTEBASE_MASK (0xFFFFFF << I40E_GLHMC_VFPEHTEBASE_FPMPEHTEBASE_SHIFT)
+#define I40E_GLHMC_VFPEMRBASE(_i) (0x000Ccc00 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLHMC_VFPEMRBASE_MAX_INDEX 31
+#define I40E_GLHMC_VFPEMRBASE_FPMPEMRBASE_SHIFT 0
+#define I40E_GLHMC_VFPEMRBASE_FPMPEMRBASE_MASK (0xFFFFFF << I40E_GLHMC_VFPEMRBASE_FPMPEMRBASE_SHIFT)
+#define I40E_GLHMC_VFPEMRCNT(_i) (0x000Ccd00 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLHMC_VFPEMRCNT_MAX_INDEX 31
+#define I40E_GLHMC_VFPEMRCNT_FPMPEMRSZ_SHIFT 0
+#define I40E_GLHMC_VFPEMRCNT_FPMPEMRSZ_MASK (0x1FFFFFFF << I40E_GLHMC_VFPEMRCNT_FPMPEMRSZ_SHIFT)
+#define I40E_GLHMC_VFPEPBLBASE(_i) (0x000Cd800 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLHMC_VFPEPBLBASE_MAX_INDEX 31
+#define I40E_GLHMC_VFPEPBLBASE_FPMPEPBLBASE_SHIFT 0
+#define I40E_GLHMC_VFPEPBLBASE_FPMPEPBLBASE_MASK (0xFFFFFF << I40E_GLHMC_VFPEPBLBASE_FPMPEPBLBASE_SHIFT)
+#define I40E_GLHMC_VFPEPBLCNT(_i) (0x000Cd900 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLHMC_VFPEPBLCNT_MAX_INDEX 31
+#define I40E_GLHMC_VFPEPBLCNT_FPMPEPBLCNT_SHIFT 0
+#define I40E_GLHMC_VFPEPBLCNT_FPMPEPBLCNT_MASK (0x1FFFFFFF << I40E_GLHMC_VFPEPBLCNT_FPMPEPBLCNT_SHIFT)
+#define I40E_GLHMC_VFPEQ1BASE(_i) (0x000Cd200 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLHMC_VFPEQ1BASE_MAX_INDEX 31
+#define I40E_GLHMC_VFPEQ1BASE_FPMPEQ1BASE_SHIFT 0
+#define I40E_GLHMC_VFPEQ1BASE_FPMPEQ1BASE_MASK (0xFFFFFF << I40E_GLHMC_VFPEQ1BASE_FPMPEQ1BASE_SHIFT)
+#define I40E_GLHMC_VFPEQ1CNT(_i) (0x000Cd300 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLHMC_VFPEQ1CNT_MAX_INDEX 31
+#define I40E_GLHMC_VFPEQ1CNT_FPMPEQ1CNT_SHIFT 0
+#define I40E_GLHMC_VFPEQ1CNT_FPMPEQ1CNT_MASK (0x1FFFFFFF << I40E_GLHMC_VFPEQ1CNT_FPMPEQ1CNT_SHIFT)
+#define I40E_GLHMC_VFPEQ1FLBASE(_i) (0x000Cd400 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLHMC_VFPEQ1FLBASE_MAX_INDEX 31
+#define I40E_GLHMC_VFPEQ1FLBASE_FPMPEQ1FLBASE_SHIFT 0
+#define I40E_GLHMC_VFPEQ1FLBASE_FPMPEQ1FLBASE_MASK (0xFFFFFF << I40E_GLHMC_VFPEQ1FLBASE_FPMPEQ1FLBASE_SHIFT)
+#define I40E_GLHMC_VFPEQ1FLCNT(_i) (0x000Cd500 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLHMC_VFPEQ1FLCNT_MAX_INDEX 31
+#define I40E_GLHMC_VFPEQ1FLCNT_FPMPEQ1FLCNT_SHIFT 0
+#define I40E_GLHMC_VFPEQ1FLCNT_FPMPEQ1FLCNT_MASK (0x1FFFFFFF << I40E_GLHMC_VFPEQ1FLCNT_FPMPEQ1FLCNT_SHIFT)
+#define I40E_GLHMC_VFPEQPBASE(_i) (0x000Cc000 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLHMC_VFPEQPBASE_MAX_INDEX 31
+#define I40E_GLHMC_VFPEQPBASE_FPMPEQPBASE_SHIFT 0
+#define I40E_GLHMC_VFPEQPBASE_FPMPEQPBASE_MASK (0xFFFFFF << I40E_GLHMC_VFPEQPBASE_FPMPEQPBASE_SHIFT)
+#define I40E_GLHMC_VFPEQPCNT(_i) (0x000Cc100 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLHMC_VFPEQPCNT_MAX_INDEX 31
+#define I40E_GLHMC_VFPEQPCNT_FPMPEQPCNT_SHIFT 0
+#define I40E_GLHMC_VFPEQPCNT_FPMPEQPCNT_MASK (0x1FFFFFFF << I40E_GLHMC_VFPEQPCNT_FPMPEQPCNT_SHIFT)
+#define I40E_GLHMC_VFPESRQBASE(_i) (0x000Cc400 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLHMC_VFPESRQBASE_MAX_INDEX 31
+#define I40E_GLHMC_VFPESRQBASE_FPMPESRQBASE_SHIFT 0
+#define I40E_GLHMC_VFPESRQBASE_FPMPESRQBASE_MASK (0xFFFFFF << I40E_GLHMC_VFPESRQBASE_FPMPESRQBASE_SHIFT)
+#define I40E_GLHMC_VFPESRQCNT(_i) (0x000Cc500 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLHMC_VFPESRQCNT_MAX_INDEX 31
+#define I40E_GLHMC_VFPESRQCNT_FPMPESRQCNT_SHIFT 0
+#define I40E_GLHMC_VFPESRQCNT_FPMPESRQCNT_MASK (0x1FFFFFFF << I40E_GLHMC_VFPESRQCNT_FPMPESRQCNT_SHIFT)
+#define I40E_GLHMC_VFPETIMERBASE(_i) (0x000CDA00 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLHMC_VFPETIMERBASE_MAX_INDEX 31
+#define I40E_GLHMC_VFPETIMERBASE_FPMPETIMERBASE_SHIFT 0
+#define I40E_GLHMC_VFPETIMERBASE_FPMPETIMERBASE_MASK (0xFFFFFF << I40E_GLHMC_VFPETIMERBASE_FPMPETIMERBASE_SHIFT)
+#define I40E_GLHMC_VFPETIMERCNT(_i) (0x000CDB00 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLHMC_VFPETIMERCNT_MAX_INDEX 31
+#define I40E_GLHMC_VFPETIMERCNT_FPMPETIMERCNT_SHIFT 0
+#define I40E_GLHMC_VFPETIMERCNT_FPMPETIMERCNT_MASK (0x1FFFFFFF << I40E_GLHMC_VFPETIMERCNT_FPMPETIMERCNT_SHIFT)
+#define I40E_GLHMC_VFPEXFBASE(_i) (0x000Cce00 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLHMC_VFPEXFBASE_MAX_INDEX 31
+#define I40E_GLHMC_VFPEXFBASE_FPMPEXFBASE_SHIFT 0
+#define I40E_GLHMC_VFPEXFBASE_FPMPEXFBASE_MASK (0xFFFFFF << I40E_GLHMC_VFPEXFBASE_FPMPEXFBASE_SHIFT)
+#define I40E_GLHMC_VFPEXFCNT(_i) (0x000Ccf00 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLHMC_VFPEXFCNT_MAX_INDEX 31
+#define I40E_GLHMC_VFPEXFCNT_FPMPEXFCNT_SHIFT 0
+#define I40E_GLHMC_VFPEXFCNT_FPMPEXFCNT_MASK (0x1FFFFFFF << I40E_GLHMC_VFPEXFCNT_FPMPEXFCNT_SHIFT)
+#define I40E_GLHMC_VFPEXFFLBASE(_i) (0x000Cd000 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLHMC_VFPEXFFLBASE_MAX_INDEX 31
+#define I40E_GLHMC_VFPEXFFLBASE_FPMPEXFFLBASE_SHIFT 0
+#define I40E_GLHMC_VFPEXFFLBASE_FPMPEXFFLBASE_MASK (0xFFFFFF << I40E_GLHMC_VFPEXFFLBASE_FPMPEXFFLBASE_SHIFT)
+#define I40E_GLHMC_VFPEXFFLCNT(_i) (0x000Cd100 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLHMC_VFPEXFFLCNT_MAX_INDEX 31
+#define I40E_GLHMC_VFPEXFFLCNT_FPMPEXFFLCNT_SHIFT 0
+#define I40E_GLHMC_VFPEXFFLCNT_FPMPEXFFLCNT_MASK (0x1FFFFFFF << I40E_GLHMC_VFPEXFFLCNT_FPMPEXFFLCNT_SHIFT)
+#define I40E_GLHMC_VFSDPART(_i) (0x000C8800 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLHMC_VFSDPART_MAX_INDEX 31
+#define I40E_GLHMC_VFSDPART_PMSDBASE_SHIFT 0
+#define I40E_GLHMC_VFSDPART_PMSDBASE_MASK (0xFFF << I40E_GLHMC_VFSDPART_PMSDBASE_SHIFT)
+#define I40E_GLHMC_VFSDPART_PMSDSIZE_SHIFT 16
+#define I40E_GLHMC_VFSDPART_PMSDSIZE_MASK (0x1FFF << I40E_GLHMC_VFSDPART_PMSDSIZE_SHIFT)
+#define I40E_PFHMC_ERRORDATA 0x000C0500
+#define I40E_PFHMC_ERRORDATA_HMC_ERROR_DATA_SHIFT 0
+#define I40E_PFHMC_ERRORDATA_HMC_ERROR_DATA_MASK (0x3FFFFFFF << I40E_PFHMC_ERRORDATA_HMC_ERROR_DATA_SHIFT)
+#define I40E_PFHMC_ERRORINFO 0x000C0400
+#define I40E_PFHMC_ERRORINFO_PMF_INDEX_SHIFT 0
+#define I40E_PFHMC_ERRORINFO_PMF_INDEX_MASK (0x1F << I40E_PFHMC_ERRORINFO_PMF_INDEX_SHIFT)
+#define I40E_PFHMC_ERRORINFO_PMF_ISVF_SHIFT 7
+#define I40E_PFHMC_ERRORINFO_PMF_ISVF_MASK (0x1 << I40E_PFHMC_ERRORINFO_PMF_ISVF_SHIFT)
+#define I40E_PFHMC_ERRORINFO_HMC_ERROR_TYPE_SHIFT 8
+#define I40E_PFHMC_ERRORINFO_HMC_ERROR_TYPE_MASK (0xF << I40E_PFHMC_ERRORINFO_HMC_ERROR_TYPE_SHIFT)
+#define I40E_PFHMC_ERRORINFO_HMC_OBJECT_TYPE_SHIFT 16
+#define I40E_PFHMC_ERRORINFO_HMC_OBJECT_TYPE_MASK (0x1F << I40E_PFHMC_ERRORINFO_HMC_OBJECT_TYPE_SHIFT)
+#define I40E_PFHMC_ERRORINFO_ERROR_DETECTED_SHIFT 31
+/* bit 31: unsigned literal — (0x1 << 31) on a signed int is UB (C11 6.5.7) */
+#define I40E_PFHMC_ERRORINFO_ERROR_DETECTED_MASK (0x1u << I40E_PFHMC_ERRORINFO_ERROR_DETECTED_SHIFT)
+#define I40E_PFHMC_PDINV 0x000C0300
+#define I40E_PFHMC_PDINV_PMSDIDX_SHIFT 0
+#define I40E_PFHMC_PDINV_PMSDIDX_MASK (0xFFF << I40E_PFHMC_PDINV_PMSDIDX_SHIFT)
+#define I40E_PFHMC_PDINV_PMPDIDX_SHIFT 16
+#define I40E_PFHMC_PDINV_PMPDIDX_MASK (0x1FF << I40E_PFHMC_PDINV_PMPDIDX_SHIFT)
+#define I40E_PFHMC_SDCMD 0x000C0000
+#define I40E_PFHMC_SDCMD_PMSDIDX_SHIFT 0
+#define I40E_PFHMC_SDCMD_PMSDIDX_MASK (0xFFF << I40E_PFHMC_SDCMD_PMSDIDX_SHIFT)
+#define I40E_PFHMC_SDCMD_PMSDWR_SHIFT 31
+/* bit 31: unsigned literal — (0x1 << 31) on a signed int is UB (C11 6.5.7) */
+#define I40E_PFHMC_SDCMD_PMSDWR_MASK (0x1u << I40E_PFHMC_SDCMD_PMSDWR_SHIFT)
+#define I40E_PFHMC_SDDATAHIGH 0x000C0200
+#define I40E_PFHMC_SDDATAHIGH_PMSDDATAHIGH_SHIFT 0
+#define I40E_PFHMC_SDDATAHIGH_PMSDDATAHIGH_MASK (0xFFFFFFFF << I40E_PFHMC_SDDATAHIGH_PMSDDATAHIGH_SHIFT)
+#define I40E_PFHMC_SDDATALOW 0x000C0100
+#define I40E_PFHMC_SDDATALOW_PMSDVALID_SHIFT 0
+#define I40E_PFHMC_SDDATALOW_PMSDVALID_MASK (0x1 << I40E_PFHMC_SDDATALOW_PMSDVALID_SHIFT)
+#define I40E_PFHMC_SDDATALOW_PMSDTYPE_SHIFT 1
+#define I40E_PFHMC_SDDATALOW_PMSDTYPE_MASK (0x1 << I40E_PFHMC_SDDATALOW_PMSDTYPE_SHIFT)
+#define I40E_PFHMC_SDDATALOW_PMSDBPCOUNT_SHIFT 2
+#define I40E_PFHMC_SDDATALOW_PMSDBPCOUNT_MASK (0x3FF << I40E_PFHMC_SDDATALOW_PMSDBPCOUNT_SHIFT)
+#define I40E_PFHMC_SDDATALOW_PMSDDATALOW_SHIFT 12
+#define I40E_PFHMC_SDDATALOW_PMSDDATALOW_MASK (0xFFFFF << I40E_PFHMC_SDDATALOW_PMSDDATALOW_SHIFT)
+#define I40E_GL_UFUSE 0x00094008
+#define I40E_GL_UFUSE_FOUR_PORT_ENABLE_SHIFT 1
+#define I40E_GL_UFUSE_FOUR_PORT_ENABLE_MASK (0x1 << I40E_GL_UFUSE_FOUR_PORT_ENABLE_SHIFT)
+#define I40E_GL_UFUSE_NIC_ID_SHIFT 2
+#define I40E_GL_UFUSE_NIC_ID_MASK (0x1 << I40E_GL_UFUSE_NIC_ID_SHIFT)
+#define I40E_GL_UFUSE_ULT_LOCKOUT_SHIFT 10
+#define I40E_GL_UFUSE_ULT_LOCKOUT_MASK (0x1 << I40E_GL_UFUSE_ULT_LOCKOUT_SHIFT)
+#define I40E_GL_UFUSE_CLS_LOCKOUT_SHIFT 11
+#define I40E_GL_UFUSE_CLS_LOCKOUT_MASK (0x1 << I40E_GL_UFUSE_CLS_LOCKOUT_SHIFT)
+#define I40E_EMPINT_GPIO_ENA 0x00088188
+#define I40E_EMPINT_GPIO_ENA_GPIO0_ENA_SHIFT 0
+#define I40E_EMPINT_GPIO_ENA_GPIO0_ENA_MASK (0x1 << I40E_EMPINT_GPIO_ENA_GPIO0_ENA_SHIFT)
+#define I40E_EMPINT_GPIO_ENA_GPIO1_ENA_SHIFT 1
+#define I40E_EMPINT_GPIO_ENA_GPIO1_ENA_MASK (0x1 << I40E_EMPINT_GPIO_ENA_GPIO1_ENA_SHIFT)
+#define I40E_EMPINT_GPIO_ENA_GPIO2_ENA_SHIFT 2
+#define I40E_EMPINT_GPIO_ENA_GPIO2_ENA_MASK (0x1 << I40E_EMPINT_GPIO_ENA_GPIO2_ENA_SHIFT)
+#define I40E_EMPINT_GPIO_ENA_GPIO3_ENA_SHIFT 3
+#define I40E_EMPINT_GPIO_ENA_GPIO3_ENA_MASK (0x1 << I40E_EMPINT_GPIO_ENA_GPIO3_ENA_SHIFT)
+#define I40E_EMPINT_GPIO_ENA_GPIO4_ENA_SHIFT 4
+#define I40E_EMPINT_GPIO_ENA_GPIO4_ENA_MASK (0x1 << I40E_EMPINT_GPIO_ENA_GPIO4_ENA_SHIFT)
+#define I40E_EMPINT_GPIO_ENA_GPIO5_ENA_SHIFT 5
+#define I40E_EMPINT_GPIO_ENA_GPIO5_ENA_MASK (0x1 << I40E_EMPINT_GPIO_ENA_GPIO5_ENA_SHIFT)
+#define I40E_EMPINT_GPIO_ENA_GPIO6_ENA_SHIFT 6
+#define I40E_EMPINT_GPIO_ENA_GPIO6_ENA_MASK (0x1 << I40E_EMPINT_GPIO_ENA_GPIO6_ENA_SHIFT)
+#define I40E_EMPINT_GPIO_ENA_GPIO7_ENA_SHIFT 7
+#define I40E_EMPINT_GPIO_ENA_GPIO7_ENA_MASK (0x1 << I40E_EMPINT_GPIO_ENA_GPIO7_ENA_SHIFT)
+#define I40E_EMPINT_GPIO_ENA_GPIO8_ENA_SHIFT 8
+#define I40E_EMPINT_GPIO_ENA_GPIO8_ENA_MASK (0x1 << I40E_EMPINT_GPIO_ENA_GPIO8_ENA_SHIFT)
+#define I40E_EMPINT_GPIO_ENA_GPIO9_ENA_SHIFT 9
+#define I40E_EMPINT_GPIO_ENA_GPIO9_ENA_MASK (0x1 << I40E_EMPINT_GPIO_ENA_GPIO9_ENA_SHIFT)
+#define I40E_EMPINT_GPIO_ENA_GPIO10_ENA_SHIFT 10
+#define I40E_EMPINT_GPIO_ENA_GPIO10_ENA_MASK (0x1 << I40E_EMPINT_GPIO_ENA_GPIO10_ENA_SHIFT)
+#define I40E_EMPINT_GPIO_ENA_GPIO11_ENA_SHIFT 11
+#define I40E_EMPINT_GPIO_ENA_GPIO11_ENA_MASK (0x1 << I40E_EMPINT_GPIO_ENA_GPIO11_ENA_SHIFT)
+#define I40E_EMPINT_GPIO_ENA_GPIO12_ENA_SHIFT 12
+#define I40E_EMPINT_GPIO_ENA_GPIO12_ENA_MASK (0x1 << I40E_EMPINT_GPIO_ENA_GPIO12_ENA_SHIFT)
+#define I40E_EMPINT_GPIO_ENA_GPIO13_ENA_SHIFT 13
+#define I40E_EMPINT_GPIO_ENA_GPIO13_ENA_MASK (0x1 << I40E_EMPINT_GPIO_ENA_GPIO13_ENA_SHIFT)
+#define I40E_EMPINT_GPIO_ENA_GPIO14_ENA_SHIFT 14
+#define I40E_EMPINT_GPIO_ENA_GPIO14_ENA_MASK (0x1 << I40E_EMPINT_GPIO_ENA_GPIO14_ENA_SHIFT)
+#define I40E_EMPINT_GPIO_ENA_GPIO15_ENA_SHIFT 15
+#define I40E_EMPINT_GPIO_ENA_GPIO15_ENA_MASK (0x1 << I40E_EMPINT_GPIO_ENA_GPIO15_ENA_SHIFT)
+#define I40E_EMPINT_GPIO_ENA_GPIO16_ENA_SHIFT 16
+#define I40E_EMPINT_GPIO_ENA_GPIO16_ENA_MASK (0x1 << I40E_EMPINT_GPIO_ENA_GPIO16_ENA_SHIFT)
+#define I40E_EMPINT_GPIO_ENA_GPIO17_ENA_SHIFT 17
+#define I40E_EMPINT_GPIO_ENA_GPIO17_ENA_MASK (0x1 << I40E_EMPINT_GPIO_ENA_GPIO17_ENA_SHIFT)
+#define I40E_EMPINT_GPIO_ENA_GPIO18_ENA_SHIFT 18
+#define I40E_EMPINT_GPIO_ENA_GPIO18_ENA_MASK (0x1 << I40E_EMPINT_GPIO_ENA_GPIO18_ENA_SHIFT)
+#define I40E_EMPINT_GPIO_ENA_GPIO19_ENA_SHIFT 19
+#define I40E_EMPINT_GPIO_ENA_GPIO19_ENA_MASK (0x1 << I40E_EMPINT_GPIO_ENA_GPIO19_ENA_SHIFT)
+#define I40E_EMPINT_GPIO_ENA_GPIO20_ENA_SHIFT 20
+#define I40E_EMPINT_GPIO_ENA_GPIO20_ENA_MASK (0x1 << I40E_EMPINT_GPIO_ENA_GPIO20_ENA_SHIFT)
+#define I40E_EMPINT_GPIO_ENA_GPIO21_ENA_SHIFT 21
+#define I40E_EMPINT_GPIO_ENA_GPIO21_ENA_MASK (0x1 << I40E_EMPINT_GPIO_ENA_GPIO21_ENA_SHIFT)
+#define I40E_EMPINT_GPIO_ENA_GPIO22_ENA_SHIFT 22
+#define I40E_EMPINT_GPIO_ENA_GPIO22_ENA_MASK (0x1 << I40E_EMPINT_GPIO_ENA_GPIO22_ENA_SHIFT)
+#define I40E_EMPINT_GPIO_ENA_GPIO23_ENA_SHIFT 23
+#define I40E_EMPINT_GPIO_ENA_GPIO23_ENA_MASK (0x1 << I40E_EMPINT_GPIO_ENA_GPIO23_ENA_SHIFT)
+#define I40E_EMPINT_GPIO_ENA_GPIO24_ENA_SHIFT 24
+#define I40E_EMPINT_GPIO_ENA_GPIO24_ENA_MASK (0x1 << I40E_EMPINT_GPIO_ENA_GPIO24_ENA_SHIFT)
+#define I40E_EMPINT_GPIO_ENA_GPIO25_ENA_SHIFT 25
+#define I40E_EMPINT_GPIO_ENA_GPIO25_ENA_MASK (0x1 << I40E_EMPINT_GPIO_ENA_GPIO25_ENA_SHIFT)
+#define I40E_EMPINT_GPIO_ENA_GPIO26_ENA_SHIFT 26
+#define I40E_EMPINT_GPIO_ENA_GPIO26_ENA_MASK (0x1 << I40E_EMPINT_GPIO_ENA_GPIO26_ENA_SHIFT)
+#define I40E_EMPINT_GPIO_ENA_GPIO27_ENA_SHIFT 27
+#define I40E_EMPINT_GPIO_ENA_GPIO27_ENA_MASK (0x1 << I40E_EMPINT_GPIO_ENA_GPIO27_ENA_SHIFT)
+#define I40E_EMPINT_GPIO_ENA_GPIO28_ENA_SHIFT 28
+#define I40E_EMPINT_GPIO_ENA_GPIO28_ENA_MASK (0x1 << I40E_EMPINT_GPIO_ENA_GPIO28_ENA_SHIFT)
+#define I40E_EMPINT_GPIO_ENA_GPIO29_ENA_SHIFT 29
+#define I40E_EMPINT_GPIO_ENA_GPIO29_ENA_MASK (0x1 << I40E_EMPINT_GPIO_ENA_GPIO29_ENA_SHIFT)
+#define I40E_PFGEN_PORTMDIO_NUM 0x0003F100
+#define I40E_PFGEN_PORTMDIO_NUM_PORT_NUM_SHIFT 0
+#define I40E_PFGEN_PORTMDIO_NUM_PORT_NUM_MASK (0x3 << I40E_PFGEN_PORTMDIO_NUM_PORT_NUM_SHIFT)
+#define I40E_PFGEN_PORTMDIO_NUM_VFLINK_STAT_ENA_SHIFT 4
+#define I40E_PFGEN_PORTMDIO_NUM_VFLINK_STAT_ENA_MASK (0x1 << I40E_PFGEN_PORTMDIO_NUM_VFLINK_STAT_ENA_SHIFT)
+#define I40E_PFINT_AEQCTL 0x00038700
+#define I40E_PFINT_AEQCTL_MSIX_INDX_SHIFT 0
+#define I40E_PFINT_AEQCTL_MSIX_INDX_MASK (0xFF << I40E_PFINT_AEQCTL_MSIX_INDX_SHIFT)
+#define I40E_PFINT_AEQCTL_ITR_INDX_SHIFT 11
+#define I40E_PFINT_AEQCTL_ITR_INDX_MASK (0x3 << I40E_PFINT_AEQCTL_ITR_INDX_SHIFT)
+#define I40E_PFINT_AEQCTL_MSIX0_INDX_SHIFT 13
+#define I40E_PFINT_AEQCTL_MSIX0_INDX_MASK (0x7 << I40E_PFINT_AEQCTL_MSIX0_INDX_SHIFT)
+#define I40E_PFINT_AEQCTL_CAUSE_ENA_SHIFT 30
+#define I40E_PFINT_AEQCTL_CAUSE_ENA_MASK (0x1 << I40E_PFINT_AEQCTL_CAUSE_ENA_SHIFT)
+#define I40E_PFINT_AEQCTL_INTEVENT_SHIFT 31
+/* bit 31: unsigned literal — (0x1 << 31) on a signed int is UB (C11 6.5.7) */
+#define I40E_PFINT_AEQCTL_INTEVENT_MASK (0x1u << I40E_PFINT_AEQCTL_INTEVENT_SHIFT)
+#define I40E_PFINT_CEQCTL(_INTPF) (0x00036800 + ((_INTPF) * 4)) /* _i=0...511 */
+#define I40E_PFINT_CEQCTL_MAX_INDEX 511
+#define I40E_PFINT_CEQCTL_MSIX_INDX_SHIFT 0
+#define I40E_PFINT_CEQCTL_MSIX_INDX_MASK (0xFF << I40E_PFINT_CEQCTL_MSIX_INDX_SHIFT)
+#define I40E_PFINT_CEQCTL_ITR_INDX_SHIFT 11
+#define I40E_PFINT_CEQCTL_ITR_INDX_MASK (0x3 << I40E_PFINT_CEQCTL_ITR_INDX_SHIFT)
+#define I40E_PFINT_CEQCTL_MSIX0_INDX_SHIFT 13
+#define I40E_PFINT_CEQCTL_MSIX0_INDX_MASK (0x7 << I40E_PFINT_CEQCTL_MSIX0_INDX_SHIFT)
+#define I40E_PFINT_CEQCTL_NEXTQ_INDX_SHIFT 16
+#define I40E_PFINT_CEQCTL_NEXTQ_INDX_MASK (0x7FF << I40E_PFINT_CEQCTL_NEXTQ_INDX_SHIFT)
+#define I40E_PFINT_CEQCTL_NEXTQ_TYPE_SHIFT 27
+#define I40E_PFINT_CEQCTL_NEXTQ_TYPE_MASK (0x3 << I40E_PFINT_CEQCTL_NEXTQ_TYPE_SHIFT)
+#define I40E_PFINT_CEQCTL_CAUSE_ENA_SHIFT 30
+#define I40E_PFINT_CEQCTL_CAUSE_ENA_MASK (0x1 << I40E_PFINT_CEQCTL_CAUSE_ENA_SHIFT)
+#define I40E_PFINT_CEQCTL_INTEVENT_SHIFT 31
+/* bit 31: unsigned literal — (0x1 << 31) on a signed int is UB (C11 6.5.7) */
+#define I40E_PFINT_CEQCTL_INTEVENT_MASK (0x1u << I40E_PFINT_CEQCTL_INTEVENT_SHIFT)
+#define I40E_PFINT_DYN_CTL0 0x00038480
+#define I40E_PFINT_DYN_CTL0_INTENA_SHIFT 0
+#define I40E_PFINT_DYN_CTL0_INTENA_MASK (0x1 << I40E_PFINT_DYN_CTL0_INTENA_SHIFT)
+#define I40E_PFINT_DYN_CTL0_CLEARPBA_SHIFT 1
+#define I40E_PFINT_DYN_CTL0_CLEARPBA_MASK (0x1 << I40E_PFINT_DYN_CTL0_CLEARPBA_SHIFT)
+#define I40E_PFINT_DYN_CTL0_SWINT_TRIG_SHIFT 2
+#define I40E_PFINT_DYN_CTL0_SWINT_TRIG_MASK (0x1 << I40E_PFINT_DYN_CTL0_SWINT_TRIG_SHIFT)
+#define I40E_PFINT_DYN_CTL0_ITR_INDX_SHIFT 3
+#define I40E_PFINT_DYN_CTL0_ITR_INDX_MASK (0x3 << I40E_PFINT_DYN_CTL0_ITR_INDX_SHIFT)
+#define I40E_PFINT_DYN_CTL0_INTERVAL_SHIFT 5
+#define I40E_PFINT_DYN_CTL0_INTERVAL_MASK (0xFFF << I40E_PFINT_DYN_CTL0_INTERVAL_SHIFT)
+#define I40E_PFINT_DYN_CTL0_SW_ITR_INDX_ENA_SHIFT 24
+#define I40E_PFINT_DYN_CTL0_SW_ITR_INDX_ENA_MASK (0x1 << I40E_PFINT_DYN_CTL0_SW_ITR_INDX_ENA_SHIFT)
+#define I40E_PFINT_DYN_CTL0_SW_ITR_INDX_SHIFT 25
+#define I40E_PFINT_DYN_CTL0_SW_ITR_INDX_MASK (0x3 << I40E_PFINT_DYN_CTL0_SW_ITR_INDX_SHIFT)
+#define I40E_PFINT_DYN_CTL0_INTENA_MSK_SHIFT 31
+/* bit 31: unsigned literal — (0x1 << 31) on a signed int is UB (C11 6.5.7) */
+#define I40E_PFINT_DYN_CTL0_INTENA_MSK_MASK (0x1u << I40E_PFINT_DYN_CTL0_INTENA_MSK_SHIFT)
+#define I40E_PFINT_DYN_CTLN(_INTPF) (0x00034800 + ((_INTPF) * 4)) /* _i=0...511 */
+#define I40E_PFINT_DYN_CTLN_MAX_INDEX 511
+#define I40E_PFINT_DYN_CTLN_INTENA_SHIFT 0
+#define I40E_PFINT_DYN_CTLN_INTENA_MASK (0x1 << I40E_PFINT_DYN_CTLN_INTENA_SHIFT)
+#define I40E_PFINT_DYN_CTLN_CLEARPBA_SHIFT 1
+#define I40E_PFINT_DYN_CTLN_CLEARPBA_MASK (0x1 << I40E_PFINT_DYN_CTLN_CLEARPBA_SHIFT)
+#define I40E_PFINT_DYN_CTLN_SWINT_TRIG_SHIFT 2
+#define I40E_PFINT_DYN_CTLN_SWINT_TRIG_MASK (0x1 << I40E_PFINT_DYN_CTLN_SWINT_TRIG_SHIFT)
+#define I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT 3
+#define I40E_PFINT_DYN_CTLN_ITR_INDX_MASK (0x3 << I40E_PFINT_DYN_CTLN_ITR_INDX_SHIFT)
+#define I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT 5
+#define I40E_PFINT_DYN_CTLN_INTERVAL_MASK (0xFFF << I40E_PFINT_DYN_CTLN_INTERVAL_SHIFT)
+#define I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_SHIFT 24
+#define I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_MASK (0x1 << I40E_PFINT_DYN_CTLN_SW_ITR_INDX_ENA_SHIFT)
+#define I40E_PFINT_DYN_CTLN_SW_ITR_INDX_SHIFT 25
+#define I40E_PFINT_DYN_CTLN_SW_ITR_INDX_MASK (0x3 << I40E_PFINT_DYN_CTLN_SW_ITR_INDX_SHIFT)
+#define I40E_PFINT_DYN_CTLN_INTENA_MSK_SHIFT 31
+/* bit 31: unsigned literal — (0x1 << 31) on a signed int is UB (C11 6.5.7) */
+#define I40E_PFINT_DYN_CTLN_INTENA_MSK_MASK (0x1u << I40E_PFINT_DYN_CTLN_INTENA_MSK_SHIFT)
+#define I40E_PFINT_GPIO_ENA 0x00088080
+#define I40E_PFINT_GPIO_ENA_GPIO0_ENA_SHIFT 0
+#define I40E_PFINT_GPIO_ENA_GPIO0_ENA_MASK (0x1 << I40E_PFINT_GPIO_ENA_GPIO0_ENA_SHIFT)
+#define I40E_PFINT_GPIO_ENA_GPIO1_ENA_SHIFT 1
+#define I40E_PFINT_GPIO_ENA_GPIO1_ENA_MASK (0x1 << I40E_PFINT_GPIO_ENA_GPIO1_ENA_SHIFT)
+#define I40E_PFINT_GPIO_ENA_GPIO2_ENA_SHIFT 2
+#define I40E_PFINT_GPIO_ENA_GPIO2_ENA_MASK (0x1 << I40E_PFINT_GPIO_ENA_GPIO2_ENA_SHIFT)
+#define I40E_PFINT_GPIO_ENA_GPIO3_ENA_SHIFT 3
+#define I40E_PFINT_GPIO_ENA_GPIO3_ENA_MASK (0x1 << I40E_PFINT_GPIO_ENA_GPIO3_ENA_SHIFT)
+#define I40E_PFINT_GPIO_ENA_GPIO4_ENA_SHIFT 4
+#define I40E_PFINT_GPIO_ENA_GPIO4_ENA_MASK (0x1 << I40E_PFINT_GPIO_ENA_GPIO4_ENA_SHIFT)
+#define I40E_PFINT_GPIO_ENA_GPIO5_ENA_SHIFT 5
+#define I40E_PFINT_GPIO_ENA_GPIO5_ENA_MASK (0x1 << I40E_PFINT_GPIO_ENA_GPIO5_ENA_SHIFT)
+#define I40E_PFINT_GPIO_ENA_GPIO6_ENA_SHIFT 6
+#define I40E_PFINT_GPIO_ENA_GPIO6_ENA_MASK (0x1 << I40E_PFINT_GPIO_ENA_GPIO6_ENA_SHIFT)
+#define I40E_PFINT_GPIO_ENA_GPIO7_ENA_SHIFT 7
+#define I40E_PFINT_GPIO_ENA_GPIO7_ENA_MASK (0x1 << I40E_PFINT_GPIO_ENA_GPIO7_ENA_SHIFT)
+#define I40E_PFINT_GPIO_ENA_GPIO8_ENA_SHIFT 8
+#define I40E_PFINT_GPIO_ENA_GPIO8_ENA_MASK (0x1 << I40E_PFINT_GPIO_ENA_GPIO8_ENA_SHIFT)
+#define I40E_PFINT_GPIO_ENA_GPIO9_ENA_SHIFT 9
+#define I40E_PFINT_GPIO_ENA_GPIO9_ENA_MASK (0x1 << I40E_PFINT_GPIO_ENA_GPIO9_ENA_SHIFT)
+#define I40E_PFINT_GPIO_ENA_GPIO10_ENA_SHIFT 10
+#define I40E_PFINT_GPIO_ENA_GPIO10_ENA_MASK (0x1 << I40E_PFINT_GPIO_ENA_GPIO10_ENA_SHIFT)
+#define I40E_PFINT_GPIO_ENA_GPIO11_ENA_SHIFT 11
+#define I40E_PFINT_GPIO_ENA_GPIO11_ENA_MASK (0x1 << I40E_PFINT_GPIO_ENA_GPIO11_ENA_SHIFT)
+#define I40E_PFINT_GPIO_ENA_GPIO12_ENA_SHIFT 12
+#define I40E_PFINT_GPIO_ENA_GPIO12_ENA_MASK (0x1 << I40E_PFINT_GPIO_ENA_GPIO12_ENA_SHIFT)
+#define I40E_PFINT_GPIO_ENA_GPIO13_ENA_SHIFT 13
+#define I40E_PFINT_GPIO_ENA_GPIO13_ENA_MASK (0x1 << I40E_PFINT_GPIO_ENA_GPIO13_ENA_SHIFT)
+#define I40E_PFINT_GPIO_ENA_GPIO14_ENA_SHIFT 14
+#define I40E_PFINT_GPIO_ENA_GPIO14_ENA_MASK (0x1 << I40E_PFINT_GPIO_ENA_GPIO14_ENA_SHIFT)
+#define I40E_PFINT_GPIO_ENA_GPIO15_ENA_SHIFT 15
+#define I40E_PFINT_GPIO_ENA_GPIO15_ENA_MASK (0x1 << I40E_PFINT_GPIO_ENA_GPIO15_ENA_SHIFT)
+#define I40E_PFINT_GPIO_ENA_GPIO16_ENA_SHIFT 16
+#define I40E_PFINT_GPIO_ENA_GPIO16_ENA_MASK (0x1 << I40E_PFINT_GPIO_ENA_GPIO16_ENA_SHIFT)
+#define I40E_PFINT_GPIO_ENA_GPIO17_ENA_SHIFT 17
+#define I40E_PFINT_GPIO_ENA_GPIO17_ENA_MASK (0x1 << I40E_PFINT_GPIO_ENA_GPIO17_ENA_SHIFT)
+#define I40E_PFINT_GPIO_ENA_GPIO18_ENA_SHIFT 18
+#define I40E_PFINT_GPIO_ENA_GPIO18_ENA_MASK (0x1 << I40E_PFINT_GPIO_ENA_GPIO18_ENA_SHIFT)
+#define I40E_PFINT_GPIO_ENA_GPIO19_ENA_SHIFT 19
+#define I40E_PFINT_GPIO_ENA_GPIO19_ENA_MASK (0x1 << I40E_PFINT_GPIO_ENA_GPIO19_ENA_SHIFT)
+#define I40E_PFINT_GPIO_ENA_GPIO20_ENA_SHIFT 20
+#define I40E_PFINT_GPIO_ENA_GPIO20_ENA_MASK (0x1 << I40E_PFINT_GPIO_ENA_GPIO20_ENA_SHIFT)
+#define I40E_PFINT_GPIO_ENA_GPIO21_ENA_SHIFT 21
+#define I40E_PFINT_GPIO_ENA_GPIO21_ENA_MASK (0x1 << I40E_PFINT_GPIO_ENA_GPIO21_ENA_SHIFT)
+#define I40E_PFINT_GPIO_ENA_GPIO22_ENA_SHIFT 22
+#define I40E_PFINT_GPIO_ENA_GPIO22_ENA_MASK (0x1 << I40E_PFINT_GPIO_ENA_GPIO22_ENA_SHIFT)
+#define I40E_PFINT_GPIO_ENA_GPIO23_ENA_SHIFT 23
+#define I40E_PFINT_GPIO_ENA_GPIO23_ENA_MASK (0x1 << I40E_PFINT_GPIO_ENA_GPIO23_ENA_SHIFT)
+#define I40E_PFINT_GPIO_ENA_GPIO24_ENA_SHIFT 24
+#define I40E_PFINT_GPIO_ENA_GPIO24_ENA_MASK (0x1 << I40E_PFINT_GPIO_ENA_GPIO24_ENA_SHIFT)
+#define I40E_PFINT_GPIO_ENA_GPIO25_ENA_SHIFT 25
+#define I40E_PFINT_GPIO_ENA_GPIO25_ENA_MASK (0x1 << I40E_PFINT_GPIO_ENA_GPIO25_ENA_SHIFT)
+#define I40E_PFINT_GPIO_ENA_GPIO26_ENA_SHIFT 26
+#define I40E_PFINT_GPIO_ENA_GPIO26_ENA_MASK (0x1 << I40E_PFINT_GPIO_ENA_GPIO26_ENA_SHIFT)
+#define I40E_PFINT_GPIO_ENA_GPIO27_ENA_SHIFT 27
+#define I40E_PFINT_GPIO_ENA_GPIO27_ENA_MASK (0x1 << I40E_PFINT_GPIO_ENA_GPIO27_ENA_SHIFT)
+#define I40E_PFINT_GPIO_ENA_GPIO28_ENA_SHIFT 28
+#define I40E_PFINT_GPIO_ENA_GPIO28_ENA_MASK (0x1 << I40E_PFINT_GPIO_ENA_GPIO28_ENA_SHIFT)
+#define I40E_PFINT_GPIO_ENA_GPIO29_ENA_SHIFT 29
+#define I40E_PFINT_GPIO_ENA_GPIO29_ENA_MASK (0x1 << I40E_PFINT_GPIO_ENA_GPIO29_ENA_SHIFT)
+#define I40E_PFINT_ICR0 0x00038780
+#define I40E_PFINT_ICR0_INTEVENT_SHIFT 0
+#define I40E_PFINT_ICR0_INTEVENT_MASK (0x1 << I40E_PFINT_ICR0_INTEVENT_SHIFT)
+#define I40E_PFINT_ICR0_QUEUE_0_SHIFT 1
+#define I40E_PFINT_ICR0_QUEUE_0_MASK (0x1 << I40E_PFINT_ICR0_QUEUE_0_SHIFT)
+#define I40E_PFINT_ICR0_QUEUE_1_SHIFT 2
+#define I40E_PFINT_ICR0_QUEUE_1_MASK (0x1 << I40E_PFINT_ICR0_QUEUE_1_SHIFT)
+#define I40E_PFINT_ICR0_QUEUE_2_SHIFT 3
+#define I40E_PFINT_ICR0_QUEUE_2_MASK (0x1 << I40E_PFINT_ICR0_QUEUE_2_SHIFT)
+#define I40E_PFINT_ICR0_QUEUE_3_SHIFT 4
+#define I40E_PFINT_ICR0_QUEUE_3_MASK (0x1 << I40E_PFINT_ICR0_QUEUE_3_SHIFT)
+#define I40E_PFINT_ICR0_QUEUE_4_SHIFT 5
+#define I40E_PFINT_ICR0_QUEUE_4_MASK (0x1 << I40E_PFINT_ICR0_QUEUE_4_SHIFT)
+#define I40E_PFINT_ICR0_QUEUE_5_SHIFT 6
+#define I40E_PFINT_ICR0_QUEUE_5_MASK (0x1 << I40E_PFINT_ICR0_QUEUE_5_SHIFT)
+#define I40E_PFINT_ICR0_QUEUE_6_SHIFT 7
+#define I40E_PFINT_ICR0_QUEUE_6_MASK (0x1 << I40E_PFINT_ICR0_QUEUE_6_SHIFT)
+#define I40E_PFINT_ICR0_QUEUE_7_SHIFT 8
+#define I40E_PFINT_ICR0_QUEUE_7_MASK (0x1 << I40E_PFINT_ICR0_QUEUE_7_SHIFT)
+#define I40E_PFINT_ICR0_ECC_ERR_SHIFT 16
+#define I40E_PFINT_ICR0_ECC_ERR_MASK (0x1 << I40E_PFINT_ICR0_ECC_ERR_SHIFT)
+#define I40E_PFINT_ICR0_MAL_DETECT_SHIFT 19
+#define I40E_PFINT_ICR0_MAL_DETECT_MASK (0x1 << I40E_PFINT_ICR0_MAL_DETECT_SHIFT)
+#define I40E_PFINT_ICR0_GRST_SHIFT 20
+#define I40E_PFINT_ICR0_GRST_MASK (0x1 << I40E_PFINT_ICR0_GRST_SHIFT)
+#define I40E_PFINT_ICR0_PCI_EXCEPTION_SHIFT 21
+#define I40E_PFINT_ICR0_PCI_EXCEPTION_MASK (0x1 << I40E_PFINT_ICR0_PCI_EXCEPTION_SHIFT)
+#define I40E_PFINT_ICR0_GPIO_SHIFT 22
+#define I40E_PFINT_ICR0_GPIO_MASK (0x1 << I40E_PFINT_ICR0_GPIO_SHIFT)
+#define I40E_PFINT_ICR0_TIMESYNC_SHIFT 23
+#define I40E_PFINT_ICR0_TIMESYNC_MASK (0x1 << I40E_PFINT_ICR0_TIMESYNC_SHIFT)
+#define I40E_PFINT_ICR0_STORM_DETECT_SHIFT 24
+#define I40E_PFINT_ICR0_STORM_DETECT_MASK (0x1 << I40E_PFINT_ICR0_STORM_DETECT_SHIFT)
+#define I40E_PFINT_ICR0_LINK_STAT_CHANGE_SHIFT 25
+#define I40E_PFINT_ICR0_LINK_STAT_CHANGE_MASK (0x1 << I40E_PFINT_ICR0_LINK_STAT_CHANGE_SHIFT)
+#define I40E_PFINT_ICR0_HMC_ERR_SHIFT 26
+#define I40E_PFINT_ICR0_HMC_ERR_MASK (0x1 << I40E_PFINT_ICR0_HMC_ERR_SHIFT)
+#define I40E_PFINT_ICR0_PE_CRITERR_SHIFT 28
+#define I40E_PFINT_ICR0_PE_CRITERR_MASK (0x1 << I40E_PFINT_ICR0_PE_CRITERR_SHIFT)
+#define I40E_PFINT_ICR0_VFLR_SHIFT 29
+#define I40E_PFINT_ICR0_VFLR_MASK (0x1 << I40E_PFINT_ICR0_VFLR_SHIFT)
+#define I40E_PFINT_ICR0_ADMINQ_SHIFT 30
+#define I40E_PFINT_ICR0_ADMINQ_MASK (0x1 << I40E_PFINT_ICR0_ADMINQ_SHIFT)
+#define I40E_PFINT_ICR0_SWINT_SHIFT 31
+#define I40E_PFINT_ICR0_SWINT_MASK (0x1 << I40E_PFINT_ICR0_SWINT_SHIFT)
+#define I40E_PFINT_ICR0_ENA 0x00038800
+#define I40E_PFINT_ICR0_ENA_ECC_ERR_SHIFT 16
+#define I40E_PFINT_ICR0_ENA_ECC_ERR_MASK (0x1 << I40E_PFINT_ICR0_ENA_ECC_ERR_SHIFT)
+#define I40E_PFINT_ICR0_ENA_MAL_DETECT_SHIFT 19
+#define I40E_PFINT_ICR0_ENA_MAL_DETECT_MASK (0x1 << I40E_PFINT_ICR0_ENA_MAL_DETECT_SHIFT)
+#define I40E_PFINT_ICR0_ENA_GRST_SHIFT 20
+#define I40E_PFINT_ICR0_ENA_GRST_MASK (0x1 << I40E_PFINT_ICR0_ENA_GRST_SHIFT)
+#define I40E_PFINT_ICR0_ENA_PCI_EXCEPTION_SHIFT 21
+#define I40E_PFINT_ICR0_ENA_PCI_EXCEPTION_MASK (0x1 << I40E_PFINT_ICR0_ENA_PCI_EXCEPTION_SHIFT)
+#define I40E_PFINT_ICR0_ENA_GPIO_SHIFT 22
+#define I40E_PFINT_ICR0_ENA_GPIO_MASK (0x1 << I40E_PFINT_ICR0_ENA_GPIO_SHIFT)
+#define I40E_PFINT_ICR0_ENA_TIMESYNC_SHIFT 23
+#define I40E_PFINT_ICR0_ENA_TIMESYNC_MASK (0x1 << I40E_PFINT_ICR0_ENA_TIMESYNC_SHIFT)
+#define I40E_PFINT_ICR0_ENA_STORM_DETECT_SHIFT 24
+#define I40E_PFINT_ICR0_ENA_STORM_DETECT_MASK (0x1 << I40E_PFINT_ICR0_ENA_STORM_DETECT_SHIFT)
+#define I40E_PFINT_ICR0_ENA_LINK_STAT_CHANGE_SHIFT 25
+#define I40E_PFINT_ICR0_ENA_LINK_STAT_CHANGE_MASK (0x1 << I40E_PFINT_ICR0_ENA_LINK_STAT_CHANGE_SHIFT)
+#define I40E_PFINT_ICR0_ENA_HMC_ERR_SHIFT 26
+#define I40E_PFINT_ICR0_ENA_HMC_ERR_MASK (0x1 << I40E_PFINT_ICR0_ENA_HMC_ERR_SHIFT)
+#define I40E_PFINT_ICR0_ENA_PE_CRITERR_SHIFT 28
+#define I40E_PFINT_ICR0_ENA_PE_CRITERR_MASK (0x1 << I40E_PFINT_ICR0_ENA_PE_CRITERR_SHIFT)
+#define I40E_PFINT_ICR0_ENA_VFLR_SHIFT 29
+#define I40E_PFINT_ICR0_ENA_VFLR_MASK (0x1 << I40E_PFINT_ICR0_ENA_VFLR_SHIFT)
+#define I40E_PFINT_ICR0_ENA_ADMINQ_SHIFT 30
+#define I40E_PFINT_ICR0_ENA_ADMINQ_MASK (0x1 << I40E_PFINT_ICR0_ENA_ADMINQ_SHIFT)
+#define I40E_PFINT_ICR0_ENA_RSVD_SHIFT 31
+#define I40E_PFINT_ICR0_ENA_RSVD_MASK (0x1 << I40E_PFINT_ICR0_ENA_RSVD_SHIFT)
+#define I40E_PFINT_ITR0(_i) (0x00038000 + ((_i) * 128)) /* _i=0...2 */
+#define I40E_PFINT_ITR0_MAX_INDEX 2
+#define I40E_PFINT_ITR0_INTERVAL_SHIFT 0
+#define I40E_PFINT_ITR0_INTERVAL_MASK (0xFFF << I40E_PFINT_ITR0_INTERVAL_SHIFT)
+#define I40E_PFINT_ITRN(_i, _INTPF) (0x00030000 + ((_i) * 2048 + (_INTPF) * 4))
+#define I40E_PFINT_ITRN_MAX_INDEX 2
+#define I40E_PFINT_ITRN_INTERVAL_SHIFT 0
+#define I40E_PFINT_ITRN_INTERVAL_MASK (0xFFF << I40E_PFINT_ITRN_INTERVAL_SHIFT)
+#define I40E_PFINT_LNKLST0 0x00038500
+#define I40E_PFINT_LNKLST0_FIRSTQ_INDX_SHIFT 0
+#define I40E_PFINT_LNKLST0_FIRSTQ_INDX_MASK (0x7FF << I40E_PFINT_LNKLST0_FIRSTQ_INDX_SHIFT)
+#define I40E_PFINT_LNKLST0_FIRSTQ_TYPE_SHIFT 11
+#define I40E_PFINT_LNKLST0_FIRSTQ_TYPE_MASK (0x3 << I40E_PFINT_LNKLST0_FIRSTQ_TYPE_SHIFT)
+#define I40E_PFINT_LNKLSTN(_INTPF) (0x00035000 + ((_INTPF) * 4)) /* _i=0...511 */
+#define I40E_PFINT_LNKLSTN_MAX_INDEX 511
+#define I40E_PFINT_LNKLSTN_FIRSTQ_INDX_SHIFT 0
+#define I40E_PFINT_LNKLSTN_FIRSTQ_INDX_MASK (0x7FF << I40E_PFINT_LNKLSTN_FIRSTQ_INDX_SHIFT)
+#define I40E_PFINT_LNKLSTN_FIRSTQ_TYPE_SHIFT 11
+#define I40E_PFINT_LNKLSTN_FIRSTQ_TYPE_MASK (0x3 << I40E_PFINT_LNKLSTN_FIRSTQ_TYPE_SHIFT)
+#define I40E_PFINT_RATE0 0x00038580
+#define I40E_PFINT_RATE0_INTERVAL_SHIFT 0
+#define I40E_PFINT_RATE0_INTERVAL_MASK (0x3F << I40E_PFINT_RATE0_INTERVAL_SHIFT)
+#define I40E_PFINT_RATE0_INTRL_ENA_SHIFT 6
+#define I40E_PFINT_RATE0_INTRL_ENA_MASK (0x1 << I40E_PFINT_RATE0_INTRL_ENA_SHIFT)
+#define I40E_PFINT_RATEN(_INTPF) (0x00035800 + ((_INTPF) * 4)) /* _i=0...511 */
+#define I40E_PFINT_RATEN_MAX_INDEX 511
+#define I40E_PFINT_RATEN_INTERVAL_SHIFT 0
+#define I40E_PFINT_RATEN_INTERVAL_MASK (0x3F << I40E_PFINT_RATEN_INTERVAL_SHIFT)
+#define I40E_PFINT_RATEN_INTRL_ENA_SHIFT 6
+#define I40E_PFINT_RATEN_INTRL_ENA_MASK (0x1 << I40E_PFINT_RATEN_INTRL_ENA_SHIFT)
+#define I40E_PFINT_STAT_CTL0 0x00038400
+#define I40E_PFINT_STAT_CTL0_OTHER_ITR_INDX_SHIFT 2
+#define I40E_PFINT_STAT_CTL0_OTHER_ITR_INDX_MASK (0x3 << I40E_PFINT_STAT_CTL0_OTHER_ITR_INDX_SHIFT)
+#define I40E_QINT_RQCTL(_Q) (0x0003A000 + ((_Q) * 4)) /* _i=0...1535 */
+#define I40E_QINT_RQCTL_MAX_INDEX 1535
+#define I40E_QINT_RQCTL_MSIX_INDX_SHIFT 0
+#define I40E_QINT_RQCTL_MSIX_INDX_MASK (0xFF << I40E_QINT_RQCTL_MSIX_INDX_SHIFT)
+#define I40E_QINT_RQCTL_ITR_INDX_SHIFT 11
+#define I40E_QINT_RQCTL_ITR_INDX_MASK (0x3 << I40E_QINT_RQCTL_ITR_INDX_SHIFT)
+#define I40E_QINT_RQCTL_MSIX0_INDX_SHIFT 13
+#define I40E_QINT_RQCTL_MSIX0_INDX_MASK (0x7 << I40E_QINT_RQCTL_MSIX0_INDX_SHIFT)
+#define I40E_QINT_RQCTL_NEXTQ_INDX_SHIFT 16
+#define I40E_QINT_RQCTL_NEXTQ_INDX_MASK (0x7FF << I40E_QINT_RQCTL_NEXTQ_INDX_SHIFT)
+#define I40E_QINT_RQCTL_NEXTQ_TYPE_SHIFT 27
+#define I40E_QINT_RQCTL_NEXTQ_TYPE_MASK (0x3 << I40E_QINT_RQCTL_NEXTQ_TYPE_SHIFT)
+#define I40E_QINT_RQCTL_CAUSE_ENA_SHIFT 30
+#define I40E_QINT_RQCTL_CAUSE_ENA_MASK (0x1 << I40E_QINT_RQCTL_CAUSE_ENA_SHIFT)
+#define I40E_QINT_RQCTL_INTEVENT_SHIFT 31
+#define I40E_QINT_RQCTL_INTEVENT_MASK (0x1 << I40E_QINT_RQCTL_INTEVENT_SHIFT)
+#define I40E_QINT_TQCTL(_Q) (0x0003C000 + ((_Q) * 4)) /* _i=0...1535 */
+#define I40E_QINT_TQCTL_MAX_INDEX 1535
+#define I40E_QINT_TQCTL_MSIX_INDX_SHIFT 0
+#define I40E_QINT_TQCTL_MSIX_INDX_MASK (0xFF << I40E_QINT_TQCTL_MSIX_INDX_SHIFT)
+#define I40E_QINT_TQCTL_ITR_INDX_SHIFT 11
+#define I40E_QINT_TQCTL_ITR_INDX_MASK (0x3 << I40E_QINT_TQCTL_ITR_INDX_SHIFT)
+#define I40E_QINT_TQCTL_MSIX0_INDX_SHIFT 13
+#define I40E_QINT_TQCTL_MSIX0_INDX_MASK (0x7 << I40E_QINT_TQCTL_MSIX0_INDX_SHIFT)
+#define I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT 16
+#define I40E_QINT_TQCTL_NEXTQ_INDX_MASK (0x7FF << I40E_QINT_TQCTL_NEXTQ_INDX_SHIFT)
+#define I40E_QINT_TQCTL_NEXTQ_TYPE_SHIFT 27
+#define I40E_QINT_TQCTL_NEXTQ_TYPE_MASK (0x3 << I40E_QINT_TQCTL_NEXTQ_TYPE_SHIFT)
+#define I40E_QINT_TQCTL_CAUSE_ENA_SHIFT 30
+#define I40E_QINT_TQCTL_CAUSE_ENA_MASK (0x1 << I40E_QINT_TQCTL_CAUSE_ENA_SHIFT)
+#define I40E_QINT_TQCTL_INTEVENT_SHIFT 31
+#define I40E_QINT_TQCTL_INTEVENT_MASK (0x1 << I40E_QINT_TQCTL_INTEVENT_SHIFT)
+#define I40E_VFINT_DYN_CTL0(_VF) (0x0002A400 + ((_VF) * 4)) /* _i=0...127 */
+#define I40E_VFINT_DYN_CTL0_MAX_INDEX 127
+#define I40E_VFINT_DYN_CTL0_INTENA_SHIFT 0
+#define I40E_VFINT_DYN_CTL0_INTENA_MASK (0x1 << I40E_VFINT_DYN_CTL0_INTENA_SHIFT)
+#define I40E_VFINT_DYN_CTL0_CLEARPBA_SHIFT 1
+#define I40E_VFINT_DYN_CTL0_CLEARPBA_MASK (0x1 << I40E_VFINT_DYN_CTL0_CLEARPBA_SHIFT)
+#define I40E_VFINT_DYN_CTL0_SWINT_TRIG_SHIFT 2
+#define I40E_VFINT_DYN_CTL0_SWINT_TRIG_MASK (0x1 << I40E_VFINT_DYN_CTL0_SWINT_TRIG_SHIFT)
+#define I40E_VFINT_DYN_CTL0_ITR_INDX_SHIFT 3
+#define I40E_VFINT_DYN_CTL0_ITR_INDX_MASK (0x3 << I40E_VFINT_DYN_CTL0_ITR_INDX_SHIFT)
+#define I40E_VFINT_DYN_CTL0_INTERVAL_SHIFT 5
+#define I40E_VFINT_DYN_CTL0_INTERVAL_MASK (0xFFF << I40E_VFINT_DYN_CTL0_INTERVAL_SHIFT)
+#define I40E_VFINT_DYN_CTL0_SW_ITR_INDX_ENA_SHIFT 24
+#define I40E_VFINT_DYN_CTL0_SW_ITR_INDX_ENA_MASK (0x1 << I40E_VFINT_DYN_CTL0_SW_ITR_INDX_ENA_SHIFT)
+#define I40E_VFINT_DYN_CTL0_SW_ITR_INDX_SHIFT 25
+#define I40E_VFINT_DYN_CTL0_SW_ITR_INDX_MASK (0x3 << I40E_VFINT_DYN_CTL0_SW_ITR_INDX_SHIFT)
+#define I40E_VFINT_DYN_CTL0_INTENA_MSK_SHIFT 31
+#define I40E_VFINT_DYN_CTL0_INTENA_MSK_MASK (0x1 << I40E_VFINT_DYN_CTL0_INTENA_MSK_SHIFT)
+#define I40E_VFINT_DYN_CTLN(_INTVF) (0x00024800 + ((_INTVF) * 4)) /* _i=0...511 */
+#define I40E_VFINT_DYN_CTLN_MAX_INDEX 511
+#define I40E_VFINT_DYN_CTLN_INTENA_SHIFT 0
+#define I40E_VFINT_DYN_CTLN_INTENA_MASK (0x1 << I40E_VFINT_DYN_CTLN_INTENA_SHIFT)
+#define I40E_VFINT_DYN_CTLN_CLEARPBA_SHIFT 1
+#define I40E_VFINT_DYN_CTLN_CLEARPBA_MASK (0x1 << I40E_VFINT_DYN_CTLN_CLEARPBA_SHIFT)
+#define I40E_VFINT_DYN_CTLN_SWINT_TRIG_SHIFT 2
+#define I40E_VFINT_DYN_CTLN_SWINT_TRIG_MASK (0x1 << I40E_VFINT_DYN_CTLN_SWINT_TRIG_SHIFT)
+#define I40E_VFINT_DYN_CTLN_ITR_INDX_SHIFT 3
+#define I40E_VFINT_DYN_CTLN_ITR_INDX_MASK (0x3 << I40E_VFINT_DYN_CTLN_ITR_INDX_SHIFT)
+#define I40E_VFINT_DYN_CTLN_INTERVAL_SHIFT 5
+#define I40E_VFINT_DYN_CTLN_INTERVAL_MASK (0xFFF << I40E_VFINT_DYN_CTLN_INTERVAL_SHIFT)
+#define I40E_VFINT_DYN_CTLN_SW_ITR_INDX_ENA_SHIFT 24
+#define I40E_VFINT_DYN_CTLN_SW_ITR_INDX_ENA_MASK (0x1 << I40E_VFINT_DYN_CTLN_SW_ITR_INDX_ENA_SHIFT)
+#define I40E_VFINT_DYN_CTLN_SW_ITR_INDX_SHIFT 25
+#define I40E_VFINT_DYN_CTLN_SW_ITR_INDX_MASK (0x3 << I40E_VFINT_DYN_CTLN_SW_ITR_INDX_SHIFT)
+#define I40E_VFINT_DYN_CTLN_INTENA_MSK_SHIFT 31
+#define I40E_VFINT_DYN_CTLN_INTENA_MSK_MASK (0x1 << I40E_VFINT_DYN_CTLN_INTENA_MSK_SHIFT)
+#define I40E_VFINT_ICR0(_VF) (0x0002BC00 + ((_VF) * 4)) /* _i=0...127 */
+#define I40E_VFINT_ICR0_MAX_INDEX 127
+#define I40E_VFINT_ICR0_INTEVENT_SHIFT 0
+#define I40E_VFINT_ICR0_INTEVENT_MASK (0x1 << I40E_VFINT_ICR0_INTEVENT_SHIFT)
+#define I40E_VFINT_ICR0_QUEUE_0_SHIFT 1
+#define I40E_VFINT_ICR0_QUEUE_0_MASK (0x1 << I40E_VFINT_ICR0_QUEUE_0_SHIFT)
+#define I40E_VFINT_ICR0_QUEUE_1_SHIFT 2
+#define I40E_VFINT_ICR0_QUEUE_1_MASK (0x1 << I40E_VFINT_ICR0_QUEUE_1_SHIFT)
+#define I40E_VFINT_ICR0_QUEUE_2_SHIFT 3
+#define I40E_VFINT_ICR0_QUEUE_2_MASK (0x1 << I40E_VFINT_ICR0_QUEUE_2_SHIFT)
+#define I40E_VFINT_ICR0_QUEUE_3_SHIFT 4
+#define I40E_VFINT_ICR0_QUEUE_3_MASK (0x1 << I40E_VFINT_ICR0_QUEUE_3_SHIFT)
+#define I40E_VFINT_ICR0_LINK_STAT_CHANGE_SHIFT 25
+#define I40E_VFINT_ICR0_LINK_STAT_CHANGE_MASK (0x1 << I40E_VFINT_ICR0_LINK_STAT_CHANGE_SHIFT)
+#define I40E_VFINT_ICR0_ADMINQ_SHIFT 30
+#define I40E_VFINT_ICR0_ADMINQ_MASK (0x1 << I40E_VFINT_ICR0_ADMINQ_SHIFT)
+#define I40E_VFINT_ICR0_SWINT_SHIFT 31
+#define I40E_VFINT_ICR0_SWINT_MASK (0x1 << I40E_VFINT_ICR0_SWINT_SHIFT)
+#define I40E_VFINT_ICR0_ENA(_VF) (0x0002C000 + ((_VF) * 4)) /* _i=0...127 */
+#define I40E_VFINT_ICR0_ENA_MAX_INDEX 127
+#define I40E_VFINT_ICR0_ENA_LINK_STAT_CHANGE_SHIFT 25
+#define I40E_VFINT_ICR0_ENA_LINK_STAT_CHANGE_MASK (0x1 << I40E_VFINT_ICR0_ENA_LINK_STAT_CHANGE_SHIFT)
+#define I40E_VFINT_ICR0_ENA_ADMINQ_SHIFT 30
+#define I40E_VFINT_ICR0_ENA_ADMINQ_MASK (0x1 << I40E_VFINT_ICR0_ENA_ADMINQ_SHIFT)
+#define I40E_VFINT_ICR0_ENA_RSVD_SHIFT 31
+#define I40E_VFINT_ICR0_ENA_RSVD_MASK (0x1 << I40E_VFINT_ICR0_ENA_RSVD_SHIFT)
+#define I40E_VFINT_ITR0(_i, _VF) (0x00028000 + ((_i) * 1024 + (_VF) * 4)) /* _i=0...2, _VF=0...127 */
+#define I40E_VFINT_ITR0_MAX_INDEX 2
+#define I40E_VFINT_ITR0_INTERVAL_SHIFT 0
+#define I40E_VFINT_ITR0_INTERVAL_MASK (0xFFF << I40E_VFINT_ITR0_INTERVAL_SHIFT)
+#define I40E_VFINT_ITRN(_i, _INTVF) (0x00020000 + ((_i) * 2048 + (_INTVF) * 4))
+#define I40E_VFINT_ITRN_MAX_INDEX 2
+#define I40E_VFINT_ITRN_INTERVAL_SHIFT 0
+#define I40E_VFINT_ITRN_INTERVAL_MASK (0xFFF << I40E_VFINT_ITRN_INTERVAL_SHIFT)
+#define I40E_VFINT_STAT_CTL0(_VF) (0x0002A000 + ((_VF) * 4)) /* _i=0...127 */
+#define I40E_VFINT_STAT_CTL0_MAX_INDEX 127
+#define I40E_VFINT_STAT_CTL0_OTHER_ITR_INDX_SHIFT 2
+#define I40E_VFINT_STAT_CTL0_OTHER_ITR_INDX_MASK (0x3 << I40E_VFINT_STAT_CTL0_OTHER_ITR_INDX_SHIFT)
+#define I40E_VPINT_AEQCTL(_VF) (0x0002B800 + ((_VF) * 4)) /* _i=0...127 */
+#define I40E_VPINT_AEQCTL_MAX_INDEX 127
+#define I40E_VPINT_AEQCTL_MSIX_INDX_SHIFT 0
+#define I40E_VPINT_AEQCTL_MSIX_INDX_MASK (0xFF << I40E_VPINT_AEQCTL_MSIX_INDX_SHIFT)
+#define I40E_VPINT_AEQCTL_ITR_INDX_SHIFT 11
+#define I40E_VPINT_AEQCTL_ITR_INDX_MASK (0x3 << I40E_VPINT_AEQCTL_ITR_INDX_SHIFT)
+#define I40E_VPINT_AEQCTL_MSIX0_INDX_SHIFT 13
+#define I40E_VPINT_AEQCTL_MSIX0_INDX_MASK (0x7 << I40E_VPINT_AEQCTL_MSIX0_INDX_SHIFT)
+#define I40E_VPINT_AEQCTL_CAUSE_ENA_SHIFT 30
+#define I40E_VPINT_AEQCTL_CAUSE_ENA_MASK (0x1 << I40E_VPINT_AEQCTL_CAUSE_ENA_SHIFT)
+#define I40E_VPINT_AEQCTL_INTEVENT_SHIFT 31
+#define I40E_VPINT_AEQCTL_INTEVENT_MASK (0x1 << I40E_VPINT_AEQCTL_INTEVENT_SHIFT)
+#define I40E_VPINT_CEQCTL(_INTVF) (0x00026800 + ((_INTVF) * 4)) /* _i=0...511 */
+#define I40E_VPINT_CEQCTL_MAX_INDEX 511
+#define I40E_VPINT_CEQCTL_MSIX_INDX_SHIFT 0
+#define I40E_VPINT_CEQCTL_MSIX_INDX_MASK (0xFF << I40E_VPINT_CEQCTL_MSIX_INDX_SHIFT)
+#define I40E_VPINT_CEQCTL_ITR_INDX_SHIFT 11
+#define I40E_VPINT_CEQCTL_ITR_INDX_MASK (0x3 << I40E_VPINT_CEQCTL_ITR_INDX_SHIFT)
+#define I40E_VPINT_CEQCTL_MSIX0_INDX_SHIFT 13
+#define I40E_VPINT_CEQCTL_MSIX0_INDX_MASK (0x7 << I40E_VPINT_CEQCTL_MSIX0_INDX_SHIFT)
+#define I40E_VPINT_CEQCTL_NEXTQ_INDX_SHIFT 16
+#define I40E_VPINT_CEQCTL_NEXTQ_INDX_MASK (0x7FF << I40E_VPINT_CEQCTL_NEXTQ_INDX_SHIFT)
+#define I40E_VPINT_CEQCTL_NEXTQ_TYPE_SHIFT 27
+#define I40E_VPINT_CEQCTL_NEXTQ_TYPE_MASK (0x3 << I40E_VPINT_CEQCTL_NEXTQ_TYPE_SHIFT)
+#define I40E_VPINT_CEQCTL_CAUSE_ENA_SHIFT 30
+#define I40E_VPINT_CEQCTL_CAUSE_ENA_MASK (0x1 << I40E_VPINT_CEQCTL_CAUSE_ENA_SHIFT)
+#define I40E_VPINT_CEQCTL_INTEVENT_SHIFT 31
+#define I40E_VPINT_CEQCTL_INTEVENT_MASK (0x1 << I40E_VPINT_CEQCTL_INTEVENT_SHIFT)
+#define I40E_VPINT_LNKLST0(_VF) (0x0002A800 + ((_VF) * 4)) /* _i=0...127 */
+#define I40E_VPINT_LNKLST0_MAX_INDEX 127
+#define I40E_VPINT_LNKLST0_FIRSTQ_INDX_SHIFT 0
+#define I40E_VPINT_LNKLST0_FIRSTQ_INDX_MASK (0x7FF << I40E_VPINT_LNKLST0_FIRSTQ_INDX_SHIFT)
+#define I40E_VPINT_LNKLST0_FIRSTQ_TYPE_SHIFT 11
+#define I40E_VPINT_LNKLST0_FIRSTQ_TYPE_MASK (0x3 << I40E_VPINT_LNKLST0_FIRSTQ_TYPE_SHIFT)
+#define I40E_VPINT_LNKLSTN(_INTVF) (0x00025000 + ((_INTVF) * 4)) /* _i=0...511 */
+#define I40E_VPINT_LNKLSTN_MAX_INDEX 511
+#define I40E_VPINT_LNKLSTN_FIRSTQ_INDX_SHIFT 0
+#define I40E_VPINT_LNKLSTN_FIRSTQ_INDX_MASK (0x7FF << I40E_VPINT_LNKLSTN_FIRSTQ_INDX_SHIFT)
+#define I40E_VPINT_LNKLSTN_FIRSTQ_TYPE_SHIFT 11
+#define I40E_VPINT_LNKLSTN_FIRSTQ_TYPE_MASK (0x3 << I40E_VPINT_LNKLSTN_FIRSTQ_TYPE_SHIFT)
+#define I40E_VPINT_RATE0(_VF) (0x0002AC00 + ((_VF) * 4)) /* _i=0...127 */
+#define I40E_VPINT_RATE0_MAX_INDEX 127
+#define I40E_VPINT_RATE0_INTERVAL_SHIFT 0
+#define I40E_VPINT_RATE0_INTERVAL_MASK (0x3F << I40E_VPINT_RATE0_INTERVAL_SHIFT)
+#define I40E_VPINT_RATE0_INTRL_ENA_SHIFT 6
+#define I40E_VPINT_RATE0_INTRL_ENA_MASK (0x1 << I40E_VPINT_RATE0_INTRL_ENA_SHIFT)
+#define I40E_VPINT_RATEN(_INTVF) (0x00025800 + ((_INTVF) * 4)) /* _i=0...511 */
+#define I40E_VPINT_RATEN_MAX_INDEX 511
+#define I40E_VPINT_RATEN_INTERVAL_SHIFT 0
+#define I40E_VPINT_RATEN_INTERVAL_MASK (0x3F << I40E_VPINT_RATEN_INTERVAL_SHIFT)
+#define I40E_VPINT_RATEN_INTRL_ENA_SHIFT 6
+#define I40E_VPINT_RATEN_INTRL_ENA_MASK (0x1 << I40E_VPINT_RATEN_INTRL_ENA_SHIFT)
+#define I40E_GL_RDPU_CNTRL 0x00051060
+#define I40E_GL_RDPU_CNTRL_RX_PAD_EN_SHIFT 0
+#define I40E_GL_RDPU_CNTRL_RX_PAD_EN_MASK (0x1 << I40E_GL_RDPU_CNTRL_RX_PAD_EN_SHIFT)
+#define I40E_GL_RDPU_CNTRL_ECO_SHIFT 1
+#define I40E_GL_RDPU_CNTRL_ECO_MASK (0x7FFFFFFF << I40E_GL_RDPU_CNTRL_ECO_SHIFT)
+#define I40E_GLLAN_RCTL_0 0x0012A500
+#define I40E_GLLAN_RCTL_0_PXE_MODE_SHIFT 0
+#define I40E_GLLAN_RCTL_0_PXE_MODE_MASK (0x1 << I40E_GLLAN_RCTL_0_PXE_MODE_SHIFT)
+#define I40E_GLLAN_TSOMSK_F 0x000442D8
+#define I40E_GLLAN_TSOMSK_F_TCPMSKF_SHIFT 0
+#define I40E_GLLAN_TSOMSK_F_TCPMSKF_MASK (0xFFF << I40E_GLLAN_TSOMSK_F_TCPMSKF_SHIFT)
+#define I40E_GLLAN_TSOMSK_L 0x000442E0
+#define I40E_GLLAN_TSOMSK_L_TCPMSKL_SHIFT 0
+#define I40E_GLLAN_TSOMSK_L_TCPMSKL_MASK (0xFFF << I40E_GLLAN_TSOMSK_L_TCPMSKL_SHIFT)
+#define I40E_GLLAN_TSOMSK_M 0x000442DC
+#define I40E_GLLAN_TSOMSK_M_TCPMSKM_SHIFT 0
+#define I40E_GLLAN_TSOMSK_M_TCPMSKM_MASK (0xFFF << I40E_GLLAN_TSOMSK_M_TCPMSKM_SHIFT)
+#define I40E_PFLAN_QALLOC 0x001C0400
+#define I40E_PFLAN_QALLOC_FIRSTQ_SHIFT 0
+#define I40E_PFLAN_QALLOC_FIRSTQ_MASK (0x7FF << I40E_PFLAN_QALLOC_FIRSTQ_SHIFT)
+#define I40E_PFLAN_QALLOC_LASTQ_SHIFT 16
+#define I40E_PFLAN_QALLOC_LASTQ_MASK (0x7FF << I40E_PFLAN_QALLOC_LASTQ_SHIFT)
+#define I40E_PFLAN_QALLOC_VALID_SHIFT 31
+#define I40E_PFLAN_QALLOC_VALID_MASK (0x1 << I40E_PFLAN_QALLOC_VALID_SHIFT)
+#define I40E_QRX_ENA(_Q) (0x00120000 + ((_Q) * 4)) /* _i=0...1535 */
+#define I40E_QRX_ENA_MAX_INDEX 1535
+#define I40E_QRX_ENA_QENA_REQ_SHIFT 0
+#define I40E_QRX_ENA_QENA_REQ_MASK (0x1 << I40E_QRX_ENA_QENA_REQ_SHIFT)
+#define I40E_QRX_ENA_FAST_QDIS_SHIFT 1
+#define I40E_QRX_ENA_FAST_QDIS_MASK (0x1 << I40E_QRX_ENA_FAST_QDIS_SHIFT)
+#define I40E_QRX_ENA_QENA_STAT_SHIFT 2
+#define I40E_QRX_ENA_QENA_STAT_MASK (0x1 << I40E_QRX_ENA_QENA_STAT_SHIFT)
+#define I40E_QRX_TAIL(_Q) (0x00128000 + ((_Q) * 4)) /* _i=0...1535 */
+#define I40E_QRX_TAIL_MAX_INDEX 1535
+#define I40E_QRX_TAIL_TAIL_SHIFT 0
+#define I40E_QRX_TAIL_TAIL_MASK (0x1FFF << I40E_QRX_TAIL_TAIL_SHIFT)
+#define I40E_QTX_CTL(_Q) (0x00104000 + ((_Q) * 4)) /* _i=0...1535 */
+#define I40E_QTX_CTL_MAX_INDEX 1535
+#define I40E_QTX_CTL_PFVF_Q_SHIFT 0
+#define I40E_QTX_CTL_PFVF_Q_MASK (0x3 << I40E_QTX_CTL_PFVF_Q_SHIFT)
+#define I40E_QTX_CTL_PF_INDX_SHIFT 2
+#define I40E_QTX_CTL_PF_INDX_MASK (0xF << I40E_QTX_CTL_PF_INDX_SHIFT)
+#define I40E_QTX_CTL_VFVM_INDX_SHIFT 7
+#define I40E_QTX_CTL_VFVM_INDX_MASK (0x1FF << I40E_QTX_CTL_VFVM_INDX_SHIFT)
+#define I40E_QTX_ENA(_Q) (0x00100000 + ((_Q) * 4)) /* _i=0...1535 */
+#define I40E_QTX_ENA_MAX_INDEX 1535
+#define I40E_QTX_ENA_QENA_REQ_SHIFT 0
+#define I40E_QTX_ENA_QENA_REQ_MASK (0x1 << I40E_QTX_ENA_QENA_REQ_SHIFT)
+#define I40E_QTX_ENA_FAST_QDIS_SHIFT 1
+#define I40E_QTX_ENA_FAST_QDIS_MASK (0x1 << I40E_QTX_ENA_FAST_QDIS_SHIFT)
+#define I40E_QTX_ENA_QENA_STAT_SHIFT 2
+#define I40E_QTX_ENA_QENA_STAT_MASK (0x1 << I40E_QTX_ENA_QENA_STAT_SHIFT)
+#define I40E_QTX_HEAD(_Q) (0x000E4000 + ((_Q) * 4)) /* _i=0...1535 */
+#define I40E_QTX_HEAD_MAX_INDEX 1535
+#define I40E_QTX_HEAD_HEAD_SHIFT 0
+#define I40E_QTX_HEAD_HEAD_MASK (0x1FFF << I40E_QTX_HEAD_HEAD_SHIFT)
+#define I40E_QTX_HEAD_RS_PENDING_SHIFT 16
+#define I40E_QTX_HEAD_RS_PENDING_MASK (0x1 << I40E_QTX_HEAD_RS_PENDING_SHIFT)
+#define I40E_QTX_TAIL(_Q) (0x00108000 + ((_Q) * 4)) /* _i=0...1535 */
+#define I40E_QTX_TAIL_MAX_INDEX 1535
+#define I40E_QTX_TAIL_TAIL_SHIFT 0
+#define I40E_QTX_TAIL_TAIL_MASK (0x1FFF << I40E_QTX_TAIL_TAIL_SHIFT)
+#define I40E_VPLAN_MAPENA(_VF) (0x00074000 + ((_VF) * 4)) /* _i=0...127 */
+#define I40E_VPLAN_MAPENA_MAX_INDEX 127
+#define I40E_VPLAN_MAPENA_TXRX_ENA_SHIFT 0
+#define I40E_VPLAN_MAPENA_TXRX_ENA_MASK (0x1 << I40E_VPLAN_MAPENA_TXRX_ENA_SHIFT)
+#define I40E_VPLAN_QTABLE(_i, _VF) (0x00070000 + ((_i) * 1024 + (_VF) * 4)) /* _i=0...15, _VF=0...127 */
+#define I40E_VPLAN_QTABLE_MAX_INDEX 15
+#define I40E_VPLAN_QTABLE_QINDEX_SHIFT 0
+#define I40E_VPLAN_QTABLE_QINDEX_MASK (0x7FF << I40E_VPLAN_QTABLE_QINDEX_SHIFT)
+#define I40E_VSILAN_QBASE(_VSI) (0x0020C800 + ((_VSI) * 4)) /* _i=0...383 */
+#define I40E_VSILAN_QBASE_MAX_INDEX 383
+#define I40E_VSILAN_QBASE_VSIBASE_SHIFT 0
+#define I40E_VSILAN_QBASE_VSIBASE_MASK (0x7FF << I40E_VSILAN_QBASE_VSIBASE_SHIFT)
+#define I40E_VSILAN_QBASE_VSIQTABLE_ENA_SHIFT 11
+#define I40E_VSILAN_QBASE_VSIQTABLE_ENA_MASK (0x1 << I40E_VSILAN_QBASE_VSIQTABLE_ENA_SHIFT)
+#define I40E_VSILAN_QTABLE(_i, _VSI) (0x00200000 + ((_i) * 2048 + (_VSI) * 4))
+#define I40E_VSILAN_QTABLE_MAX_INDEX 15
+#define I40E_VSILAN_QTABLE_QINDEX_0_SHIFT 0
+#define I40E_VSILAN_QTABLE_QINDEX_0_MASK (0x7FF << I40E_VSILAN_QTABLE_QINDEX_0_SHIFT)
+#define I40E_VSILAN_QTABLE_QINDEX_1_SHIFT 16
+#define I40E_VSILAN_QTABLE_QINDEX_1_MASK (0x7FF << I40E_VSILAN_QTABLE_QINDEX_1_SHIFT)
+#define I40E_PRTGL_SAH 0x001E2140
+#define I40E_PRTGL_SAH_FC_SAH_SHIFT 0
+#define I40E_PRTGL_SAH_FC_SAH_MASK (0xFFFF << I40E_PRTGL_SAH_FC_SAH_SHIFT)
+#define I40E_PRTGL_SAH_MFS_SHIFT 16
+#define I40E_PRTGL_SAH_MFS_MASK (0xFFFF << I40E_PRTGL_SAH_MFS_SHIFT)
+#define I40E_PRTGL_SAL 0x001E2120
+#define I40E_PRTGL_SAL_FC_SAL_SHIFT 0
+#define I40E_PRTGL_SAL_FC_SAL_MASK (0xFFFFFFFF << I40E_PRTGL_SAL_FC_SAL_SHIFT)
+#define I40E_PRTMAC_HLCTLA 0x001E4760
+#define I40E_PRTMAC_HLCTLA_DROP_US_PKTS_SHIFT 0
+#define I40E_PRTMAC_HLCTLA_DROP_US_PKTS_MASK (0x1 << I40E_PRTMAC_HLCTLA_DROP_US_PKTS_SHIFT)
+#define I40E_PRTMAC_HLCTLA_RX_FWRD_CTRL_SHIFT 1
+#define I40E_PRTMAC_HLCTLA_RX_FWRD_CTRL_MASK (0x1 << I40E_PRTMAC_HLCTLA_RX_FWRD_CTRL_SHIFT)
+#define I40E_PRTMAC_HLCTLA_CHOP_OS_PKT_SHIFT 2
+#define I40E_PRTMAC_HLCTLA_CHOP_OS_PKT_MASK (0x1 << I40E_PRTMAC_HLCTLA_CHOP_OS_PKT_SHIFT)
+#define I40E_PRTMAC_HLCTLA_TX_HYSTERESIS_SHIFT 4
+#define I40E_PRTMAC_HLCTLA_TX_HYSTERESIS_MASK (0x7 << I40E_PRTMAC_HLCTLA_TX_HYSTERESIS_SHIFT)
+#define I40E_PRTMAC_HLCTLA_HYS_FLUSH_PKT_SHIFT 7
+#define I40E_PRTMAC_HLCTLA_HYS_FLUSH_PKT_MASK (0x1 << I40E_PRTMAC_HLCTLA_HYS_FLUSH_PKT_SHIFT)
+#define I40E_PRTMAC_HSEC_CTL_RX_CHECK_SA_GCP 0x001E3130
+#define I40E_PRTMAC_HSEC_CTL_RX_CHECK_SA_GCP_HSEC_CTL_RX_CHECK_SA_GCP_SHIFT 0
+#define I40E_PRTMAC_HSEC_CTL_RX_CHECK_SA_GCP_HSEC_CTL_RX_CHECK_SA_GCP_MASK (0x1 << I40E_PRTMAC_HSEC_CTL_RX_CHECK_SA_GCP_HSEC_CTL_RX_CHECK_SA_GCP_SHIFT)
+#define I40E_PRTMAC_HSEC_CTL_RX_CHECK_SA_GPP 0x001E3290
+#define I40E_PRTMAC_HSEC_CTL_RX_CHECK_SA_GPP_HSEC_CTL_RX_CHECK_SA_GPP_SHIFT 0
+#define I40E_PRTMAC_HSEC_CTL_RX_CHECK_SA_GPP_HSEC_CTL_RX_CHECK_SA_GPP_MASK (0x1 << I40E_PRTMAC_HSEC_CTL_RX_CHECK_SA_GPP_HSEC_CTL_RX_CHECK_SA_GPP_SHIFT)
+#define I40E_PRTMAC_HSEC_CTL_RX_CHECK_SA_PPP 0x001E3310
+#define I40E_PRTMAC_HSEC_CTL_RX_CHECK_SA_PPP_HSEC_CTL_RX_CHECK_SA_PPP_SHIFT 0
+#define I40E_PRTMAC_HSEC_CTL_RX_CHECK_SA_PPP_HSEC_CTL_RX_CHECK_SA_PPP_MASK (0x1 << I40E_PRTMAC_HSEC_CTL_RX_CHECK_SA_PPP_HSEC_CTL_RX_CHECK_SA_PPP_SHIFT)
+#define I40E_PRTMAC_HSEC_CTL_RX_CHECK_UCAST_GCP 0x001E3100
+#define I40E_PRTMAC_HSEC_CTL_RX_CHECK_UCAST_GCP_HSEC_CTL_RX_CHECK_UCAST_GCP_SHIFT 0
+#define I40E_PRTMAC_HSEC_CTL_RX_CHECK_UCAST_GCP_HSEC_CTL_RX_CHECK_UCAST_GCP_MASK (0x1 << I40E_PRTMAC_HSEC_CTL_RX_CHECK_UCAST_GCP_HSEC_CTL_RX_CHECK_UCAST_GCP_SHIFT)
+#define I40E_PRTMAC_HSEC_CTL_RX_CHECK_UCAST_GPP 0x001E3280
+#define I40E_PRTMAC_HSEC_CTL_RX_CHECK_UCAST_GPP_HSEC_CTL_RX_CHECK_UCAST_GPP_SHIFT 0
+#define I40E_PRTMAC_HSEC_CTL_RX_CHECK_UCAST_GPP_HSEC_CTL_RX_CHECK_UCAST_GPP_MASK (0x1 << I40E_PRTMAC_HSEC_CTL_RX_CHECK_UCAST_GPP_HSEC_CTL_RX_CHECK_UCAST_GPP_SHIFT)
+#define I40E_PRTMAC_HSEC_CTL_RX_CHECK_UCAST_PPP 0x001E3300
+#define I40E_PRTMAC_HSEC_CTL_RX_CHECK_UCAST_PPP_HSEC_CTL_RX_CHECK_UCAST_PPP_SHIFT 0
+#define I40E_PRTMAC_HSEC_CTL_RX_CHECK_UCAST_PPP_HSEC_CTL_RX_CHECK_UCAST_PPP_MASK (0x1 << I40E_PRTMAC_HSEC_CTL_RX_CHECK_UCAST_PPP_HSEC_CTL_RX_CHECK_UCAST_PPP_SHIFT)
+#define I40E_PRTMAC_HSEC_CTL_RX_ENABLE_GCP 0x001E30E0
+#define I40E_PRTMAC_HSEC_CTL_RX_ENABLE_GCP_HSEC_CTL_RX_ENABLE_GCP_SHIFT 0
+#define I40E_PRTMAC_HSEC_CTL_RX_ENABLE_GCP_HSEC_CTL_RX_ENABLE_GCP_MASK (0x1 << I40E_PRTMAC_HSEC_CTL_RX_ENABLE_GCP_HSEC_CTL_RX_ENABLE_GCP_SHIFT)
+#define I40E_PRTMAC_HSEC_CTL_RX_ENABLE_GPP 0x001E3260
+#define I40E_PRTMAC_HSEC_CTL_RX_ENABLE_GPP_HSEC_CTL_RX_ENABLE_GPP_SHIFT 0
+#define I40E_PRTMAC_HSEC_CTL_RX_ENABLE_GPP_HSEC_CTL_RX_ENABLE_GPP_MASK (0x1 << I40E_PRTMAC_HSEC_CTL_RX_ENABLE_GPP_HSEC_CTL_RX_ENABLE_GPP_SHIFT)
+#define I40E_PRTMAC_HSEC_CTL_RX_ENABLE_PPP 0x001E32E0
+#define I40E_PRTMAC_HSEC_CTL_RX_ENABLE_PPP_HSEC_CTL_RX_ENABLE_PPP_SHIFT 0
+#define I40E_PRTMAC_HSEC_CTL_RX_ENABLE_PPP_HSEC_CTL_RX_ENABLE_PPP_MASK (0x1 << I40E_PRTMAC_HSEC_CTL_RX_ENABLE_PPP_HSEC_CTL_RX_ENABLE_PPP_SHIFT)
+#define I40E_PRTMAC_HSEC_CTL_RX_FORWARD_CONTROL 0x001E3360
+#define I40E_PRTMAC_HSEC_CTL_RX_FORWARD_CONTROL_HSEC_CTL_RX_FORWARD_CONTROL_SHIFT 0
+#define I40E_PRTMAC_HSEC_CTL_RX_FORWARD_CONTROL_HSEC_CTL_RX_FORWARD_CONTROL_MASK (0x1 << I40E_PRTMAC_HSEC_CTL_RX_FORWARD_CONTROL_HSEC_CTL_RX_FORWARD_CONTROL_SHIFT)
+#define I40E_PRTMAC_HSEC_CTL_RX_PAUSE_DA_UCAST_PART1 0x001E3110
+#define I40E_PRTMAC_HSEC_CTL_RX_PAUSE_DA_UCAST_PART1_HSEC_CTL_RX_PAUSE_DA_UCAST_PART1_SHIFT 0
+#define I40E_PRTMAC_HSEC_CTL_RX_PAUSE_DA_UCAST_PART1_HSEC_CTL_RX_PAUSE_DA_UCAST_PART1_MASK (0xFFFFFFFF << I40E_PRTMAC_HSEC_CTL_RX_PAUSE_DA_UCAST_PART1_HSEC_CTL_RX_PAUSE_DA_UCAST_PART1_SHIFT)
+#define I40E_PRTMAC_HSEC_CTL_RX_PAUSE_DA_UCAST_PART2 0x001E3120
+#define I40E_PRTMAC_HSEC_CTL_RX_PAUSE_DA_UCAST_PART2_HSEC_CTL_RX_PAUSE_DA_UCAST_PART2_SHIFT 0
+#define I40E_PRTMAC_HSEC_CTL_RX_PAUSE_DA_UCAST_PART2_HSEC_CTL_RX_PAUSE_DA_UCAST_PART2_MASK (0xFFFF << I40E_PRTMAC_HSEC_CTL_RX_PAUSE_DA_UCAST_PART2_HSEC_CTL_RX_PAUSE_DA_UCAST_PART2_SHIFT)
+#define I40E_PRTMAC_HSEC_CTL_RX_PAUSE_ENABLE 0x001E30C0
+#define I40E_PRTMAC_HSEC_CTL_RX_PAUSE_ENABLE_HSEC_CTL_RX_PAUSE_ENABLE_SHIFT 0
+#define I40E_PRTMAC_HSEC_CTL_RX_PAUSE_ENABLE_HSEC_CTL_RX_PAUSE_ENABLE_MASK (0x1FF << I40E_PRTMAC_HSEC_CTL_RX_PAUSE_ENABLE_HSEC_CTL_RX_PAUSE_ENABLE_SHIFT)
+#define I40E_PRTMAC_HSEC_CTL_RX_PAUSE_SA_PART1 0x001E3140
+#define I40E_PRTMAC_HSEC_CTL_RX_PAUSE_SA_PART1_HSEC_CTL_RX_PAUSE_SA_PART1_SHIFT 0
+#define I40E_PRTMAC_HSEC_CTL_RX_PAUSE_SA_PART1_HSEC_CTL_RX_PAUSE_SA_PART1_MASK (0xFFFFFFFF << I40E_PRTMAC_HSEC_CTL_RX_PAUSE_SA_PART1_HSEC_CTL_RX_PAUSE_SA_PART1_SHIFT)
+#define I40E_PRTMAC_HSEC_CTL_RX_PAUSE_SA_PART2 0x001E3150
+#define I40E_PRTMAC_HSEC_CTL_RX_PAUSE_SA_PART2_HSEC_CTL_RX_PAUSE_SA_PART2_SHIFT 0
+#define I40E_PRTMAC_HSEC_CTL_RX_PAUSE_SA_PART2_HSEC_CTL_RX_PAUSE_SA_PART2_MASK (0xFFFF << I40E_PRTMAC_HSEC_CTL_RX_PAUSE_SA_PART2_HSEC_CTL_RX_PAUSE_SA_PART2_SHIFT)
+#define I40E_PRTMAC_HSEC_CTL_TX_ENABLE 0x001E3000
+#define I40E_PRTMAC_HSEC_CTL_TX_ENABLE_HSEC_CTL_TX_ENABLE_SHIFT 0
+#define I40E_PRTMAC_HSEC_CTL_TX_ENABLE_HSEC_CTL_TX_ENABLE_MASK (0x1 << I40E_PRTMAC_HSEC_CTL_TX_ENABLE_HSEC_CTL_TX_ENABLE_SHIFT)
+#define I40E_PRTMAC_HSEC_CTL_TX_PAUSE_ENABLE 0x001E30D0
+#define I40E_PRTMAC_HSEC_CTL_TX_PAUSE_ENABLE_HSEC_CTL_TX_PAUSE_ENABLE_SHIFT 0
+#define I40E_PRTMAC_HSEC_CTL_TX_PAUSE_ENABLE_HSEC_CTL_TX_PAUSE_ENABLE_MASK (0x1FF << I40E_PRTMAC_HSEC_CTL_TX_PAUSE_ENABLE_HSEC_CTL_TX_PAUSE_ENABLE_SHIFT)
+#define I40E_PRTMAC_HSEC_CTL_TX_PAUSE_QUANTA(_i) (0x001E3370 + ((_i) * 16))
+#define I40E_PRTMAC_HSEC_CTL_TX_PAUSE_QUANTA_MAX_INDEX 8
+#define I40E_PRTMAC_HSEC_CTL_TX_PAUSE_QUANTA_HSEC_CTL_TX_PAUSE_QUANTA_SHIFT 0
+#define I40E_PRTMAC_HSEC_CTL_TX_PAUSE_QUANTA_HSEC_CTL_TX_PAUSE_QUANTA_MASK (0xFFFF << I40E_PRTMAC_HSEC_CTL_TX_PAUSE_QUANTA_HSEC_CTL_TX_PAUSE_QUANTA_SHIFT)
+#define I40E_PRTMAC_HSEC_CTL_TX_PAUSE_REFRESH_TIMER(_i) (0x001E3400 + ((_i) * 16))
+#define I40E_PRTMAC_HSEC_CTL_TX_PAUSE_REFRESH_TIMER_MAX_INDEX 8
+#define I40E_PRTMAC_HSEC_CTL_TX_PAUSE_REFRESH_TIMER_HSEC_CTL_TX_PAUSE_REFRESH_TIMER_SHIFT 0
+#define I40E_PRTMAC_HSEC_CTL_TX_PAUSE_REFRESH_TIMER_HSEC_CTL_TX_PAUSE_REFRESH_TIMER_MASK (0xFFFF << I40E_PRTMAC_HSEC_CTL_TX_PAUSE_REFRESH_TIMER_HSEC_CTL_TX_PAUSE_REFRESH_TIMER_SHIFT)
+#define I40E_PRTMAC_HSEC_CTL_TX_SA_PART1 0x001E34B0
+#define I40E_PRTMAC_HSEC_CTL_TX_SA_PART1_HSEC_CTL_TX_SA_PART1_SHIFT 0
+#define I40E_PRTMAC_HSEC_CTL_TX_SA_PART1_HSEC_CTL_TX_SA_PART1_MASK (0xFFFFFFFF << I40E_PRTMAC_HSEC_CTL_TX_SA_PART1_HSEC_CTL_TX_SA_PART1_SHIFT)
+#define I40E_PRTMAC_HSEC_CTL_TX_SA_PART2 0x001E34C0
+#define I40E_PRTMAC_HSEC_CTL_TX_SA_PART2_HSEC_CTL_TX_SA_PART2_SHIFT 0
+#define I40E_PRTMAC_HSEC_CTL_TX_SA_PART2_HSEC_CTL_TX_SA_PART2_MASK (0xFFFF << I40E_PRTMAC_HSEC_CTL_TX_SA_PART2_HSEC_CTL_TX_SA_PART2_SHIFT)
+#define I40E_PRTMAC_HSECTL1 0x001E3560
+#define I40E_PRTMAC_HSECTL1_DROP_US_PKTS_SHIFT 0
+#define I40E_PRTMAC_HSECTL1_DROP_US_PKTS_MASK (0x1 << I40E_PRTMAC_HSECTL1_DROP_US_PKTS_SHIFT)
+#define I40E_PRTMAC_HSECTL1_PAD_US_PKT_SHIFT 3
+#define I40E_PRTMAC_HSECTL1_PAD_US_PKT_MASK (0x1 << I40E_PRTMAC_HSECTL1_PAD_US_PKT_SHIFT)
+#define I40E_PRTMAC_HSECTL1_TX_HYSTERESIS_SHIFT 4
+#define I40E_PRTMAC_HSECTL1_TX_HYSTERESIS_MASK (0x7 << I40E_PRTMAC_HSECTL1_TX_HYSTERESIS_SHIFT)
+#define I40E_PRTMAC_HSECTL1_HYS_FLUSH_PKT_SHIFT 7
+/* i40e register offset and bit-field definitions (auto-generated style).
+ * Layout per field: <REG>_<FIELD>_SHIFT gives the bit position and
+ * <REG>_<FIELD>_MASK is (field_width_mask << shift).
+ * NOTE(review): masks such as (0x1 << 31) left-shift into the sign bit of a
+ * signed int, which is undefined behavior in C; a (u32) cast inside the mask
+ * expression would be safer -- TODO confirm against the file's convention.
+ */
+#define I40E_PRTMAC_HSECTL1_HYS_FLUSH_PKT_MASK (0x1 << I40E_PRTMAC_HSECTL1_HYS_FLUSH_PKT_SHIFT)
+#define I40E_PRTMAC_HSECTL1_EN_SFD_CHECK_SHIFT 30
+#define I40E_PRTMAC_HSECTL1_EN_SFD_CHECK_MASK (0x1 << I40E_PRTMAC_HSECTL1_EN_SFD_CHECK_SHIFT)
+#define I40E_PRTMAC_HSECTL1_EN_PREAMBLE_CHECK_SHIFT 31
+#define I40E_PRTMAC_HSECTL1_EN_PREAMBLE_CHECK_MASK (0x1 << I40E_PRTMAC_HSECTL1_EN_PREAMBLE_CHECK_SHIFT)
+/* PRTMAC_PCS_XAUI_SWAP_A/B: per-port XAUI TX/RX lane-swap mapping, 2 bits per lane */
+#define I40E_PRTMAC_PCS_XAUI_SWAP_A 0x0008C480
+#define I40E_PRTMAC_PCS_XAUI_SWAP_A_SWAP_TX_LANE3_SHIFT 0
+#define I40E_PRTMAC_PCS_XAUI_SWAP_A_SWAP_TX_LANE3_MASK (0x3 << I40E_PRTMAC_PCS_XAUI_SWAP_A_SWAP_TX_LANE3_SHIFT)
+#define I40E_PRTMAC_PCS_XAUI_SWAP_A_SWAP_TX_LANE2_SHIFT 2
+#define I40E_PRTMAC_PCS_XAUI_SWAP_A_SWAP_TX_LANE2_MASK (0x3 << I40E_PRTMAC_PCS_XAUI_SWAP_A_SWAP_TX_LANE2_SHIFT)
+#define I40E_PRTMAC_PCS_XAUI_SWAP_A_SWAP_TX_LANE1_SHIFT 4
+#define I40E_PRTMAC_PCS_XAUI_SWAP_A_SWAP_TX_LANE1_MASK (0x3 << I40E_PRTMAC_PCS_XAUI_SWAP_A_SWAP_TX_LANE1_SHIFT)
+#define I40E_PRTMAC_PCS_XAUI_SWAP_A_SWAP_TX_LANE0_SHIFT 6
+#define I40E_PRTMAC_PCS_XAUI_SWAP_A_SWAP_TX_LANE0_MASK (0x3 << I40E_PRTMAC_PCS_XAUI_SWAP_A_SWAP_TX_LANE0_SHIFT)
+#define I40E_PRTMAC_PCS_XAUI_SWAP_A_SWAP_RX_LANE3_SHIFT 8
+#define I40E_PRTMAC_PCS_XAUI_SWAP_A_SWAP_RX_LANE3_MASK (0x3 << I40E_PRTMAC_PCS_XAUI_SWAP_A_SWAP_RX_LANE3_SHIFT)
+#define I40E_PRTMAC_PCS_XAUI_SWAP_A_SWAP_RX_LANE2_SHIFT 10
+#define I40E_PRTMAC_PCS_XAUI_SWAP_A_SWAP_RX_LANE2_MASK (0x3 << I40E_PRTMAC_PCS_XAUI_SWAP_A_SWAP_RX_LANE2_SHIFT)
+#define I40E_PRTMAC_PCS_XAUI_SWAP_A_SWAP_RX_LANE1_SHIFT 12
+#define I40E_PRTMAC_PCS_XAUI_SWAP_A_SWAP_RX_LANE1_MASK (0x3 << I40E_PRTMAC_PCS_XAUI_SWAP_A_SWAP_RX_LANE1_SHIFT)
+#define I40E_PRTMAC_PCS_XAUI_SWAP_A_SWAP_RX_LANE0_SHIFT 14
+#define I40E_PRTMAC_PCS_XAUI_SWAP_A_SWAP_RX_LANE0_MASK (0x3 << I40E_PRTMAC_PCS_XAUI_SWAP_A_SWAP_RX_LANE0_SHIFT)
+#define I40E_PRTMAC_PCS_XAUI_SWAP_B 0x0008C484
+#define I40E_PRTMAC_PCS_XAUI_SWAP_B_SWAP_TX_LANE3_SHIFT 0
+#define I40E_PRTMAC_PCS_XAUI_SWAP_B_SWAP_TX_LANE3_MASK (0x3 << I40E_PRTMAC_PCS_XAUI_SWAP_B_SWAP_TX_LANE3_SHIFT)
+#define I40E_PRTMAC_PCS_XAUI_SWAP_B_SWAP_TX_LANE2_SHIFT 2
+#define I40E_PRTMAC_PCS_XAUI_SWAP_B_SWAP_TX_LANE2_MASK (0x3 << I40E_PRTMAC_PCS_XAUI_SWAP_B_SWAP_TX_LANE2_SHIFT)
+#define I40E_PRTMAC_PCS_XAUI_SWAP_B_SWAP_TX_LANE1_SHIFT 4
+#define I40E_PRTMAC_PCS_XAUI_SWAP_B_SWAP_TX_LANE1_MASK (0x3 << I40E_PRTMAC_PCS_XAUI_SWAP_B_SWAP_TX_LANE1_SHIFT)
+#define I40E_PRTMAC_PCS_XAUI_SWAP_B_SWAP_TX_LANE0_SHIFT 6
+#define I40E_PRTMAC_PCS_XAUI_SWAP_B_SWAP_TX_LANE0_MASK (0x3 << I40E_PRTMAC_PCS_XAUI_SWAP_B_SWAP_TX_LANE0_SHIFT)
+#define I40E_PRTMAC_PCS_XAUI_SWAP_B_SWAP_RX_LANE3_SHIFT 8
+#define I40E_PRTMAC_PCS_XAUI_SWAP_B_SWAP_RX_LANE3_MASK (0x3 << I40E_PRTMAC_PCS_XAUI_SWAP_B_SWAP_RX_LANE3_SHIFT)
+#define I40E_PRTMAC_PCS_XAUI_SWAP_B_SWAP_RX_LANE2_SHIFT 10
+#define I40E_PRTMAC_PCS_XAUI_SWAP_B_SWAP_RX_LANE2_MASK (0x3 << I40E_PRTMAC_PCS_XAUI_SWAP_B_SWAP_RX_LANE2_SHIFT)
+#define I40E_PRTMAC_PCS_XAUI_SWAP_B_SWAP_RX_LANE1_SHIFT 12
+#define I40E_PRTMAC_PCS_XAUI_SWAP_B_SWAP_RX_LANE1_MASK (0x3 << I40E_PRTMAC_PCS_XAUI_SWAP_B_SWAP_RX_LANE1_SHIFT)
+#define I40E_PRTMAC_PCS_XAUI_SWAP_B_SWAP_RX_LANE0_SHIFT 14
+#define I40E_PRTMAC_PCS_XAUI_SWAP_B_SWAP_RX_LANE0_MASK (0x3 << I40E_PRTMAC_PCS_XAUI_SWAP_B_SWAP_RX_LANE0_SHIFT)
+/* GL_MNG_FWSM: global firmware status/mode word (modes, error indications, per-SerDes config errors) */
+#define I40E_GL_MNG_FWSM 0x000B6134
+#define I40E_GL_MNG_FWSM_FW_MODES_SHIFT 0
+#define I40E_GL_MNG_FWSM_FW_MODES_MASK (0x3FF << I40E_GL_MNG_FWSM_FW_MODES_SHIFT)
+#define I40E_GL_MNG_FWSM_EEP_RELOAD_IND_SHIFT 10
+#define I40E_GL_MNG_FWSM_EEP_RELOAD_IND_MASK (0x1 << I40E_GL_MNG_FWSM_EEP_RELOAD_IND_SHIFT)
+#define I40E_GL_MNG_FWSM_CRC_ERROR_MODULE_SHIFT 11
+#define I40E_GL_MNG_FWSM_CRC_ERROR_MODULE_MASK (0xF << I40E_GL_MNG_FWSM_CRC_ERROR_MODULE_SHIFT)
+#define I40E_GL_MNG_FWSM_FW_STATUS_VALID_SHIFT 15
+#define I40E_GL_MNG_FWSM_FW_STATUS_VALID_MASK (0x1 << I40E_GL_MNG_FWSM_FW_STATUS_VALID_SHIFT)
+#define I40E_GL_MNG_FWSM_EXT_ERR_IND_SHIFT 19
+#define I40E_GL_MNG_FWSM_EXT_ERR_IND_MASK (0x3F << I40E_GL_MNG_FWSM_EXT_ERR_IND_SHIFT)
+#define I40E_GL_MNG_FWSM_PHY_SERDES0_CONFIG_ERR_SHIFT 26
+#define I40E_GL_MNG_FWSM_PHY_SERDES0_CONFIG_ERR_MASK (0x1 << I40E_GL_MNG_FWSM_PHY_SERDES0_CONFIG_ERR_SHIFT)
+#define I40E_GL_MNG_FWSM_PHY_SERDES1_CONFIG_ERR_SHIFT 27
+#define I40E_GL_MNG_FWSM_PHY_SERDES1_CONFIG_ERR_MASK (0x1 << I40E_GL_MNG_FWSM_PHY_SERDES1_CONFIG_ERR_SHIFT)
+#define I40E_GL_MNG_FWSM_PHY_SERDES2_CONFIG_ERR_SHIFT 28
+#define I40E_GL_MNG_FWSM_PHY_SERDES2_CONFIG_ERR_MASK (0x1 << I40E_GL_MNG_FWSM_PHY_SERDES2_CONFIG_ERR_SHIFT)
+#define I40E_GL_MNG_FWSM_PHY_SERDES3_CONFIG_ERR_SHIFT 29
+#define I40E_GL_MNG_FWSM_PHY_SERDES3_CONFIG_ERR_MASK (0x1 << I40E_GL_MNG_FWSM_PHY_SERDES3_CONFIG_ERR_SHIFT)
+/* GL_MNG_HWARB_CTRL: NCSI hardware arbitration enable */
+#define I40E_GL_MNG_HWARB_CTRL 0x000B6130
+#define I40E_GL_MNG_HWARB_CTRL_NCSI_ARB_EN_SHIFT 0
+#define I40E_GL_MNG_HWARB_CTRL_NCSI_ARB_EN_MASK (0x1 << I40E_GL_MNG_HWARB_CTRL_NCSI_ARB_EN_SHIFT)
+/* PRT_MNG_FTFT_*: per-port flexible filter table -- data dwords (32 entries), length, byte masks (8 entries) */
+#define I40E_PRT_MNG_FTFT_DATA(_i) (0x000852A0 + ((_i) * 32)) /* _i=0...31 */
+#define I40E_PRT_MNG_FTFT_DATA_MAX_INDEX 31
+#define I40E_PRT_MNG_FTFT_DATA_DWORD_SHIFT 0
+#define I40E_PRT_MNG_FTFT_DATA_DWORD_MASK (0xFFFFFFFF << I40E_PRT_MNG_FTFT_DATA_DWORD_SHIFT)
+#define I40E_PRT_MNG_FTFT_LENGTH 0x00085260
+#define I40E_PRT_MNG_FTFT_LENGTH_LENGTH_SHIFT 0
+#define I40E_PRT_MNG_FTFT_LENGTH_LENGTH_MASK (0xFF << I40E_PRT_MNG_FTFT_LENGTH_LENGTH_SHIFT)
+#define I40E_PRT_MNG_FTFT_MASK(_i) (0x00085160 + ((_i) * 32)) /* _i=0...7 */
+#define I40E_PRT_MNG_FTFT_MASK_MAX_INDEX 7
+#define I40E_PRT_MNG_FTFT_MASK_MASK_SHIFT 0
+#define I40E_PRT_MNG_FTFT_MASK_MASK_MASK (0xFFFF << I40E_PRT_MNG_FTFT_MASK_MASK_SHIFT)
+/* PRT_MNG_MANC: per-port manageability control (discard/receive policy, BMC-to-OS/net paths) */
+#define I40E_PRT_MNG_MANC 0x00256A20
+#define I40E_PRT_MNG_MANC_FLOW_CONTROL_DISCARD_SHIFT 0
+#define I40E_PRT_MNG_MANC_FLOW_CONTROL_DISCARD_MASK (0x1 << I40E_PRT_MNG_MANC_FLOW_CONTROL_DISCARD_SHIFT)
+#define I40E_PRT_MNG_MANC_NCSI_DISCARD_SHIFT 1
+#define I40E_PRT_MNG_MANC_NCSI_DISCARD_MASK (0x1 << I40E_PRT_MNG_MANC_NCSI_DISCARD_SHIFT)
+#define I40E_PRT_MNG_MANC_RCV_TCO_EN_SHIFT 17
+#define I40E_PRT_MNG_MANC_RCV_TCO_EN_MASK (0x1 << I40E_PRT_MNG_MANC_RCV_TCO_EN_SHIFT)
+#define I40E_PRT_MNG_MANC_RCV_ALL_SHIFT 19
+#define I40E_PRT_MNG_MANC_RCV_ALL_MASK (0x1 << I40E_PRT_MNG_MANC_RCV_ALL_SHIFT)
+#define I40E_PRT_MNG_MANC_FIXED_NET_TYPE_SHIFT 25
+#define I40E_PRT_MNG_MANC_FIXED_NET_TYPE_MASK (0x1 << I40E_PRT_MNG_MANC_FIXED_NET_TYPE_SHIFT)
+#define I40E_PRT_MNG_MANC_NET_TYPE_SHIFT 26
+#define I40E_PRT_MNG_MANC_NET_TYPE_MASK (0x1 << I40E_PRT_MNG_MANC_NET_TYPE_SHIFT)
+#define I40E_PRT_MNG_MANC_EN_BMC2OS_SHIFT 28
+#define I40E_PRT_MNG_MANC_EN_BMC2OS_MASK (0x1 << I40E_PRT_MNG_MANC_EN_BMC2OS_SHIFT)
+#define I40E_PRT_MNG_MANC_EN_BMC2NET_SHIFT 29
+#define I40E_PRT_MNG_MANC_EN_BMC2NET_MASK (0x1 << I40E_PRT_MNG_MANC_EN_BMC2NET_SHIFT)
+/* PRT_MNG_MAVTV: manageability VLAN ID filters (8 entries, 12-bit VID each) */
+#define I40E_PRT_MNG_MAVTV(_i) (0x00255900 + ((_i) * 32)) /* _i=0...7 */
+#define I40E_PRT_MNG_MAVTV_MAX_INDEX 7
+#define I40E_PRT_MNG_MAVTV_VID_SHIFT 0
+#define I40E_PRT_MNG_MAVTV_VID_MASK (0xFFF << I40E_PRT_MNG_MAVTV_VID_SHIFT)
+/* PRT_MNG_MDEF: manageability decision filters -- AND terms combined with OR terms per filter */
+#define I40E_PRT_MNG_MDEF(_i) (0x00255D00 + ((_i) * 32))
+#define I40E_PRT_MNG_MDEF_MAX_INDEX 7
+#define I40E_PRT_MNG_MDEF_MAC_EXACT_AND_SHIFT 0
+#define I40E_PRT_MNG_MDEF_MAC_EXACT_AND_MASK (0xF << I40E_PRT_MNG_MDEF_MAC_EXACT_AND_SHIFT)
+#define I40E_PRT_MNG_MDEF_BROADCAST_AND_SHIFT 4
+#define I40E_PRT_MNG_MDEF_BROADCAST_AND_MASK (0x1 << I40E_PRT_MNG_MDEF_BROADCAST_AND_SHIFT)
+#define I40E_PRT_MNG_MDEF_VLAN_AND_SHIFT 5
+#define I40E_PRT_MNG_MDEF_VLAN_AND_MASK (0xFF << I40E_PRT_MNG_MDEF_VLAN_AND_SHIFT)
+#define I40E_PRT_MNG_MDEF_IPV4_ADDRESS_AND_SHIFT 13
+#define I40E_PRT_MNG_MDEF_IPV4_ADDRESS_AND_MASK (0xF << I40E_PRT_MNG_MDEF_IPV4_ADDRESS_AND_SHIFT)
+#define I40E_PRT_MNG_MDEF_IPV6_ADDRESS_AND_SHIFT 17
+#define I40E_PRT_MNG_MDEF_IPV6_ADDRESS_AND_MASK (0xF << I40E_PRT_MNG_MDEF_IPV6_ADDRESS_AND_SHIFT)
+#define I40E_PRT_MNG_MDEF_MAC_EXACT_OR_SHIFT 21
+#define I40E_PRT_MNG_MDEF_MAC_EXACT_OR_MASK (0xF << I40E_PRT_MNG_MDEF_MAC_EXACT_OR_SHIFT)
+#define I40E_PRT_MNG_MDEF_BROADCAST_OR_SHIFT 25
+#define I40E_PRT_MNG_MDEF_BROADCAST_OR_MASK (0x1 << I40E_PRT_MNG_MDEF_BROADCAST_OR_SHIFT)
+#define I40E_PRT_MNG_MDEF_MULTICAST_AND_SHIFT 26
+#define I40E_PRT_MNG_MDEF_MULTICAST_AND_MASK (0x1 << I40E_PRT_MNG_MDEF_MULTICAST_AND_SHIFT)
+#define I40E_PRT_MNG_MDEF_ARP_REQUEST_OR_SHIFT 27
+#define I40E_PRT_MNG_MDEF_ARP_REQUEST_OR_MASK (0x1 << I40E_PRT_MNG_MDEF_ARP_REQUEST_OR_SHIFT)
+#define I40E_PRT_MNG_MDEF_ARP_RESPONSE_OR_SHIFT 28
+#define I40E_PRT_MNG_MDEF_ARP_RESPONSE_OR_MASK (0x1 << I40E_PRT_MNG_MDEF_ARP_RESPONSE_OR_SHIFT)
+#define I40E_PRT_MNG_MDEF_NEIGHBOR_DISCOVERY_134_OR_SHIFT 29
+#define I40E_PRT_MNG_MDEF_NEIGHBOR_DISCOVERY_134_OR_MASK (0x1 << I40E_PRT_MNG_MDEF_NEIGHBOR_DISCOVERY_134_OR_SHIFT)
+#define I40E_PRT_MNG_MDEF_PORT_0X298_OR_SHIFT 30
+#define I40E_PRT_MNG_MDEF_PORT_0X298_OR_MASK (0x1 << I40E_PRT_MNG_MDEF_PORT_0X298_OR_SHIFT)
+#define I40E_PRT_MNG_MDEF_PORT_0X26F_OR_SHIFT 31
+#define I40E_PRT_MNG_MDEF_PORT_0X26F_OR_MASK (0x1 << I40E_PRT_MNG_MDEF_PORT_0X26F_OR_SHIFT)
+/* PRT_MNG_MDEF_EXT: extended decision-filter terms (ethertype, flex port, ND 135-137, ICMP, MLD) */
+#define I40E_PRT_MNG_MDEF_EXT(_i) (0x00255F00 + ((_i) * 32))
+#define I40E_PRT_MNG_MDEF_EXT_MAX_INDEX 7
+#define I40E_PRT_MNG_MDEF_EXT_L2_ETHERTYPE_AND_SHIFT 0
+#define I40E_PRT_MNG_MDEF_EXT_L2_ETHERTYPE_AND_MASK (0xF << I40E_PRT_MNG_MDEF_EXT_L2_ETHERTYPE_AND_SHIFT)
+#define I40E_PRT_MNG_MDEF_EXT_L2_ETHERTYPE_OR_SHIFT 4
+#define I40E_PRT_MNG_MDEF_EXT_L2_ETHERTYPE_OR_MASK (0xF << I40E_PRT_MNG_MDEF_EXT_L2_ETHERTYPE_OR_SHIFT)
+#define I40E_PRT_MNG_MDEF_EXT_FLEX_PORT_OR_SHIFT 8
+#define I40E_PRT_MNG_MDEF_EXT_FLEX_PORT_OR_MASK (0xFFFF << I40E_PRT_MNG_MDEF_EXT_FLEX_PORT_OR_SHIFT)
+#define I40E_PRT_MNG_MDEF_EXT_FLEX_TCO_SHIFT 24
+#define I40E_PRT_MNG_MDEF_EXT_FLEX_TCO_MASK (0x1 << I40E_PRT_MNG_MDEF_EXT_FLEX_TCO_SHIFT)
+#define I40E_PRT_MNG_MDEF_EXT_NEIGHBOR_DISCOVERY_135_OR_SHIFT 25
+#define I40E_PRT_MNG_MDEF_EXT_NEIGHBOR_DISCOVERY_135_OR_MASK (0x1 << I40E_PRT_MNG_MDEF_EXT_NEIGHBOR_DISCOVERY_135_OR_SHIFT)
+#define I40E_PRT_MNG_MDEF_EXT_NEIGHBOR_DISCOVERY_136_OR_SHIFT 26
+#define I40E_PRT_MNG_MDEF_EXT_NEIGHBOR_DISCOVERY_136_OR_MASK (0x1 << I40E_PRT_MNG_MDEF_EXT_NEIGHBOR_DISCOVERY_136_OR_SHIFT)
+#define I40E_PRT_MNG_MDEF_EXT_NEIGHBOR_DISCOVERY_137_OR_SHIFT 27
+#define I40E_PRT_MNG_MDEF_EXT_NEIGHBOR_DISCOVERY_137_OR_MASK (0x1 << I40E_PRT_MNG_MDEF_EXT_NEIGHBOR_DISCOVERY_137_OR_SHIFT)
+#define I40E_PRT_MNG_MDEF_EXT_ICMP_OR_SHIFT 28
+#define I40E_PRT_MNG_MDEF_EXT_ICMP_OR_MASK (0x1 << I40E_PRT_MNG_MDEF_EXT_ICMP_OR_SHIFT)
+#define I40E_PRT_MNG_MDEF_EXT_MLD_SHIFT 29
+#define I40E_PRT_MNG_MDEF_EXT_MLD_MASK (0x1 << I40E_PRT_MNG_MDEF_EXT_MLD_SHIFT)
+#define I40E_PRT_MNG_MDEF_EXT_APPLY_TO_NETWORK_TRAFFIC_SHIFT 30
+#define I40E_PRT_MNG_MDEF_EXT_APPLY_TO_NETWORK_TRAFFIC_MASK (0x1 << I40E_PRT_MNG_MDEF_EXT_APPLY_TO_NETWORK_TRAFFIC_SHIFT)
+#define I40E_PRT_MNG_MDEF_EXT_APPLY_TO_HOST_TRAFFIC_SHIFT 31
+#define I40E_PRT_MNG_MDEF_EXT_APPLY_TO_HOST_TRAFFIC_MASK (0x1 << I40E_PRT_MNG_MDEF_EXT_APPLY_TO_HOST_TRAFFIC_SHIFT)
+/* PRT_MNG_MDEFVSI: two 16-bit VSI values packed per register (entries 2N and 2N+1) */
+#define I40E_PRT_MNG_MDEFVSI(_i) (0x00256580 + ((_i) * 32)) /* _i=0...3 */
+#define I40E_PRT_MNG_MDEFVSI_MAX_INDEX 3
+#define I40E_PRT_MNG_MDEFVSI_MDEFVSI_2N_SHIFT 0
+#define I40E_PRT_MNG_MDEFVSI_MDEFVSI_2N_MASK (0xFFFF << I40E_PRT_MNG_MDEFVSI_MDEFVSI_2N_SHIFT)
+#define I40E_PRT_MNG_MDEFVSI_MDEFVSI_2NP1_SHIFT 16
+#define I40E_PRT_MNG_MDEFVSI_MDEFVSI_2NP1_MASK (0xFFFF << I40E_PRT_MNG_MDEFVSI_MDEFVSI_2NP1_SHIFT)
+/* PRT_MNG_METF: manageability ethertype filters (4 entries, with polarity bit) */
+#define I40E_PRT_MNG_METF(_i) (0x00256780 + ((_i) * 32)) /* _i=0...3 */
+#define I40E_PRT_MNG_METF_MAX_INDEX 3
+#define I40E_PRT_MNG_METF_ETYPE_SHIFT 0
+#define I40E_PRT_MNG_METF_ETYPE_MASK (0xFFFF << I40E_PRT_MNG_METF_ETYPE_SHIFT)
+#define I40E_PRT_MNG_METF_POLARITY_SHIFT 30
+#define I40E_PRT_MNG_METF_POLARITY_MASK (0x1 << I40E_PRT_MNG_METF_POLARITY_SHIFT)
+/* PRT_MNG_MFUTP: flexible UDP/TCP port filters (16 entries; protocol and src/dst select bits) */
+#define I40E_PRT_MNG_MFUTP(_i) (0x00254E00 + ((_i) * 32)) /* _i=0...15 */
+#define I40E_PRT_MNG_MFUTP_MAX_INDEX 15
+#define I40E_PRT_MNG_MFUTP_MFUTP_N_SHIFT 0
+#define I40E_PRT_MNG_MFUTP_MFUTP_N_MASK (0xFFFF << I40E_PRT_MNG_MFUTP_MFUTP_N_SHIFT)
+#define I40E_PRT_MNG_MFUTP_UDP_SHIFT 16
+#define I40E_PRT_MNG_MFUTP_UDP_MASK (0x1 << I40E_PRT_MNG_MFUTP_UDP_SHIFT)
+#define I40E_PRT_MNG_MFUTP_TCP_SHIFT 17
+#define I40E_PRT_MNG_MFUTP_TCP_MASK (0x1 << I40E_PRT_MNG_MFUTP_TCP_SHIFT)
+#define I40E_PRT_MNG_MFUTP_SOURCE_DESTINATION_SHIFT 18
+#define I40E_PRT_MNG_MFUTP_SOURCE_DESTINATION_MASK (0x1 << I40E_PRT_MNG_MFUTP_SOURCE_DESTINATION_SHIFT)
+/* PRT_MNG_MIPAF4/MIPAF6: IPv4 (4 words) and IPv6 (16 words) manageability address filters */
+#define I40E_PRT_MNG_MIPAF4(_i) (0x00256280 + ((_i) * 32)) /* _i=0...3 */
+#define I40E_PRT_MNG_MIPAF4_MAX_INDEX 3
+#define I40E_PRT_MNG_MIPAF4_MIPAF_SHIFT 0
+#define I40E_PRT_MNG_MIPAF4_MIPAF_MASK (0xFFFFFFFF << I40E_PRT_MNG_MIPAF4_MIPAF_SHIFT)
+#define I40E_PRT_MNG_MIPAF6(_i) (0x00254200 + ((_i) * 32)) /* _i=0...15 */
+#define I40E_PRT_MNG_MIPAF6_MAX_INDEX 15
+#define I40E_PRT_MNG_MIPAF6_MIPAF_SHIFT 0
+#define I40E_PRT_MNG_MIPAF6_MIPAF_MASK (0xFFFFFFFF << I40E_PRT_MNG_MIPAF6_MIPAF_SHIFT)
+/* PRT_MNG_MMAH/MMAL: manageability MAC address filters, high 16 bits / low 32 bits (4 entries) */
+#define I40E_PRT_MNG_MMAH(_i) (0x00256380 + ((_i) * 32)) /* _i=0...3 */
+#define I40E_PRT_MNG_MMAH_MAX_INDEX 3
+#define I40E_PRT_MNG_MMAH_MMAH_SHIFT 0
+#define I40E_PRT_MNG_MMAH_MMAH_MASK (0xFFFF << I40E_PRT_MNG_MMAH_MMAH_SHIFT)
+#define I40E_PRT_MNG_MMAL(_i) (0x00256480 + ((_i) * 32)) /* _i=0...3 */
+#define I40E_PRT_MNG_MMAL_MAX_INDEX 3
+#define I40E_PRT_MNG_MMAL_MMAL_SHIFT 0
+#define I40E_PRT_MNG_MMAL_MMAL_MASK (0xFFFFFFFF << I40E_PRT_MNG_MMAL_MMAL_SHIFT)
+/* PRT_MNG_MNGONLY / MSFM: manageability-exclusive traffic and special filter modifiers */
+#define I40E_PRT_MNG_MNGONLY 0x00256A60
+#define I40E_PRT_MNG_MNGONLY_EXCLUSIVE_TO_MANAGEABILITY_SHIFT 0
+#define I40E_PRT_MNG_MNGONLY_EXCLUSIVE_TO_MANAGEABILITY_MASK (0xFF << I40E_PRT_MNG_MNGONLY_EXCLUSIVE_TO_MANAGEABILITY_SHIFT)
+#define I40E_PRT_MNG_MSFM 0x00256AA0
+#define I40E_PRT_MNG_MSFM_PORT_26F_UDP_SHIFT 0
+#define I40E_PRT_MNG_MSFM_PORT_26F_UDP_MASK (0x1 << I40E_PRT_MNG_MSFM_PORT_26F_UDP_SHIFT)
+#define I40E_PRT_MNG_MSFM_PORT_26F_TCP_SHIFT 1
+#define I40E_PRT_MNG_MSFM_PORT_26F_TCP_MASK (0x1 << I40E_PRT_MNG_MSFM_PORT_26F_TCP_SHIFT)
+#define I40E_PRT_MNG_MSFM_PORT_298_UDP_SHIFT 2
+#define I40E_PRT_MNG_MSFM_PORT_298_UDP_MASK (0x1 << I40E_PRT_MNG_MSFM_PORT_298_UDP_SHIFT)
+#define I40E_PRT_MNG_MSFM_PORT_298_TCP_SHIFT 3
+#define I40E_PRT_MNG_MSFM_PORT_298_TCP_MASK (0x1 << I40E_PRT_MNG_MSFM_PORT_298_TCP_SHIFT)
+#define I40E_PRT_MNG_MSFM_IPV6_0_MASK_SHIFT 4
+#define I40E_PRT_MNG_MSFM_IPV6_0_MASK_MASK (0x1 << I40E_PRT_MNG_MSFM_IPV6_0_MASK_SHIFT)
+#define I40E_PRT_MNG_MSFM_IPV6_1_MASK_SHIFT 5
+#define I40E_PRT_MNG_MSFM_IPV6_1_MASK_MASK (0x1 << I40E_PRT_MNG_MSFM_IPV6_1_MASK_SHIFT)
+#define I40E_PRT_MNG_MSFM_IPV6_2_MASK_SHIFT 6
+#define I40E_PRT_MNG_MSFM_IPV6_2_MASK_MASK (0x1 << I40E_PRT_MNG_MSFM_IPV6_2_MASK_SHIFT)
+#define I40E_PRT_MNG_MSFM_IPV6_3_MASK_SHIFT 7
+#define I40E_PRT_MNG_MSFM_IPV6_3_MASK_MASK (0x1 << I40E_PRT_MNG_MSFM_IPV6_3_MASK_SHIFT)
+/* PF MSI-X table: pending-bit array, entry address (low/high), message data, vector control */
+#define I40E_MSIX_PBA(_i) (0x00004900 + ((_i) * 4)) /* _i=0...5 */
+#define I40E_MSIX_PBA_MAX_INDEX 5
+#define I40E_MSIX_PBA_PENBIT_SHIFT 0
+#define I40E_MSIX_PBA_PENBIT_MASK (0xFFFFFFFF << I40E_MSIX_PBA_PENBIT_SHIFT)
+#define I40E_MSIX_TADD(_i) (0x00000000 + ((_i) * 16)) /* _i=0...128 */
+#define I40E_MSIX_TADD_MAX_INDEX 128
+#define I40E_MSIX_TADD_MSIXTADD10_SHIFT 0
+#define I40E_MSIX_TADD_MSIXTADD10_MASK (0x3 << I40E_MSIX_TADD_MSIXTADD10_SHIFT)
+#define I40E_MSIX_TADD_MSIXTADD_SHIFT 2
+#define I40E_MSIX_TADD_MSIXTADD_MASK (0x3FFFFFFF << I40E_MSIX_TADD_MSIXTADD_SHIFT)
+#define I40E_MSIX_TMSG(_i) (0x00000008 + ((_i) * 16)) /* _i=0...128 */
+#define I40E_MSIX_TMSG_MAX_INDEX 128
+#define I40E_MSIX_TMSG_MSIXTMSG_SHIFT 0
+#define I40E_MSIX_TMSG_MSIXTMSG_MASK (0xFFFFFFFF << I40E_MSIX_TMSG_MSIXTMSG_SHIFT)
+#define I40E_MSIX_TUADD(_i) (0x00000004 + ((_i) * 16)) /* _i=0...128 */
+#define I40E_MSIX_TUADD_MAX_INDEX 128
+#define I40E_MSIX_TUADD_MSIXTUADD_SHIFT 0
+#define I40E_MSIX_TUADD_MSIXTUADD_MASK (0xFFFFFFFF << I40E_MSIX_TUADD_MSIXTUADD_SHIFT)
+#define I40E_MSIX_TVCTRL(_i) (0x0000000C + ((_i) * 16)) /* _i=0...128 */
+#define I40E_MSIX_TVCTRL_MAX_INDEX 128
+#define I40E_MSIX_TVCTRL_MASK_SHIFT 0
+#define I40E_MSIX_TVCTRL_MASK_MASK (0x1 << I40E_MSIX_TVCTRL_MASK_SHIFT)
+/* VF MSI-X table: same entry layout as the PF table, VF address space */
+#define I40E_VFMSIX_PBA1(_i) (0x00004944 + ((_i) * 4)) /* _i=0...19 */
+#define I40E_VFMSIX_PBA1_MAX_INDEX 19
+#define I40E_VFMSIX_PBA1_PENBIT_SHIFT 0
+#define I40E_VFMSIX_PBA1_PENBIT_MASK (0xFFFFFFFF << I40E_VFMSIX_PBA1_PENBIT_SHIFT)
+#define I40E_VFMSIX_TADD1(_i) (0x00002100 + ((_i) * 16)) /* _i=0...639 */
+#define I40E_VFMSIX_TADD1_MAX_INDEX 639
+#define I40E_VFMSIX_TADD1_MSIXTADD10_SHIFT 0
+#define I40E_VFMSIX_TADD1_MSIXTADD10_MASK (0x3 << I40E_VFMSIX_TADD1_MSIXTADD10_SHIFT)
+#define I40E_VFMSIX_TADD1_MSIXTADD_SHIFT 2
+#define I40E_VFMSIX_TADD1_MSIXTADD_MASK (0x3FFFFFFF << I40E_VFMSIX_TADD1_MSIXTADD_SHIFT)
+#define I40E_VFMSIX_TMSG1(_i) (0x00002108 + ((_i) * 16)) /* _i=0...639 */
+#define I40E_VFMSIX_TMSG1_MAX_INDEX 639
+#define I40E_VFMSIX_TMSG1_MSIXTMSG_SHIFT 0
+#define I40E_VFMSIX_TMSG1_MSIXTMSG_MASK (0xFFFFFFFF << I40E_VFMSIX_TMSG1_MSIXTMSG_SHIFT)
+#define I40E_VFMSIX_TUADD1(_i) (0x00002104 + ((_i) * 16)) /* _i=0...639 */
+#define I40E_VFMSIX_TUADD1_MAX_INDEX 639
+#define I40E_VFMSIX_TUADD1_MSIXTUADD_SHIFT 0
+#define I40E_VFMSIX_TUADD1_MSIXTUADD_MASK (0xFFFFFFFF << I40E_VFMSIX_TUADD1_MSIXTUADD_SHIFT)
+#define I40E_VFMSIX_TVCTRL1(_i) (0x0000210C + ((_i) * 16)) /* _i=0...639 */
+#define I40E_VFMSIX_TVCTRL1_MAX_INDEX 639
+#define I40E_VFMSIX_TVCTRL1_MASK_SHIFT 0
+#define I40E_VFMSIX_TVCTRL1_MASK_MASK (0x1 << I40E_VFMSIX_TVCTRL1_MASK_SHIFT)
+/* GLNVM_*: NVM/flash access -- bit-bang flash access (FLA), flash ID, general status,
+ * protected-CSR table, and shadow-RAM (SR) control/data registers */
+#define I40E_GLNVM_FLA 0x000B6108
+#define I40E_GLNVM_FLA_FL_SCK_SHIFT 0
+#define I40E_GLNVM_FLA_FL_SCK_MASK (0x1 << I40E_GLNVM_FLA_FL_SCK_SHIFT)
+#define I40E_GLNVM_FLA_FL_CE_SHIFT 1
+#define I40E_GLNVM_FLA_FL_CE_MASK (0x1 << I40E_GLNVM_FLA_FL_CE_SHIFT)
+#define I40E_GLNVM_FLA_FL_SI_SHIFT 2
+#define I40E_GLNVM_FLA_FL_SI_MASK (0x1 << I40E_GLNVM_FLA_FL_SI_SHIFT)
+#define I40E_GLNVM_FLA_FL_SO_SHIFT 3
+#define I40E_GLNVM_FLA_FL_SO_MASK (0x1 << I40E_GLNVM_FLA_FL_SO_SHIFT)
+#define I40E_GLNVM_FLA_FL_REQ_SHIFT 4
+#define I40E_GLNVM_FLA_FL_REQ_MASK (0x1 << I40E_GLNVM_FLA_FL_REQ_SHIFT)
+#define I40E_GLNVM_FLA_FL_GNT_SHIFT 5
+#define I40E_GLNVM_FLA_FL_GNT_MASK (0x1 << I40E_GLNVM_FLA_FL_GNT_SHIFT)
+#define I40E_GLNVM_FLA_LOCKED_SHIFT 6
+#define I40E_GLNVM_FLA_LOCKED_MASK (0x1 << I40E_GLNVM_FLA_LOCKED_SHIFT)
+#define I40E_GLNVM_FLA_FL_SADDR_SHIFT 18
+#define I40E_GLNVM_FLA_FL_SADDR_MASK (0x7FF << I40E_GLNVM_FLA_FL_SADDR_SHIFT)
+#define I40E_GLNVM_FLA_FL_BUSY_SHIFT 30
+#define I40E_GLNVM_FLA_FL_BUSY_MASK (0x1 << I40E_GLNVM_FLA_FL_BUSY_SHIFT)
+#define I40E_GLNVM_FLA_FL_DER_SHIFT 31
+#define I40E_GLNVM_FLA_FL_DER_MASK (0x1 << I40E_GLNVM_FLA_FL_DER_SHIFT)
+#define I40E_GLNVM_FLASHID 0x000B6104
+#define I40E_GLNVM_FLASHID_FLASHID_SHIFT 0
+#define I40E_GLNVM_FLASHID_FLASHID_MASK (0xFFFFFF << I40E_GLNVM_FLASHID_FLASHID_SHIFT)
+#define I40E_GLNVM_GENS 0x000B6100
+#define I40E_GLNVM_GENS_NVM_PRES_SHIFT 0
+#define I40E_GLNVM_GENS_NVM_PRES_MASK (0x1 << I40E_GLNVM_GENS_NVM_PRES_SHIFT)
+#define I40E_GLNVM_GENS_SR_SIZE_SHIFT 5
+#define I40E_GLNVM_GENS_SR_SIZE_MASK (0x7 << I40E_GLNVM_GENS_SR_SIZE_SHIFT)
+#define I40E_GLNVM_GENS_BANK1VAL_SHIFT 8
+#define I40E_GLNVM_GENS_BANK1VAL_MASK (0x1 << I40E_GLNVM_GENS_BANK1VAL_SHIFT)
+#define I40E_GLNVM_GENS_ALT_PRST_SHIFT 23
+#define I40E_GLNVM_GENS_ALT_PRST_MASK (0x1 << I40E_GLNVM_GENS_ALT_PRST_SHIFT)
+#define I40E_GLNVM_GENS_FL_AUTO_RD_SHIFT 25
+#define I40E_GLNVM_GENS_FL_AUTO_RD_MASK (0x1 << I40E_GLNVM_GENS_FL_AUTO_RD_SHIFT)
+#define I40E_GLNVM_PROTCSR(_i) (0x000B6010 + ((_i) * 4)) /* _i=0...59 */
+#define I40E_GLNVM_PROTCSR_MAX_INDEX 59
+#define I40E_GLNVM_PROTCSR_ADDR_BLOCK_SHIFT 0
+#define I40E_GLNVM_PROTCSR_ADDR_BLOCK_MASK (0xFFFFFF << I40E_GLNVM_PROTCSR_ADDR_BLOCK_SHIFT)
+#define I40E_GLNVM_SRCTL 0x000B6110
+#define I40E_GLNVM_SRCTL_SRBUSY_SHIFT 0
+#define I40E_GLNVM_SRCTL_SRBUSY_MASK (0x1 << I40E_GLNVM_SRCTL_SRBUSY_SHIFT)
+#define I40E_GLNVM_SRCTL_ADDR_SHIFT 14
+#define I40E_GLNVM_SRCTL_ADDR_MASK (0x7FFF << I40E_GLNVM_SRCTL_ADDR_SHIFT)
+#define I40E_GLNVM_SRCTL_WRITE_SHIFT 29
+#define I40E_GLNVM_SRCTL_WRITE_MASK (0x1 << I40E_GLNVM_SRCTL_WRITE_SHIFT)
+#define I40E_GLNVM_SRCTL_START_SHIFT 30
+#define I40E_GLNVM_SRCTL_START_MASK (0x1 << I40E_GLNVM_SRCTL_START_SHIFT)
+#define I40E_GLNVM_SRCTL_DONE_SHIFT 31
+#define I40E_GLNVM_SRCTL_DONE_MASK (0x1 << I40E_GLNVM_SRCTL_DONE_SHIFT)
+#define I40E_GLNVM_SRDATA 0x000B6114
+#define I40E_GLNVM_SRDATA_WRDATA_SHIFT 0
+#define I40E_GLNVM_SRDATA_WRDATA_MASK (0xFFFF << I40E_GLNVM_SRDATA_WRDATA_SHIFT)
+#define I40E_GLNVM_SRDATA_RDDATA_SHIFT 16
+#define I40E_GLNVM_SRDATA_RDDATA_MASK (0xFFFF << I40E_GLNVM_SRDATA_RDDATA_SHIFT)
+/* GLPCI_*: global PCIe capability, configuration, and bandwidth/latency statistics registers */
+#define I40E_GLPCI_BYTCTH 0x0009C484
+#define I40E_GLPCI_BYTCTH_PCI_COUNT_BW_BCT_SHIFT 0
+#define I40E_GLPCI_BYTCTH_PCI_COUNT_BW_BCT_MASK (0xFFFFFFFF << I40E_GLPCI_BYTCTH_PCI_COUNT_BW_BCT_SHIFT)
+#define I40E_GLPCI_BYTCTL 0x0009C488
+#define I40E_GLPCI_BYTCTL_PCI_COUNT_BW_BCT_SHIFT 0
+#define I40E_GLPCI_BYTCTL_PCI_COUNT_BW_BCT_MASK (0xFFFFFFFF << I40E_GLPCI_BYTCTL_PCI_COUNT_BW_BCT_SHIFT)
+#define I40E_GLPCI_CAPCTRL 0x000BE4A4
+#define I40E_GLPCI_CAPCTRL_VPD_EN_SHIFT 0
+#define I40E_GLPCI_CAPCTRL_VPD_EN_MASK (0x1 << I40E_GLPCI_CAPCTRL_VPD_EN_SHIFT)
+#define I40E_GLPCI_CAPSUP 0x000BE4A8
+#define I40E_GLPCI_CAPSUP_PCIE_VER_SHIFT 0
+#define I40E_GLPCI_CAPSUP_PCIE_VER_MASK (0x1 << I40E_GLPCI_CAPSUP_PCIE_VER_SHIFT)
+#define I40E_GLPCI_CAPSUP_LTR_EN_SHIFT 2
+#define I40E_GLPCI_CAPSUP_LTR_EN_MASK (0x1 << I40E_GLPCI_CAPSUP_LTR_EN_SHIFT)
+#define I40E_GLPCI_CAPSUP_TPH_EN_SHIFT 3
+#define I40E_GLPCI_CAPSUP_TPH_EN_MASK (0x1 << I40E_GLPCI_CAPSUP_TPH_EN_SHIFT)
+#define I40E_GLPCI_CAPSUP_ARI_EN_SHIFT 4
+#define I40E_GLPCI_CAPSUP_ARI_EN_MASK (0x1 << I40E_GLPCI_CAPSUP_ARI_EN_SHIFT)
+#define I40E_GLPCI_CAPSUP_IOV_EN_SHIFT 5
+#define I40E_GLPCI_CAPSUP_IOV_EN_MASK (0x1 << I40E_GLPCI_CAPSUP_IOV_EN_SHIFT)
+#define I40E_GLPCI_CAPSUP_ACS_EN_SHIFT 6
+#define I40E_GLPCI_CAPSUP_ACS_EN_MASK (0x1 << I40E_GLPCI_CAPSUP_ACS_EN_SHIFT)
+#define I40E_GLPCI_CAPSUP_SEC_EN_SHIFT 7
+#define I40E_GLPCI_CAPSUP_SEC_EN_MASK (0x1 << I40E_GLPCI_CAPSUP_SEC_EN_SHIFT)
+#define I40E_GLPCI_CAPSUP_ECRC_GEN_EN_SHIFT 16
+#define I40E_GLPCI_CAPSUP_ECRC_GEN_EN_MASK (0x1 << I40E_GLPCI_CAPSUP_ECRC_GEN_EN_SHIFT)
+#define I40E_GLPCI_CAPSUP_ECRC_CHK_EN_SHIFT 17
+#define I40E_GLPCI_CAPSUP_ECRC_CHK_EN_MASK (0x1 << I40E_GLPCI_CAPSUP_ECRC_CHK_EN_SHIFT)
+#define I40E_GLPCI_CAPSUP_IDO_EN_SHIFT 18
+#define I40E_GLPCI_CAPSUP_IDO_EN_MASK (0x1 << I40E_GLPCI_CAPSUP_IDO_EN_SHIFT)
+#define I40E_GLPCI_CAPSUP_MSI_MASK_SHIFT 19
+#define I40E_GLPCI_CAPSUP_MSI_MASK_MASK (0x1 << I40E_GLPCI_CAPSUP_MSI_MASK_SHIFT)
+#define I40E_GLPCI_CAPSUP_CSR_CONF_EN_SHIFT 20
+#define I40E_GLPCI_CAPSUP_CSR_CONF_EN_MASK (0x1 << I40E_GLPCI_CAPSUP_CSR_CONF_EN_SHIFT)
+#define I40E_GLPCI_CAPSUP_LOAD_SUBSYS_ID_SHIFT 30
+#define I40E_GLPCI_CAPSUP_LOAD_SUBSYS_ID_MASK (0x1 << I40E_GLPCI_CAPSUP_LOAD_SUBSYS_ID_SHIFT)
+#define I40E_GLPCI_CAPSUP_LOAD_DEV_ID_SHIFT 31
+#define I40E_GLPCI_CAPSUP_LOAD_DEV_ID_MASK (0x1 << I40E_GLPCI_CAPSUP_LOAD_DEV_ID_SHIFT)
+#define I40E_GLPCI_CNF 0x000BE4C0
+#define I40E_GLPCI_CNF_FLEX10_SHIFT 1
+#define I40E_GLPCI_CNF_FLEX10_MASK (0x1 << I40E_GLPCI_CNF_FLEX10_SHIFT)
+#define I40E_GLPCI_CNF_WAKE_PIN_EN_SHIFT 2
+#define I40E_GLPCI_CNF_WAKE_PIN_EN_MASK (0x1 << I40E_GLPCI_CNF_WAKE_PIN_EN_SHIFT)
+#define I40E_GLPCI_CNF2 0x000BE494
+#define I40E_GLPCI_CNF2_RO_DIS_SHIFT 0
+#define I40E_GLPCI_CNF2_RO_DIS_MASK (0x1 << I40E_GLPCI_CNF2_RO_DIS_SHIFT)
+#define I40E_GLPCI_CNF2_CACHELINE_SIZE_SHIFT 1
+#define I40E_GLPCI_CNF2_CACHELINE_SIZE_MASK (0x1 << I40E_GLPCI_CNF2_CACHELINE_SIZE_SHIFT)
+#define I40E_GLPCI_CNF2_MSI_X_PF_N_SHIFT 2
+#define I40E_GLPCI_CNF2_MSI_X_PF_N_MASK (0x7FF << I40E_GLPCI_CNF2_MSI_X_PF_N_SHIFT)
+#define I40E_GLPCI_CNF2_MSI_X_VF_N_SHIFT 13
+#define I40E_GLPCI_CNF2_MSI_X_VF_N_MASK (0x7FF << I40E_GLPCI_CNF2_MSI_X_VF_N_SHIFT)
+#define I40E_GLPCI_DREVID 0x0009C480
+#define I40E_GLPCI_DREVID_DEFAULT_REVID_SHIFT 0
+#define I40E_GLPCI_DREVID_DEFAULT_REVID_MASK (0xFF << I40E_GLPCI_DREVID_DEFAULT_REVID_SHIFT)
+/* GLPCI_GSCL/GSCN: PCIe statistics counter control (GSCL_1/_2), thresholds/timers (GSCL_5_8),
+ * and event counters (GSCN_0_3) */
+#define I40E_GLPCI_GSCL_1 0x0009C48C
+#define I40E_GLPCI_GSCL_1_GIO_COUNT_EN_0_SHIFT 0
+#define I40E_GLPCI_GSCL_1_GIO_COUNT_EN_0_MASK (0x1 << I40E_GLPCI_GSCL_1_GIO_COUNT_EN_0_SHIFT)
+#define I40E_GLPCI_GSCL_1_GIO_COUNT_EN_1_SHIFT 1
+#define I40E_GLPCI_GSCL_1_GIO_COUNT_EN_1_MASK (0x1 << I40E_GLPCI_GSCL_1_GIO_COUNT_EN_1_SHIFT)
+#define I40E_GLPCI_GSCL_1_GIO_COUNT_EN_2_SHIFT 2
+#define I40E_GLPCI_GSCL_1_GIO_COUNT_EN_2_MASK (0x1 << I40E_GLPCI_GSCL_1_GIO_COUNT_EN_2_SHIFT)
+#define I40E_GLPCI_GSCL_1_GIO_COUNT_EN_3_SHIFT 3
+#define I40E_GLPCI_GSCL_1_GIO_COUNT_EN_3_MASK (0x1 << I40E_GLPCI_GSCL_1_GIO_COUNT_EN_3_SHIFT)
+#define I40E_GLPCI_GSCL_1_LBC_ENABLE_0_SHIFT 4
+#define I40E_GLPCI_GSCL_1_LBC_ENABLE_0_MASK (0x1 << I40E_GLPCI_GSCL_1_LBC_ENABLE_0_SHIFT)
+#define I40E_GLPCI_GSCL_1_LBC_ENABLE_1_SHIFT 5
+#define I40E_GLPCI_GSCL_1_LBC_ENABLE_1_MASK (0x1 << I40E_GLPCI_GSCL_1_LBC_ENABLE_1_SHIFT)
+#define I40E_GLPCI_GSCL_1_LBC_ENABLE_2_SHIFT 6
+#define I40E_GLPCI_GSCL_1_LBC_ENABLE_2_MASK (0x1 << I40E_GLPCI_GSCL_1_LBC_ENABLE_2_SHIFT)
+#define I40E_GLPCI_GSCL_1_LBC_ENABLE_3_SHIFT 7
+#define I40E_GLPCI_GSCL_1_LBC_ENABLE_3_MASK (0x1 << I40E_GLPCI_GSCL_1_LBC_ENABLE_3_SHIFT)
+#define I40E_GLPCI_GSCL_1_PCI_COUNT_LAT_EN_SHIFT 8
+#define I40E_GLPCI_GSCL_1_PCI_COUNT_LAT_EN_MASK (0x1 << I40E_GLPCI_GSCL_1_PCI_COUNT_LAT_EN_SHIFT)
+#define I40E_GLPCI_GSCL_1_PCI_COUNT_LAT_EV_SHIFT 9
+#define I40E_GLPCI_GSCL_1_PCI_COUNT_LAT_EV_MASK (0x1F << I40E_GLPCI_GSCL_1_PCI_COUNT_LAT_EV_SHIFT)
+#define I40E_GLPCI_GSCL_1_PCI_COUNT_BW_EN_SHIFT 14
+#define I40E_GLPCI_GSCL_1_PCI_COUNT_BW_EN_MASK (0x1 << I40E_GLPCI_GSCL_1_PCI_COUNT_BW_EN_SHIFT)
+#define I40E_GLPCI_GSCL_1_PCI_COUNT_BW_EV_SHIFT 15
+#define I40E_GLPCI_GSCL_1_PCI_COUNT_BW_EV_MASK (0x1F << I40E_GLPCI_GSCL_1_PCI_COUNT_BW_EV_SHIFT)
+#define I40E_GLPCI_GSCL_1_GIO_64_BIT_EN_SHIFT 28
+#define I40E_GLPCI_GSCL_1_GIO_64_BIT_EN_MASK (0x1 << I40E_GLPCI_GSCL_1_GIO_64_BIT_EN_SHIFT)
+#define I40E_GLPCI_GSCL_1_GIO_COUNT_RESET_SHIFT 29
+#define I40E_GLPCI_GSCL_1_GIO_COUNT_RESET_MASK (0x1 << I40E_GLPCI_GSCL_1_GIO_COUNT_RESET_SHIFT)
+#define I40E_GLPCI_GSCL_1_GIO_COUNT_STOP_SHIFT 30
+#define I40E_GLPCI_GSCL_1_GIO_COUNT_STOP_MASK (0x1 << I40E_GLPCI_GSCL_1_GIO_COUNT_STOP_SHIFT)
+#define I40E_GLPCI_GSCL_1_GIO_COUNT_START_SHIFT 31
+#define I40E_GLPCI_GSCL_1_GIO_COUNT_START_MASK (0x1 << I40E_GLPCI_GSCL_1_GIO_COUNT_START_SHIFT)
+#define I40E_GLPCI_GSCL_2 0x0009C490
+#define I40E_GLPCI_GSCL_2_GIO_EVENT_NUM_0_SHIFT 0
+#define I40E_GLPCI_GSCL_2_GIO_EVENT_NUM_0_MASK (0xFF << I40E_GLPCI_GSCL_2_GIO_EVENT_NUM_0_SHIFT)
+#define I40E_GLPCI_GSCL_2_GIO_EVENT_NUM_1_SHIFT 8
+#define I40E_GLPCI_GSCL_2_GIO_EVENT_NUM_1_MASK (0xFF << I40E_GLPCI_GSCL_2_GIO_EVENT_NUM_1_SHIFT)
+#define I40E_GLPCI_GSCL_2_GIO_EVENT_NUM_2_SHIFT 16
+#define I40E_GLPCI_GSCL_2_GIO_EVENT_NUM_2_MASK (0xFF << I40E_GLPCI_GSCL_2_GIO_EVENT_NUM_2_SHIFT)
+#define I40E_GLPCI_GSCL_2_GIO_EVENT_NUM_3_SHIFT 24
+#define I40E_GLPCI_GSCL_2_GIO_EVENT_NUM_3_MASK (0xFF << I40E_GLPCI_GSCL_2_GIO_EVENT_NUM_3_SHIFT)
+#define I40E_GLPCI_GSCL_5_8(_i) (0x0009C494 + ((_i) * 4)) /* _i=0...3 */
+#define I40E_GLPCI_GSCL_5_8_MAX_INDEX 3
+#define I40E_GLPCI_GSCL_5_8_LBC_THRESHOLD_N_SHIFT 0
+#define I40E_GLPCI_GSCL_5_8_LBC_THRESHOLD_N_MASK (0xFFFF << I40E_GLPCI_GSCL_5_8_LBC_THRESHOLD_N_SHIFT)
+#define I40E_GLPCI_GSCL_5_8_LBC_TIMER_N_SHIFT 16
+#define I40E_GLPCI_GSCL_5_8_LBC_TIMER_N_MASK (0xFFFF << I40E_GLPCI_GSCL_5_8_LBC_TIMER_N_SHIFT)
+#define I40E_GLPCI_GSCN_0_3(_i) (0x0009C4A4 + ((_i) * 4)) /* _i=0...3 */
+#define I40E_GLPCI_GSCN_0_3_MAX_INDEX 3
+#define I40E_GLPCI_GSCN_0_3_EVENT_COUNTER_SHIFT 0
+#define I40E_GLPCI_GSCN_0_3_EVENT_COUNTER_MASK (0xFFFFFFFF << I40E_GLPCI_GSCN_0_3_EVENT_COUNTER_SHIFT)
+#define I40E_GLPCI_LATCT 0x0009C4B4
+#define I40E_GLPCI_LATCT_PCI_COUNT_LAT_CT_SHIFT 0
+#define I40E_GLPCI_LATCT_PCI_COUNT_LAT_CT_MASK (0xFFFFFFFF << I40E_GLPCI_LATCT_PCI_COUNT_LAT_CT_SHIFT)
+#define I40E_GLPCI_LBARCTRL 0x000BE484
+#define I40E_GLPCI_LBARCTRL_PREFBAR_SHIFT 0
+#define I40E_GLPCI_LBARCTRL_PREFBAR_MASK (0x1 << I40E_GLPCI_LBARCTRL_PREFBAR_SHIFT)
+#define I40E_GLPCI_LBARCTRL_BAR32_SHIFT 1
+#define I40E_GLPCI_LBARCTRL_BAR32_MASK (0x1 << I40E_GLPCI_LBARCTRL_BAR32_SHIFT)
+#define I40E_GLPCI_LBARCTRL_FLASH_EXPOSE_SHIFT 3
+#define I40E_GLPCI_LBARCTRL_FLASH_EXPOSE_MASK (0x1 << I40E_GLPCI_LBARCTRL_FLASH_EXPOSE_SHIFT)
+#define I40E_GLPCI_LBARCTRL_PE_DB_SIZE_SHIFT 4
+#define I40E_GLPCI_LBARCTRL_PE_DB_SIZE_MASK (0x3 << I40E_GLPCI_LBARCTRL_PE_DB_SIZE_SHIFT)
+#define I40E_GLPCI_LBARCTRL_FL_SIZE_SHIFT 6
+#define I40E_GLPCI_LBARCTRL_FL_SIZE_MASK (0x7 << I40E_GLPCI_LBARCTRL_FL_SIZE_SHIFT)
+#define I40E_GLPCI_LBARCTRL_VF_PE_DB_SIZE_SHIFT 10
+#define I40E_GLPCI_LBARCTRL_VF_PE_DB_SIZE_MASK (0x1 << I40E_GLPCI_LBARCTRL_VF_PE_DB_SIZE_SHIFT)
+#define I40E_GLPCI_LBARCTRL_EXROM_SIZE_SHIFT 11
+#define I40E_GLPCI_LBARCTRL_EXROM_SIZE_MASK (0x7 << I40E_GLPCI_LBARCTRL_EXROM_SIZE_SHIFT)
+#define I40E_GLPCI_LINKCAP 0x000BE4AC
+#define I40E_GLPCI_LINKCAP_LINK_SPEEDS_VECTOR_SHIFT 0
+#define I40E_GLPCI_LINKCAP_LINK_SPEEDS_VECTOR_MASK (0x3F << I40E_GLPCI_LINKCAP_LINK_SPEEDS_VECTOR_SHIFT)
+#define I40E_GLPCI_LINKCAP_MAX_PAYLOAD_SHIFT 6
+#define I40E_GLPCI_LINKCAP_MAX_PAYLOAD_MASK (0x7 << I40E_GLPCI_LINKCAP_MAX_PAYLOAD_SHIFT)
+#define I40E_GLPCI_LINKCAP_MAX_LINK_WIDTH_SHIFT 9
+#define I40E_GLPCI_LINKCAP_MAX_LINK_WIDTH_MASK (0xF << I40E_GLPCI_LINKCAP_MAX_LINK_WIDTH_SHIFT)
+#define I40E_GLPCI_PCIERR 0x000BE4FC
+#define I40E_GLPCI_PCIERR_PCIE_ERR_REP_SHIFT 0
+#define I40E_GLPCI_PCIERR_PCIE_ERR_REP_MASK (0xFFFFFFFF << I40E_GLPCI_PCIERR_PCIE_ERR_REP_SHIFT)
+#define I40E_GLPCI_PKTCT 0x0009C4BC
+#define I40E_GLPCI_PKTCT_PCI_COUNT_BW_PCT_SHIFT 0
+#define I40E_GLPCI_PKTCT_PCI_COUNT_BW_PCT_MASK (0xFFFFFFFF << I40E_GLPCI_PKTCT_PCI_COUNT_BW_PCT_SHIFT)
+#define I40E_GLPCI_PMSUP 0x000BE4B0
+#define I40E_GLPCI_PMSUP_ASPM_SUP_SHIFT 0
+#define I40E_GLPCI_PMSUP_ASPM_SUP_MASK (0x3 << I40E_GLPCI_PMSUP_ASPM_SUP_SHIFT)
+#define I40E_GLPCI_PMSUP_L0S_EXIT_LAT_SHIFT 2
+#define I40E_GLPCI_PMSUP_L0S_EXIT_LAT_MASK (0x7 << I40E_GLPCI_PMSUP_L0S_EXIT_LAT_SHIFT)
+#define I40E_GLPCI_PMSUP_L1_EXIT_LAT_SHIFT 5
+#define I40E_GLPCI_PMSUP_L1_EXIT_LAT_MASK (0x7 << I40E_GLPCI_PMSUP_L1_EXIT_LAT_SHIFT)
+#define I40E_GLPCI_PMSUP_L0S_ACC_LAT_SHIFT 8
+#define I40E_GLPCI_PMSUP_L0S_ACC_LAT_MASK (0x7 << I40E_GLPCI_PMSUP_L0S_ACC_LAT_SHIFT)
+#define I40E_GLPCI_PMSUP_L1_ACC_LAT_SHIFT 11
+#define I40E_GLPCI_PMSUP_L1_ACC_LAT_MASK (0x7 << I40E_GLPCI_PMSUP_L1_ACC_LAT_SHIFT)
+#define I40E_GLPCI_PMSUP_SLOT_CLK_SHIFT 14
+#define I40E_GLPCI_PMSUP_SLOT_CLK_MASK (0x1 << I40E_GLPCI_PMSUP_SLOT_CLK_SHIFT)
+#define I40E_GLPCI_PMSUP_OBFF_SUP_SHIFT 15
+#define I40E_GLPCI_PMSUP_OBFF_SUP_MASK (0x3 << I40E_GLPCI_PMSUP_OBFF_SUP_SHIFT)
+#define I40E_GLPCI_PWRDATA 0x000BE490
+#define I40E_GLPCI_PWRDATA_D0_POWER_SHIFT 0
+#define I40E_GLPCI_PWRDATA_D0_POWER_MASK (0xFF << I40E_GLPCI_PWRDATA_D0_POWER_SHIFT)
+#define I40E_GLPCI_PWRDATA_COMM_POWER_SHIFT 8
+#define I40E_GLPCI_PWRDATA_COMM_POWER_MASK (0xFF << I40E_GLPCI_PWRDATA_COMM_POWER_SHIFT)
+#define I40E_GLPCI_PWRDATA_D3_POWER_SHIFT 16
+#define I40E_GLPCI_PWRDATA_D3_POWER_MASK (0xFF << I40E_GLPCI_PWRDATA_D3_POWER_SHIFT)
+#define I40E_GLPCI_PWRDATA_DATA_SCALE_SHIFT 24
+#define I40E_GLPCI_PWRDATA_DATA_SCALE_MASK (0x3 << I40E_GLPCI_PWRDATA_DATA_SCALE_SHIFT)
+#define I40E_GLPCI_REVID 0x000BE4B4
+#define I40E_GLPCI_REVID_NVM_REVID_SHIFT 0
+#define I40E_GLPCI_REVID_NVM_REVID_MASK (0xFF << I40E_GLPCI_REVID_NVM_REVID_SHIFT)
+#define I40E_GLPCI_SERH 0x000BE49C
+#define I40E_GLPCI_SERH_SER_NUM_H_SHIFT 0
+#define I40E_GLPCI_SERH_SER_NUM_H_MASK (0xFFFF << I40E_GLPCI_SERH_SER_NUM_H_SHIFT)
+#define I40E_GLPCI_SERL 0x000BE498
+#define I40E_GLPCI_SERL_SER_NUM_L_SHIFT 0
+#define I40E_GLPCI_SERL_SER_NUM_L_MASK (0xFFFFFFFF << I40E_GLPCI_SERL_SER_NUM_L_SHIFT)
+#define I40E_GLPCI_SUBSYSID 0x000BE48C
+#define I40E_GLPCI_SUBSYSID_SUB_VEN_ID_SHIFT 0
+#define I40E_GLPCI_SUBSYSID_SUB_VEN_ID_MASK (0xFFFF << I40E_GLPCI_SUBSYSID_SUB_VEN_ID_SHIFT)
+#define I40E_GLPCI_SUBSYSID_SUB_ID_SHIFT 16
+#define I40E_GLPCI_SUBSYSID_SUB_ID_MASK (0xFFFF << I40E_GLPCI_SUBSYSID_SUB_ID_SHIFT)
+#define I40E_GLPCI_UPADD 0x000BE4F8
+#define I40E_GLPCI_UPADD_ADDRESS_SHIFT 1
+/* NOTE(review): (0x7FFFFFFF << 1) shifts into the sign bit of a signed int (UB in C) */
+#define I40E_GLPCI_UPADD_ADDRESS_MASK (0x7FFFFFFF << I40E_GLPCI_UPADD_ADDRESS_SHIFT)
+#define I40E_GLPCI_VFSUP 0x000BE4B8
+#define I40E_GLPCI_VFSUP_VF_PREFETCH_SHIFT 0
+#define I40E_GLPCI_VFSUP_VF_PREFETCH_MASK (0x1 << I40E_GLPCI_VFSUP_VF_PREFETCH_SHIFT)
+#define I40E_GLPCI_VFSUP_VR_BAR_TYPE_SHIFT 1
+#define I40E_GLPCI_VFSUP_VR_BAR_TYPE_MASK (0x1 << I40E_GLPCI_VFSUP_VR_BAR_TYPE_SHIFT)
+/* PF_FUNC_RID / PF_PCI_CIA*: PF requester ID (func/dev/bus) and config-indirect address/data */
+#define I40E_PF_FUNC_RID 0x0009C000
+#define I40E_PF_FUNC_RID_FUNCTION_NUMBER_SHIFT 0
+#define I40E_PF_FUNC_RID_FUNCTION_NUMBER_MASK (0x7 << I40E_PF_FUNC_RID_FUNCTION_NUMBER_SHIFT)
+#define I40E_PF_FUNC_RID_DEVICE_NUMBER_SHIFT 3
+#define I40E_PF_FUNC_RID_DEVICE_NUMBER_MASK (0x1F << I40E_PF_FUNC_RID_DEVICE_NUMBER_SHIFT)
+#define I40E_PF_FUNC_RID_BUS_NUMBER_SHIFT 8
+#define I40E_PF_FUNC_RID_BUS_NUMBER_MASK (0xFF << I40E_PF_FUNC_RID_BUS_NUMBER_SHIFT)
+#define I40E_PF_PCI_CIAA 0x0009C080
+#define I40E_PF_PCI_CIAA_ADDRESS_SHIFT 0
+#define I40E_PF_PCI_CIAA_ADDRESS_MASK (0xFFF << I40E_PF_PCI_CIAA_ADDRESS_SHIFT)
+#define I40E_PF_PCI_CIAA_VF_NUM_SHIFT 12
+#define I40E_PF_PCI_CIAA_VF_NUM_MASK (0x7F << I40E_PF_PCI_CIAA_VF_NUM_SHIFT)
+#define I40E_PF_PCI_CIAD 0x0009C100
+#define I40E_PF_PCI_CIAD_DATA_SHIFT 0
+#define I40E_PF_PCI_CIAD_DATA_MASK (0xFFFFFFFF << I40E_PF_PCI_CIAD_DATA_SHIFT)
+/* PFPCI_*: per-PF PCI function configuration and status (class, enables, power, device IDs) */
+#define I40E_PFPCI_CLASS 0x000BE400
+#define I40E_PFPCI_CLASS_STORAGE_CLASS_SHIFT 0
+#define I40E_PFPCI_CLASS_STORAGE_CLASS_MASK (0x1 << I40E_PFPCI_CLASS_STORAGE_CLASS_SHIFT)
+#define I40E_PFPCI_CNF 0x000BE000
+#define I40E_PFPCI_CNF_MSI_EN_SHIFT 2
+#define I40E_PFPCI_CNF_MSI_EN_MASK (0x1 << I40E_PFPCI_CNF_MSI_EN_SHIFT)
+#define I40E_PFPCI_CNF_EXROM_DIS_SHIFT 3
+#define I40E_PFPCI_CNF_EXROM_DIS_MASK (0x1 << I40E_PFPCI_CNF_EXROM_DIS_SHIFT)
+#define I40E_PFPCI_CNF_IO_BAR_SHIFT 4
+#define I40E_PFPCI_CNF_IO_BAR_MASK (0x1 << I40E_PFPCI_CNF_IO_BAR_SHIFT)
+#define I40E_PFPCI_CNF_INT_PIN_SHIFT 5
+#define I40E_PFPCI_CNF_INT_PIN_MASK (0x3 << I40E_PFPCI_CNF_INT_PIN_SHIFT)
+#define I40E_PFPCI_FACTPS 0x0009C180
+#define I40E_PFPCI_FACTPS_FUNC_POWER_STATE_SHIFT 0
+#define I40E_PFPCI_FACTPS_FUNC_POWER_STATE_MASK (0x3 << I40E_PFPCI_FACTPS_FUNC_POWER_STATE_SHIFT)
+#define I40E_PFPCI_FACTPS_FUNC_AUX_EN_SHIFT 3
+#define I40E_PFPCI_FACTPS_FUNC_AUX_EN_MASK (0x1 << I40E_PFPCI_FACTPS_FUNC_AUX_EN_SHIFT)
+#define I40E_PFPCI_FUNC 0x000BE200
+#define I40E_PFPCI_FUNC_FUNC_DIS_SHIFT 0
+#define I40E_PFPCI_FUNC_FUNC_DIS_MASK (0x1 << I40E_PFPCI_FUNC_FUNC_DIS_SHIFT)
+#define I40E_PFPCI_FUNC_ALLOW_FUNC_DIS_SHIFT 1
+#define I40E_PFPCI_FUNC_ALLOW_FUNC_DIS_MASK (0x1 << I40E_PFPCI_FUNC_ALLOW_FUNC_DIS_SHIFT)
+#define I40E_PFPCI_FUNC_DIS_FUNC_ON_PORT_DIS_SHIFT 2
+#define I40E_PFPCI_FUNC_DIS_FUNC_ON_PORT_DIS_MASK (0x1 << I40E_PFPCI_FUNC_DIS_FUNC_ON_PORT_DIS_SHIFT)
+#define I40E_PFPCI_FUNC2 0x000BE180
+#define I40E_PFPCI_FUNC2_EMP_FUNC_DIS_SHIFT 0
+#define I40E_PFPCI_FUNC2_EMP_FUNC_DIS_MASK (0x1 << I40E_PFPCI_FUNC2_EMP_FUNC_DIS_SHIFT)
+#define I40E_PFPCI_ICAUSE 0x0009C200
+#define I40E_PFPCI_ICAUSE_PCIE_ERR_CAUSE_SHIFT 0
+#define I40E_PFPCI_ICAUSE_PCIE_ERR_CAUSE_MASK (0xFFFFFFFF << I40E_PFPCI_ICAUSE_PCIE_ERR_CAUSE_SHIFT)
+#define I40E_PFPCI_IENA 0x0009C280
+#define I40E_PFPCI_IENA_PCIE_ERR_EN_SHIFT 0
+#define I40E_PFPCI_IENA_PCIE_ERR_EN_MASK (0xFFFFFFFF << I40E_PFPCI_IENA_PCIE_ERR_EN_SHIFT)
+#define I40E_PFPCI_PFDEVID 0x000BE080
+#define I40E_PFPCI_PFDEVID_PF_DEV_ID_LAN_SHIFT 0
+#define I40E_PFPCI_PFDEVID_PF_DEV_ID_LAN_MASK (0xFFFF << I40E_PFPCI_PFDEVID_PF_DEV_ID_LAN_SHIFT)
+#define I40E_PFPCI_PFDEVID_PF_DEV_ID_SAN_SHIFT 16
+#define I40E_PFPCI_PFDEVID_PF_DEV_ID_SAN_MASK (0xFFFF << I40E_PFPCI_PFDEVID_PF_DEV_ID_SAN_SHIFT)
+#define I40E_PFPCI_PM 0x000BE300
+#define I40E_PFPCI_PM_PME_EN_SHIFT 0
+#define I40E_PFPCI_PM_PME_EN_MASK (0x1 << I40E_PFPCI_PM_PME_EN_SHIFT)
+#define I40E_PFPCI_STATUS1 0x000BE280
+#define I40E_PFPCI_STATUS1_FUNC_VALID_SHIFT 0
+#define I40E_PFPCI_STATUS1_FUNC_VALID_MASK (0x1 << I40E_PFPCI_STATUS1_FUNC_VALID_SHIFT)
+#define I40E_PFPCI_VFDEVID 0x000BE100
+#define I40E_PFPCI_VFDEVID_VF_DEV_ID_LAN_SHIFT 0
+#define I40E_PFPCI_VFDEVID_VF_DEV_ID_LAN_MASK (0xFFFF << I40E_PFPCI_VFDEVID_VF_DEV_ID_LAN_SHIFT)
+#define I40E_PFPCI_VFDEVID_VF_DEV_ID_SAN_SHIFT 16
+#define I40E_PFPCI_VFDEVID_VF_DEV_ID_SAN_MASK (0xFFFF << I40E_PFPCI_VFDEVID_VF_DEV_ID_SAN_SHIFT)
+#define I40E_PFPCI_VMINDEX 0x0009C300
+#define I40E_PFPCI_VMINDEX_VMINDEX_SHIFT 0
+#define I40E_PFPCI_VMINDEX_VMINDEX_MASK (0x1FF << I40E_PFPCI_VMINDEX_VMINDEX_SHIFT)
+#define I40E_PFPCI_VMPEND 0x0009C380
+#define I40E_PFPCI_VMPEND_PENDING_SHIFT 0
+#define I40E_PFPCI_VMPEND_PENDING_MASK (0x1 << I40E_PFPCI_VMPEND_PENDING_SHIFT)
+#define I40E_GLPE_CPUSTATUS0 0x0000D040
+#define I40E_GLPE_CPUSTATUS0_PECPUSTATUS0_SHIFT 0
+#define I40E_GLPE_CPUSTATUS0_PECPUSTATUS0_MASK (0xFFFFFFFF << I40E_GLPE_CPUSTATUS0_PECPUSTATUS0_SHIFT)
+#define I40E_GLPE_CPUSTATUS1 0x0000D044
+#define I40E_GLPE_CPUSTATUS1_PECPUSTATUS1_SHIFT 0
+#define I40E_GLPE_CPUSTATUS1_PECPUSTATUS1_MASK (0xFFFFFFFF << I40E_GLPE_CPUSTATUS1_PECPUSTATUS1_SHIFT)
+#define I40E_GLPE_CPUSTATUS2 0x0000D048
+#define I40E_GLPE_CPUSTATUS2_PECPUSTATUS2_SHIFT 0
+#define I40E_GLPE_CPUSTATUS2_PECPUSTATUS2_MASK (0xFFFFFFFF << I40E_GLPE_CPUSTATUS2_PECPUSTATUS2_SHIFT)
+#define I40E_GLPE_PFFLMOBJCTRL(_i) (0x0000D480 + ((_i) * 4)) /* _i=0...15 */
+#define I40E_GLPE_PFFLMOBJCTRL_MAX_INDEX 15
+#define I40E_GLPE_PFFLMOBJCTRL_XMIT_BLOCKSIZE_SHIFT 0
+#define I40E_GLPE_PFFLMOBJCTRL_XMIT_BLOCKSIZE_MASK (0x7 << I40E_GLPE_PFFLMOBJCTRL_XMIT_BLOCKSIZE_SHIFT)
+#define I40E_GLPE_PFFLMOBJCTRL_Q1_BLOCKSIZE_SHIFT 8
+#define I40E_GLPE_PFFLMOBJCTRL_Q1_BLOCKSIZE_MASK (0x7 << I40E_GLPE_PFFLMOBJCTRL_Q1_BLOCKSIZE_SHIFT)
+#define I40E_GLPE_VFFLMOBJCTRL(_i) (0x0000D400 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLPE_VFFLMOBJCTRL_MAX_INDEX 31
+#define I40E_GLPE_VFFLMOBJCTRL_XMIT_BLOCKSIZE_SHIFT 0
+#define I40E_GLPE_VFFLMOBJCTRL_XMIT_BLOCKSIZE_MASK (0x7 << I40E_GLPE_VFFLMOBJCTRL_XMIT_BLOCKSIZE_SHIFT)
+#define I40E_GLPE_VFFLMOBJCTRL_Q1_BLOCKSIZE_SHIFT 8
+#define I40E_GLPE_VFFLMOBJCTRL_Q1_BLOCKSIZE_MASK (0x7 << I40E_GLPE_VFFLMOBJCTRL_Q1_BLOCKSIZE_SHIFT)
+#define I40E_GLPE_VFFLMQ1ALLOCERR(_i) (0x0000C700 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLPE_VFFLMQ1ALLOCERR_MAX_INDEX 31
+#define I40E_GLPE_VFFLMQ1ALLOCERR_ERROR_COUNT_SHIFT 0
+#define I40E_GLPE_VFFLMQ1ALLOCERR_ERROR_COUNT_MASK (0xFFFF << I40E_GLPE_VFFLMQ1ALLOCERR_ERROR_COUNT_SHIFT)
+#define I40E_GLPE_VFFLMXMITALLOCERR(_i) (0x0000C600 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLPE_VFFLMXMITALLOCERR_MAX_INDEX 31
+#define I40E_GLPE_VFFLMXMITALLOCERR_ERROR_COUNT_SHIFT 0
+#define I40E_GLPE_VFFLMXMITALLOCERR_ERROR_COUNT_MASK (0xFFFF << I40E_GLPE_VFFLMXMITALLOCERR_ERROR_COUNT_SHIFT)
+#define I40E_GLPE_VFUDACTRL(_i) (0x0000C000 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLPE_VFUDACTRL_MAX_INDEX 31
+#define I40E_GLPE_VFUDACTRL_IPV4MCFRAGRESBP_SHIFT 0
+#define I40E_GLPE_VFUDACTRL_IPV4MCFRAGRESBP_MASK (0x1 << I40E_GLPE_VFUDACTRL_IPV4MCFRAGRESBP_SHIFT)
+#define I40E_GLPE_VFUDACTRL_IPV4UCFRAGRESBP_SHIFT 1
+#define I40E_GLPE_VFUDACTRL_IPV4UCFRAGRESBP_MASK (0x1 << I40E_GLPE_VFUDACTRL_IPV4UCFRAGRESBP_SHIFT)
+#define I40E_GLPE_VFUDACTRL_IPV6MCFRAGRESBP_SHIFT 2
+#define I40E_GLPE_VFUDACTRL_IPV6MCFRAGRESBP_MASK (0x1 << I40E_GLPE_VFUDACTRL_IPV6MCFRAGRESBP_SHIFT)
+#define I40E_GLPE_VFUDACTRL_IPV6UCFRAGRESBP_SHIFT 3
+#define I40E_GLPE_VFUDACTRL_IPV6UCFRAGRESBP_MASK (0x1 << I40E_GLPE_VFUDACTRL_IPV6UCFRAGRESBP_SHIFT)
+#define I40E_GLPE_VFUDACTRL_UDPMCFRAGRESFAIL_SHIFT 4
+#define I40E_GLPE_VFUDACTRL_UDPMCFRAGRESFAIL_MASK (0x1 << I40E_GLPE_VFUDACTRL_UDPMCFRAGRESFAIL_SHIFT)
+#define I40E_GLPE_VFUDAUCFBQPN(_i) (0x0000C100 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLPE_VFUDAUCFBQPN_MAX_INDEX 31
+#define I40E_GLPE_VFUDAUCFBQPN_QPN_SHIFT 0
+#define I40E_GLPE_VFUDAUCFBQPN_QPN_MASK (0x3FFFF << I40E_GLPE_VFUDAUCFBQPN_QPN_SHIFT)
+#define I40E_GLPE_VFUDAUCFBQPN_VALID_SHIFT 31
+#define I40E_GLPE_VFUDAUCFBQPN_VALID_MASK (0x1 << I40E_GLPE_VFUDAUCFBQPN_VALID_SHIFT)
+#define I40E_PFPE_AEQALLOC 0x00131180
+#define I40E_PFPE_AEQALLOC_AECOUNT_SHIFT 0
+#define I40E_PFPE_AEQALLOC_AECOUNT_MASK (0xFFFFFFFF << I40E_PFPE_AEQALLOC_AECOUNT_SHIFT)
+#define I40E_PFPE_CCQPHIGH 0x00008200
+#define I40E_PFPE_CCQPHIGH_PECCQPHIGH_SHIFT 0
+#define I40E_PFPE_CCQPHIGH_PECCQPHIGH_MASK (0xFFFFFFFF << I40E_PFPE_CCQPHIGH_PECCQPHIGH_SHIFT)
+#define I40E_PFPE_CCQPLOW 0x00008180
+#define I40E_PFPE_CCQPLOW_PECCQPLOW_SHIFT 0
+#define I40E_PFPE_CCQPLOW_PECCQPLOW_MASK (0xFFFFFFFF << I40E_PFPE_CCQPLOW_PECCQPLOW_SHIFT)
+#define I40E_PFPE_CCQPSTATUS 0x00008100
+#define I40E_PFPE_CCQPSTATUS_CCQP_DONE_SHIFT 0
+#define I40E_PFPE_CCQPSTATUS_CCQP_DONE_MASK (0x1 << I40E_PFPE_CCQPSTATUS_CCQP_DONE_SHIFT)
+#define I40E_PFPE_CCQPSTATUS_CCQP_ERR_SHIFT 31
+#define I40E_PFPE_CCQPSTATUS_CCQP_ERR_MASK (0x1 << I40E_PFPE_CCQPSTATUS_CCQP_ERR_SHIFT)
+#define I40E_PFPE_CQACK 0x00131100
+#define I40E_PFPE_CQACK_PECQID_SHIFT 0
+#define I40E_PFPE_CQACK_PECQID_MASK (0x1FFFF << I40E_PFPE_CQACK_PECQID_SHIFT)
+#define I40E_PFPE_CQARM 0x00131080
+#define I40E_PFPE_CQARM_PECQID_SHIFT 0
+#define I40E_PFPE_CQARM_PECQID_MASK (0x1FFFF << I40E_PFPE_CQARM_PECQID_SHIFT)
+#define I40E_PFPE_CQPDB 0x00008000
+#define I40E_PFPE_CQPDB_WQHEAD_SHIFT 0
+#define I40E_PFPE_CQPDB_WQHEAD_MASK (0x7FF << I40E_PFPE_CQPDB_WQHEAD_SHIFT)
+#define I40E_PFPE_CQPERRCODES 0x00008880
+#define I40E_PFPE_CQPERRCODES_CQP_MINOR_CODE_SHIFT 0
+#define I40E_PFPE_CQPERRCODES_CQP_MINOR_CODE_MASK (0xFFFF << I40E_PFPE_CQPERRCODES_CQP_MINOR_CODE_SHIFT)
+#define I40E_PFPE_CQPERRCODES_CQP_MAJOR_CODE_SHIFT 16
+#define I40E_PFPE_CQPERRCODES_CQP_MAJOR_CODE_MASK (0xFFFF << I40E_PFPE_CQPERRCODES_CQP_MAJOR_CODE_SHIFT)
+#define I40E_PFPE_CQPTAIL 0x00008080
+#define I40E_PFPE_CQPTAIL_WQTAIL_SHIFT 0
+#define I40E_PFPE_CQPTAIL_WQTAIL_MASK (0x7FF << I40E_PFPE_CQPTAIL_WQTAIL_SHIFT)
+#define I40E_PFPE_CQPTAIL_CQP_OP_ERR_SHIFT 31
+#define I40E_PFPE_CQPTAIL_CQP_OP_ERR_MASK (0x1 << I40E_PFPE_CQPTAIL_CQP_OP_ERR_SHIFT)
+#define I40E_PFPE_FLMQ1ALLOCERR 0x00008980
+#define I40E_PFPE_FLMQ1ALLOCERR_ERROR_COUNT_SHIFT 0
+#define I40E_PFPE_FLMQ1ALLOCERR_ERROR_COUNT_MASK (0xFFFF << I40E_PFPE_FLMQ1ALLOCERR_ERROR_COUNT_SHIFT)
+#define I40E_PFPE_FLMXMITALLOCERR 0x00008900
+#define I40E_PFPE_FLMXMITALLOCERR_ERROR_COUNT_SHIFT 0
+#define I40E_PFPE_FLMXMITALLOCERR_ERROR_COUNT_MASK (0xFFFF << I40E_PFPE_FLMXMITALLOCERR_ERROR_COUNT_SHIFT)
+#define I40E_PFPE_IPCONFIG0 0x00008280
+#define I40E_PFPE_IPCONFIG0_PEIPID_SHIFT 0
+#define I40E_PFPE_IPCONFIG0_PEIPID_MASK (0xFFFF << I40E_PFPE_IPCONFIG0_PEIPID_SHIFT)
+#define I40E_PFPE_IPCONFIG0_USEENTIREIDRANGE_SHIFT 16
+#define I40E_PFPE_IPCONFIG0_USEENTIREIDRANGE_MASK (0x1 << I40E_PFPE_IPCONFIG0_USEENTIREIDRANGE_SHIFT)
+#define I40E_PFPE_IPCONFIG0_USEUPPERIDRANGE_SHIFT 17
+#define I40E_PFPE_IPCONFIG0_USEUPPERIDRANGE_MASK (0x1 << I40E_PFPE_IPCONFIG0_USEUPPERIDRANGE_SHIFT)
+#define I40E_PFPE_MRTEIDXMASK 0x00008600
+#define I40E_PFPE_MRTEIDXMASK_MRTEIDXMASKBITS_SHIFT 0
+#define I40E_PFPE_MRTEIDXMASK_MRTEIDXMASKBITS_MASK (0x1F << I40E_PFPE_MRTEIDXMASK_MRTEIDXMASKBITS_SHIFT)
+#define I40E_PFPE_RCVUNEXPECTEDERROR 0x00008680
+#define I40E_PFPE_RCVUNEXPECTEDERROR_TCP_RX_UNEXP_ERR_SHIFT 0
+#define I40E_PFPE_RCVUNEXPECTEDERROR_TCP_RX_UNEXP_ERR_MASK (0xFFFFFF << I40E_PFPE_RCVUNEXPECTEDERROR_TCP_RX_UNEXP_ERR_SHIFT)
+#define I40E_PFPE_TCPNOWTIMER 0x00008580
+#define I40E_PFPE_TCPNOWTIMER_TCP_NOW_SHIFT 0
+#define I40E_PFPE_TCPNOWTIMER_TCP_NOW_MASK (0xFFFFFFFF << I40E_PFPE_TCPNOWTIMER_TCP_NOW_SHIFT)
+#define I40E_PFPE_UDACTRL 0x00008700
+#define I40E_PFPE_UDACTRL_IPV4MCFRAGRESBP_SHIFT 0
+#define I40E_PFPE_UDACTRL_IPV4MCFRAGRESBP_MASK (0x1 << I40E_PFPE_UDACTRL_IPV4MCFRAGRESBP_SHIFT)
+#define I40E_PFPE_UDACTRL_IPV4UCFRAGRESBP_SHIFT 1
+#define I40E_PFPE_UDACTRL_IPV4UCFRAGRESBP_MASK (0x1 << I40E_PFPE_UDACTRL_IPV4UCFRAGRESBP_SHIFT)
+#define I40E_PFPE_UDACTRL_IPV6MCFRAGRESBP_SHIFT 2
+#define I40E_PFPE_UDACTRL_IPV6MCFRAGRESBP_MASK (0x1 << I40E_PFPE_UDACTRL_IPV6MCFRAGRESBP_SHIFT)
+#define I40E_PFPE_UDACTRL_IPV6UCFRAGRESBP_SHIFT 3
+#define I40E_PFPE_UDACTRL_IPV6UCFRAGRESBP_MASK (0x1 << I40E_PFPE_UDACTRL_IPV6UCFRAGRESBP_SHIFT)
+#define I40E_PFPE_UDACTRL_UDPMCFRAGRESFAIL_SHIFT 4
+#define I40E_PFPE_UDACTRL_UDPMCFRAGRESFAIL_MASK (0x1 << I40E_PFPE_UDACTRL_UDPMCFRAGRESFAIL_SHIFT)
+#define I40E_PFPE_UDAUCFBQPN 0x00008780
+#define I40E_PFPE_UDAUCFBQPN_QPN_SHIFT 0
+#define I40E_PFPE_UDAUCFBQPN_QPN_MASK (0x3FFFF << I40E_PFPE_UDAUCFBQPN_QPN_SHIFT)
+#define I40E_PFPE_UDAUCFBQPN_VALID_SHIFT 31
+#define I40E_PFPE_UDAUCFBQPN_VALID_MASK (0x1 << I40E_PFPE_UDAUCFBQPN_VALID_SHIFT)
+#define I40E_PFPE_WQEALLOC 0x00138C00
+#define I40E_PFPE_WQEALLOC_PEQPID_SHIFT 0
+#define I40E_PFPE_WQEALLOC_PEQPID_MASK (0x3FFFF << I40E_PFPE_WQEALLOC_PEQPID_SHIFT)
+#define I40E_PFPE_WQEALLOC_WQE_DESC_INDEX_SHIFT 20
+#define I40E_PFPE_WQEALLOC_WQE_DESC_INDEX_MASK (0xFFF << I40E_PFPE_WQEALLOC_WQE_DESC_INDEX_SHIFT)
+#define I40E_VFPE_AEQALLOC(_VF) (0x00130C00 + ((_VF) * 4)) /* _i=0...127 */
+#define I40E_VFPE_AEQALLOC_MAX_INDEX 127
+#define I40E_VFPE_AEQALLOC_AECOUNT_SHIFT 0
+#define I40E_VFPE_AEQALLOC_AECOUNT_MASK (0xFFFFFFFF << I40E_VFPE_AEQALLOC_AECOUNT_SHIFT)
+#define I40E_VFPE_CCQPHIGH(_VF) (0x00001000 + ((_VF) * 4)) /* _i=0...127 */
+#define I40E_VFPE_CCQPHIGH_MAX_INDEX 127
+#define I40E_VFPE_CCQPHIGH_PECCQPHIGH_SHIFT 0
+#define I40E_VFPE_CCQPHIGH_PECCQPHIGH_MASK (0xFFFFFFFF << I40E_VFPE_CCQPHIGH_PECCQPHIGH_SHIFT)
+#define I40E_VFPE_CCQPLOW(_VF) (0x00000C00 + ((_VF) * 4)) /* _i=0...127 */
+#define I40E_VFPE_CCQPLOW_MAX_INDEX 127
+#define I40E_VFPE_CCQPLOW_PECCQPLOW_SHIFT 0
+#define I40E_VFPE_CCQPLOW_PECCQPLOW_MASK (0xFFFFFFFF << I40E_VFPE_CCQPLOW_PECCQPLOW_SHIFT)
+#define I40E_VFPE_CCQPSTATUS(_VF) (0x00000800 + ((_VF) * 4)) /* _i=0...127 */
+#define I40E_VFPE_CCQPSTATUS_MAX_INDEX 127
+#define I40E_VFPE_CCQPSTATUS_CCQP_DONE_SHIFT 0
+#define I40E_VFPE_CCQPSTATUS_CCQP_DONE_MASK (0x1 << I40E_VFPE_CCQPSTATUS_CCQP_DONE_SHIFT)
+#define I40E_VFPE_CCQPSTATUS_CCQP_ERR_SHIFT 31
+#define I40E_VFPE_CCQPSTATUS_CCQP_ERR_MASK (0x1 << I40E_VFPE_CCQPSTATUS_CCQP_ERR_SHIFT)
+#define I40E_VFPE_CQACK(_VF) (0x00130800 + ((_VF) * 4)) /* _i=0...127 */
+#define I40E_VFPE_CQACK_MAX_INDEX 127
+#define I40E_VFPE_CQACK_PECQID_SHIFT 0
+#define I40E_VFPE_CQACK_PECQID_MASK (0x1FFFF << I40E_VFPE_CQACK_PECQID_SHIFT)
+#define I40E_VFPE_CQARM(_VF) (0x00130400 + ((_VF) * 4)) /* _i=0...127 */
+#define I40E_VFPE_CQARM_MAX_INDEX 127
+#define I40E_VFPE_CQARM_PECQID_SHIFT 0
+#define I40E_VFPE_CQARM_PECQID_MASK (0x1FFFF << I40E_VFPE_CQARM_PECQID_SHIFT)
+#define I40E_VFPE_CQPDB(_VF) (0x00000000 + ((_VF) * 4)) /* _i=0...127 */
+#define I40E_VFPE_CQPDB_MAX_INDEX 127
+#define I40E_VFPE_CQPDB_WQHEAD_SHIFT 0
+#define I40E_VFPE_CQPDB_WQHEAD_MASK (0x7FF << I40E_VFPE_CQPDB_WQHEAD_SHIFT)
+#define I40E_VFPE_CQPERRCODES(_VF) (0x00001800 + ((_VF) * 4)) /* _i=0...127 */
+#define I40E_VFPE_CQPERRCODES_MAX_INDEX 127
+#define I40E_VFPE_CQPERRCODES_CQP_MINOR_CODE_SHIFT 0
+#define I40E_VFPE_CQPERRCODES_CQP_MINOR_CODE_MASK (0xFFFF << I40E_VFPE_CQPERRCODES_CQP_MINOR_CODE_SHIFT)
+#define I40E_VFPE_CQPERRCODES_CQP_MAJOR_CODE_SHIFT 16
+#define I40E_VFPE_CQPERRCODES_CQP_MAJOR_CODE_MASK (0xFFFF << I40E_VFPE_CQPERRCODES_CQP_MAJOR_CODE_SHIFT)
+#define I40E_VFPE_CQPTAIL(_VF) (0x00000400 + ((_VF) * 4)) /* _i=0...127 */
+#define I40E_VFPE_CQPTAIL_MAX_INDEX 127
+#define I40E_VFPE_CQPTAIL_WQTAIL_SHIFT 0
+#define I40E_VFPE_CQPTAIL_WQTAIL_MASK (0x7FF << I40E_VFPE_CQPTAIL_WQTAIL_SHIFT)
+#define I40E_VFPE_CQPTAIL_CQP_OP_ERR_SHIFT 31
+#define I40E_VFPE_CQPTAIL_CQP_OP_ERR_MASK (0x1 << I40E_VFPE_CQPTAIL_CQP_OP_ERR_SHIFT)
+#define I40E_VFPE_IPCONFIG0(_VF) (0x00001400 + ((_VF) * 4)) /* _i=0...127 */
+#define I40E_VFPE_IPCONFIG0_MAX_INDEX 127
+#define I40E_VFPE_IPCONFIG0_PEIPID_SHIFT 0
+#define I40E_VFPE_IPCONFIG0_PEIPID_MASK (0xFFFF << I40E_VFPE_IPCONFIG0_PEIPID_SHIFT)
+#define I40E_VFPE_IPCONFIG0_USEENTIREIDRANGE_SHIFT 16
+#define I40E_VFPE_IPCONFIG0_USEENTIREIDRANGE_MASK (0x1 << I40E_VFPE_IPCONFIG0_USEENTIREIDRANGE_SHIFT)
+#define I40E_VFPE_IPCONFIG0_USEUPPERIDRANGE_SHIFT 17
+#define I40E_VFPE_IPCONFIG0_USEUPPERIDRANGE_MASK (0x1 << I40E_VFPE_IPCONFIG0_USEUPPERIDRANGE_SHIFT)
+#define I40E_VFPE_MRTEIDXMASK(_VF) (0x00003000 + ((_VF) * 4)) /* _i=0...127 */
+#define I40E_VFPE_MRTEIDXMASK_MAX_INDEX 127
+#define I40E_VFPE_MRTEIDXMASK_MRTEIDXMASKBITS_SHIFT 0
+#define I40E_VFPE_MRTEIDXMASK_MRTEIDXMASKBITS_MASK (0x1F << I40E_VFPE_MRTEIDXMASK_MRTEIDXMASKBITS_SHIFT)
+#define I40E_VFPE_RCVUNEXPECTEDERROR(_VF) (0x00003400 + ((_VF) * 4))
+#define I40E_VFPE_RCVUNEXPECTEDERROR_MAX_INDEX 127
+#define I40E_VFPE_RCVUNEXPECTEDERROR_TCP_RX_UNEXP_ERR_SHIFT 0
+#define I40E_VFPE_RCVUNEXPECTEDERROR_TCP_RX_UNEXP_ERR_MASK (0xFFFFFF << I40E_VFPE_RCVUNEXPECTEDERROR_TCP_RX_UNEXP_ERR_SHIFT)
+#define I40E_VFPE_TCPNOWTIMER(_VF) (0x00002C00 + ((_VF) * 4)) /* _i=0...127 */
+#define I40E_VFPE_TCPNOWTIMER_MAX_INDEX 127
+#define I40E_VFPE_TCPNOWTIMER_TCP_NOW_SHIFT 0
+#define I40E_VFPE_TCPNOWTIMER_TCP_NOW_MASK (0xFFFFFFFF << I40E_VFPE_TCPNOWTIMER_TCP_NOW_SHIFT)
+#define I40E_VFPE_WQEALLOC(_VF) (0x00138000 + ((_VF) * 4)) /* _i=0...127 */
+#define I40E_VFPE_WQEALLOC_MAX_INDEX 127
+#define I40E_VFPE_WQEALLOC_PEQPID_SHIFT 0
+#define I40E_VFPE_WQEALLOC_PEQPID_MASK (0x3FFFF << I40E_VFPE_WQEALLOC_PEQPID_SHIFT)
+#define I40E_VFPE_WQEALLOC_WQE_DESC_INDEX_SHIFT 20
+#define I40E_VFPE_WQEALLOC_WQE_DESC_INDEX_MASK (0xFFF << I40E_VFPE_WQEALLOC_WQE_DESC_INDEX_SHIFT)
+#define I40E_GLPES_PFIP4RXDISCARD(_i) (0x00010600 + ((_i) * 4)) /* _i=0...15 */
+#define I40E_GLPES_PFIP4RXDISCARD_MAX_INDEX 15
+#define I40E_GLPES_PFIP4RXDISCARD_IP4RXDISCARD_SHIFT 0
+#define I40E_GLPES_PFIP4RXDISCARD_IP4RXDISCARD_MASK (0xFFFFFFFF << I40E_GLPES_PFIP4RXDISCARD_IP4RXDISCARD_SHIFT)
+#define I40E_GLPES_PFIP4RXFRAGSHI(_i) (0x00010804 + ((_i) * 8)) /* _i=0...15 */
+#define I40E_GLPES_PFIP4RXFRAGSHI_MAX_INDEX 15
+#define I40E_GLPES_PFIP4RXFRAGSHI_IP4RXFRAGSHI_SHIFT 0
+#define I40E_GLPES_PFIP4RXFRAGSHI_IP4RXFRAGSHI_MASK (0xFFFF << I40E_GLPES_PFIP4RXFRAGSHI_IP4RXFRAGSHI_SHIFT)
+#define I40E_GLPES_PFIP4RXFRAGSLO(_i) (0x00010800 + ((_i) * 8)) /* _i=0...15 */
+#define I40E_GLPES_PFIP4RXFRAGSLO_MAX_INDEX 15
+#define I40E_GLPES_PFIP4RXFRAGSLO_IP4RXFRAGSLO_SHIFT 0
+#define I40E_GLPES_PFIP4RXFRAGSLO_IP4RXFRAGSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP4RXFRAGSLO_IP4RXFRAGSLO_SHIFT)
+#define I40E_GLPES_PFIP4RXMCOCTSHI(_i) (0x00010A04 + ((_i) * 8))
+#define I40E_GLPES_PFIP4RXMCOCTSHI_MAX_INDEX 15
+#define I40E_GLPES_PFIP4RXMCOCTSHI_IP4RXMCOCTSHI_SHIFT 0
+#define I40E_GLPES_PFIP4RXMCOCTSHI_IP4RXMCOCTSHI_MASK (0xFFFF << I40E_GLPES_PFIP4RXMCOCTSHI_IP4RXMCOCTSHI_SHIFT)
+#define I40E_GLPES_PFIP4RXMCOCTSLO(_i) (0x00010A00 + ((_i) * 8))
+#define I40E_GLPES_PFIP4RXMCOCTSLO_MAX_INDEX 15
+#define I40E_GLPES_PFIP4RXMCOCTSLO_IP4RXMCOCTSLO_SHIFT 0
+#define I40E_GLPES_PFIP4RXMCOCTSLO_IP4RXMCOCTSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP4RXMCOCTSLO_IP4RXMCOCTSLO_SHIFT)
+#define I40E_GLPES_PFIP4RXMCPKTSHI(_i) (0x00010C04 + ((_i) * 8))
+#define I40E_GLPES_PFIP4RXMCPKTSHI_MAX_INDEX 15
+#define I40E_GLPES_PFIP4RXMCPKTSHI_IP4RXMCPKTSHI_SHIFT 0
+#define I40E_GLPES_PFIP4RXMCPKTSHI_IP4RXMCPKTSHI_MASK (0xFFFF << I40E_GLPES_PFIP4RXMCPKTSHI_IP4RXMCPKTSHI_SHIFT)
+#define I40E_GLPES_PFIP4RXMCPKTSLO(_i) (0x00010C00 + ((_i) * 8))
+#define I40E_GLPES_PFIP4RXMCPKTSLO_MAX_INDEX 15
+#define I40E_GLPES_PFIP4RXMCPKTSLO_IP4RXMCPKTSLO_SHIFT 0
+#define I40E_GLPES_PFIP4RXMCPKTSLO_IP4RXMCPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP4RXMCPKTSLO_IP4RXMCPKTSLO_SHIFT)
+#define I40E_GLPES_PFIP4RXOCTSHI(_i) (0x00010204 + ((_i) * 8)) /* _i=0...15 */
+#define I40E_GLPES_PFIP4RXOCTSHI_MAX_INDEX 15
+#define I40E_GLPES_PFIP4RXOCTSHI_IP4RXOCTSHI_SHIFT 0
+#define I40E_GLPES_PFIP4RXOCTSHI_IP4RXOCTSHI_MASK (0xFFFF << I40E_GLPES_PFIP4RXOCTSHI_IP4RXOCTSHI_SHIFT)
+#define I40E_GLPES_PFIP4RXOCTSLO(_i) (0x00010200 + ((_i) * 8)) /* _i=0...15 */
+#define I40E_GLPES_PFIP4RXOCTSLO_MAX_INDEX 15
+#define I40E_GLPES_PFIP4RXOCTSLO_IP4RXOCTSLO_SHIFT 0
+#define I40E_GLPES_PFIP4RXOCTSLO_IP4RXOCTSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP4RXOCTSLO_IP4RXOCTSLO_SHIFT)
+#define I40E_GLPES_PFIP4RXPKTSHI(_i) (0x00010404 + ((_i) * 8)) /* _i=0...15 */
+#define I40E_GLPES_PFIP4RXPKTSHI_MAX_INDEX 15
+#define I40E_GLPES_PFIP4RXPKTSHI_IP4RXPKTSHI_SHIFT 0
+#define I40E_GLPES_PFIP4RXPKTSHI_IP4RXPKTSHI_MASK (0xFFFF << I40E_GLPES_PFIP4RXPKTSHI_IP4RXPKTSHI_SHIFT)
+#define I40E_GLPES_PFIP4RXPKTSLO(_i) (0x00010400 + ((_i) * 8)) /* _i=0...15 */
+#define I40E_GLPES_PFIP4RXPKTSLO_MAX_INDEX 15
+#define I40E_GLPES_PFIP4RXPKTSLO_IP4RXPKTSLO_SHIFT 0
+#define I40E_GLPES_PFIP4RXPKTSLO_IP4RXPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP4RXPKTSLO_IP4RXPKTSLO_SHIFT)
+#define I40E_GLPES_PFIP4RXTRUNC(_i) (0x00010700 + ((_i) * 4)) /* _i=0...15 */
+#define I40E_GLPES_PFIP4RXTRUNC_MAX_INDEX 15
+#define I40E_GLPES_PFIP4RXTRUNC_IP4RXTRUNC_SHIFT 0
+#define I40E_GLPES_PFIP4RXTRUNC_IP4RXTRUNC_MASK (0xFFFFFFFF << I40E_GLPES_PFIP4RXTRUNC_IP4RXTRUNC_SHIFT)
+#define I40E_GLPES_PFIP4TXFRAGSHI(_i) (0x00011E04 + ((_i) * 8)) /* _i=0...15 */
+#define I40E_GLPES_PFIP4TXFRAGSHI_MAX_INDEX 15
+#define I40E_GLPES_PFIP4TXFRAGSHI_IP4TXFRAGSHI_SHIFT 0
+#define I40E_GLPES_PFIP4TXFRAGSHI_IP4TXFRAGSHI_MASK (0xFFFF << I40E_GLPES_PFIP4TXFRAGSHI_IP4TXFRAGSHI_SHIFT)
+#define I40E_GLPES_PFIP4TXFRAGSLO(_i) (0x00011E00 + ((_i) * 8)) /* _i=0...15 */
+#define I40E_GLPES_PFIP4TXFRAGSLO_MAX_INDEX 15
+#define I40E_GLPES_PFIP4TXFRAGSLO_IP4TXFRAGSLO_SHIFT 0
+#define I40E_GLPES_PFIP4TXFRAGSLO_IP4TXFRAGSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP4TXFRAGSLO_IP4TXFRAGSLO_SHIFT)
+#define I40E_GLPES_PFIP4TXMCOCTSHI(_i) (0x00012004 + ((_i) * 8))
+#define I40E_GLPES_PFIP4TXMCOCTSHI_MAX_INDEX 15
+#define I40E_GLPES_PFIP4TXMCOCTSHI_IP4TXMCOCTSHI_SHIFT 0
+#define I40E_GLPES_PFIP4TXMCOCTSHI_IP4TXMCOCTSHI_MASK (0xFFFF << I40E_GLPES_PFIP4TXMCOCTSHI_IP4TXMCOCTSHI_SHIFT)
+#define I40E_GLPES_PFIP4TXMCOCTSLO(_i) (0x00012000 + ((_i) * 8))
+#define I40E_GLPES_PFIP4TXMCOCTSLO_MAX_INDEX 15
+#define I40E_GLPES_PFIP4TXMCOCTSLO_IP4TXMCOCTSLO_SHIFT 0
+#define I40E_GLPES_PFIP4TXMCOCTSLO_IP4TXMCOCTSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP4TXMCOCTSLO_IP4TXMCOCTSLO_SHIFT)
+#define I40E_GLPES_PFIP4TXMCPKTSHI(_i) (0x00012204 + ((_i) * 8))
+#define I40E_GLPES_PFIP4TXMCPKTSHI_MAX_INDEX 15
+#define I40E_GLPES_PFIP4TXMCPKTSHI_IP4TXMCPKTSHI_SHIFT 0
+#define I40E_GLPES_PFIP4TXMCPKTSHI_IP4TXMCPKTSHI_MASK (0xFFFF << I40E_GLPES_PFIP4TXMCPKTSHI_IP4TXMCPKTSHI_SHIFT)
+#define I40E_GLPES_PFIP4TXMCPKTSLO(_i) (0x00012200 + ((_i) * 8))
+#define I40E_GLPES_PFIP4TXMCPKTSLO_MAX_INDEX 15
+#define I40E_GLPES_PFIP4TXMCPKTSLO_IP4TXMCPKTSLO_SHIFT 0
+#define I40E_GLPES_PFIP4TXMCPKTSLO_IP4TXMCPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP4TXMCPKTSLO_IP4TXMCPKTSLO_SHIFT)
+#define I40E_GLPES_PFIP4TXNOROUTE(_i) (0x00012E00 + ((_i) * 4)) /* _i=0...15 */
+#define I40E_GLPES_PFIP4TXNOROUTE_MAX_INDEX 15
+#define I40E_GLPES_PFIP4TXNOROUTE_IP4TXNOROUTE_SHIFT 0
+#define I40E_GLPES_PFIP4TXNOROUTE_IP4TXNOROUTE_MASK (0xFFFFFF << I40E_GLPES_PFIP4TXNOROUTE_IP4TXNOROUTE_SHIFT)
+#define I40E_GLPES_PFIP4TXOCTSHI(_i) (0x00011A04 + ((_i) * 8)) /* _i=0...15 */
+#define I40E_GLPES_PFIP4TXOCTSHI_MAX_INDEX 15
+#define I40E_GLPES_PFIP4TXOCTSHI_IP4TXOCTSHI_SHIFT 0
+#define I40E_GLPES_PFIP4TXOCTSHI_IP4TXOCTSHI_MASK (0xFFFF << I40E_GLPES_PFIP4TXOCTSHI_IP4TXOCTSHI_SHIFT)
+#define I40E_GLPES_PFIP4TXOCTSLO(_i) (0x00011A00 + ((_i) * 8)) /* _i=0...15 */
+#define I40E_GLPES_PFIP4TXOCTSLO_MAX_INDEX 15
+#define I40E_GLPES_PFIP4TXOCTSLO_IP4TXOCTSLO_SHIFT 0
+#define I40E_GLPES_PFIP4TXOCTSLO_IP4TXOCTSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP4TXOCTSLO_IP4TXOCTSLO_SHIFT)
+#define I40E_GLPES_PFIP4TXPKTSHI(_i) (0x00011C04 + ((_i) * 8)) /* _i=0...15 */
+#define I40E_GLPES_PFIP4TXPKTSHI_MAX_INDEX 15
+#define I40E_GLPES_PFIP4TXPKTSHI_IP4TXPKTSHI_SHIFT 0
+#define I40E_GLPES_PFIP4TXPKTSHI_IP4TXPKTSHI_MASK (0xFFFF << I40E_GLPES_PFIP4TXPKTSHI_IP4TXPKTSHI_SHIFT)
+#define I40E_GLPES_PFIP4TXPKTSLO(_i) (0x00011C00 + ((_i) * 8)) /* _i=0...15 */
+#define I40E_GLPES_PFIP4TXPKTSLO_MAX_INDEX 15
+#define I40E_GLPES_PFIP4TXPKTSLO_IP4TXPKTSLO_SHIFT 0
+#define I40E_GLPES_PFIP4TXPKTSLO_IP4TXPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP4TXPKTSLO_IP4TXPKTSLO_SHIFT)
+#define I40E_GLPES_PFIP6RXDISCARD(_i) (0x00011200 + ((_i) * 4)) /* _i=0...15 */
+#define I40E_GLPES_PFIP6RXDISCARD_MAX_INDEX 15
+#define I40E_GLPES_PFIP6RXDISCARD_IP6RXDISCARD_SHIFT 0
+#define I40E_GLPES_PFIP6RXDISCARD_IP6RXDISCARD_MASK (0xFFFFFFFF << I40E_GLPES_PFIP6RXDISCARD_IP6RXDISCARD_SHIFT)
+#define I40E_GLPES_PFIP6RXFRAGSHI(_i) (0x00011404 + ((_i) * 8)) /* _i=0...15 */
+#define I40E_GLPES_PFIP6RXFRAGSHI_MAX_INDEX 15
+#define I40E_GLPES_PFIP6RXFRAGSHI_IP6RXFRAGSHI_SHIFT 0
+#define I40E_GLPES_PFIP6RXFRAGSHI_IP6RXFRAGSHI_MASK (0xFFFF << I40E_GLPES_PFIP6RXFRAGSHI_IP6RXFRAGSHI_SHIFT)
+#define I40E_GLPES_PFIP6RXFRAGSLO(_i) (0x00011400 + ((_i) * 8)) /* _i=0...15 */
+#define I40E_GLPES_PFIP6RXFRAGSLO_MAX_INDEX 15
+#define I40E_GLPES_PFIP6RXFRAGSLO_IP6RXFRAGSLO_SHIFT 0
+#define I40E_GLPES_PFIP6RXFRAGSLO_IP6RXFRAGSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP6RXFRAGSLO_IP6RXFRAGSLO_SHIFT)
+#define I40E_GLPES_PFIP6RXMCOCTSHI(_i) (0x00011604 + ((_i) * 8))
+#define I40E_GLPES_PFIP6RXMCOCTSHI_MAX_INDEX 15
+#define I40E_GLPES_PFIP6RXMCOCTSHI_IP6RXMCOCTSHI_SHIFT 0
+#define I40E_GLPES_PFIP6RXMCOCTSHI_IP6RXMCOCTSHI_MASK (0xFFFF << I40E_GLPES_PFIP6RXMCOCTSHI_IP6RXMCOCTSHI_SHIFT)
+#define I40E_GLPES_PFIP6RXMCOCTSLO(_i) (0x00011600 + ((_i) * 8))
+#define I40E_GLPES_PFIP6RXMCOCTSLO_MAX_INDEX 15
+#define I40E_GLPES_PFIP6RXMCOCTSLO_IP6RXMCOCTSLO_SHIFT 0
+#define I40E_GLPES_PFIP6RXMCOCTSLO_IP6RXMCOCTSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP6RXMCOCTSLO_IP6RXMCOCTSLO_SHIFT)
+#define I40E_GLPES_PFIP6RXMCPKTSHI(_i) (0x00011804 + ((_i) * 8))
+#define I40E_GLPES_PFIP6RXMCPKTSHI_MAX_INDEX 15
+#define I40E_GLPES_PFIP6RXMCPKTSHI_IP6RXMCPKTSHI_SHIFT 0
+#define I40E_GLPES_PFIP6RXMCPKTSHI_IP6RXMCPKTSHI_MASK (0xFFFF << I40E_GLPES_PFIP6RXMCPKTSHI_IP6RXMCPKTSHI_SHIFT)
+#define I40E_GLPES_PFIP6RXMCPKTSLO(_i) (0x00011800 + ((_i) * 8))
+#define I40E_GLPES_PFIP6RXMCPKTSLO_MAX_INDEX 15
+#define I40E_GLPES_PFIP6RXMCPKTSLO_IP6RXMCPKTSLO_SHIFT 0
+#define I40E_GLPES_PFIP6RXMCPKTSLO_IP6RXMCPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP6RXMCPKTSLO_IP6RXMCPKTSLO_SHIFT)
+#define I40E_GLPES_PFIP6RXOCTSHI(_i) (0x00010E04 + ((_i) * 8)) /* _i=0...15 */
+#define I40E_GLPES_PFIP6RXOCTSHI_MAX_INDEX 15
+#define I40E_GLPES_PFIP6RXOCTSHI_IP6RXOCTSHI_SHIFT 0
+#define I40E_GLPES_PFIP6RXOCTSHI_IP6RXOCTSHI_MASK (0xFFFF << I40E_GLPES_PFIP6RXOCTSHI_IP6RXOCTSHI_SHIFT)
+#define I40E_GLPES_PFIP6RXOCTSLO(_i) (0x00010E00 + ((_i) * 8)) /* _i=0...15 */
+#define I40E_GLPES_PFIP6RXOCTSLO_MAX_INDEX 15
+#define I40E_GLPES_PFIP6RXOCTSLO_IP6RXOCTSLO_SHIFT 0
+#define I40E_GLPES_PFIP6RXOCTSLO_IP6RXOCTSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP6RXOCTSLO_IP6RXOCTSLO_SHIFT)
+#define I40E_GLPES_PFIP6RXPKTSHI(_i) (0x00011004 + ((_i) * 8)) /* _i=0...15 */
+#define I40E_GLPES_PFIP6RXPKTSHI_MAX_INDEX 15
+#define I40E_GLPES_PFIP6RXPKTSHI_IP6RXPKTSHI_SHIFT 0
+#define I40E_GLPES_PFIP6RXPKTSHI_IP6RXPKTSHI_MASK (0xFFFF << I40E_GLPES_PFIP6RXPKTSHI_IP6RXPKTSHI_SHIFT)
+#define I40E_GLPES_PFIP6RXPKTSLO(_i) (0x00011000 + ((_i) * 8)) /* _i=0...15 */
+#define I40E_GLPES_PFIP6RXPKTSLO_MAX_INDEX 15
+#define I40E_GLPES_PFIP6RXPKTSLO_IP6RXPKTSLO_SHIFT 0
+#define I40E_GLPES_PFIP6RXPKTSLO_IP6RXPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP6RXPKTSLO_IP6RXPKTSLO_SHIFT)
+#define I40E_GLPES_PFIP6RXTRUNC(_i) (0x00011300 + ((_i) * 4)) /* _i=0...15 */
+#define I40E_GLPES_PFIP6RXTRUNC_MAX_INDEX 15
+#define I40E_GLPES_PFIP6RXTRUNC_IP6RXTRUNC_SHIFT 0
+#define I40E_GLPES_PFIP6RXTRUNC_IP6RXTRUNC_MASK (0xFFFFFFFF << I40E_GLPES_PFIP6RXTRUNC_IP6RXTRUNC_SHIFT)
+#define I40E_GLPES_PFIP6TXFRAGSHI(_i) (0x00012804 + ((_i) * 8)) /* _i=0...15 */
+#define I40E_GLPES_PFIP6TXFRAGSHI_MAX_INDEX 15
+#define I40E_GLPES_PFIP6TXFRAGSHI_IP6TXFRAGSHI_SHIFT 0
+#define I40E_GLPES_PFIP6TXFRAGSHI_IP6TXFRAGSHI_MASK (0xFFFF << I40E_GLPES_PFIP6TXFRAGSHI_IP6TXFRAGSHI_SHIFT)
+#define I40E_GLPES_PFIP6TXFRAGSLO(_i) (0x00012800 + ((_i) * 8)) /* _i=0...15 */
+#define I40E_GLPES_PFIP6TXFRAGSLO_MAX_INDEX 15
+#define I40E_GLPES_PFIP6TXFRAGSLO_IP6TXFRAGSLO_SHIFT 0
+#define I40E_GLPES_PFIP6TXFRAGSLO_IP6TXFRAGSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP6TXFRAGSLO_IP6TXFRAGSLO_SHIFT)
+#define I40E_GLPES_PFIP6TXMCOCTSHI(_i) (0x00012A04 + ((_i) * 8))
+#define I40E_GLPES_PFIP6TXMCOCTSHI_MAX_INDEX 15
+#define I40E_GLPES_PFIP6TXMCOCTSHI_IP6TXMCOCTSHI_SHIFT 0
+#define I40E_GLPES_PFIP6TXMCOCTSHI_IP6TXMCOCTSHI_MASK (0xFFFF << I40E_GLPES_PFIP6TXMCOCTSHI_IP6TXMCOCTSHI_SHIFT)
+#define I40E_GLPES_PFIP6TXMCOCTSLO(_i) (0x00012A00 + ((_i) * 8))
+#define I40E_GLPES_PFIP6TXMCOCTSLO_MAX_INDEX 15
+#define I40E_GLPES_PFIP6TXMCOCTSLO_IP6TXMCOCTSLO_SHIFT 0
+#define I40E_GLPES_PFIP6TXMCOCTSLO_IP6TXMCOCTSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP6TXMCOCTSLO_IP6TXMCOCTSLO_SHIFT)
+#define I40E_GLPES_PFIP6TXMCPKTSHI(_i) (0x00012C04 + ((_i) * 8))
+#define I40E_GLPES_PFIP6TXMCPKTSHI_MAX_INDEX 15
+#define I40E_GLPES_PFIP6TXMCPKTSHI_IP6TXMCPKTSHI_SHIFT 0
+#define I40E_GLPES_PFIP6TXMCPKTSHI_IP6TXMCPKTSHI_MASK (0xFFFF << I40E_GLPES_PFIP6TXMCPKTSHI_IP6TXMCPKTSHI_SHIFT)
+#define I40E_GLPES_PFIP6TXMCPKTSLO(_i) (0x00012C00 + ((_i) * 8))
+#define I40E_GLPES_PFIP6TXMCPKTSLO_MAX_INDEX 15
+#define I40E_GLPES_PFIP6TXMCPKTSLO_IP6TXMCPKTSLO_SHIFT 0
+#define I40E_GLPES_PFIP6TXMCPKTSLO_IP6TXMCPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP6TXMCPKTSLO_IP6TXMCPKTSLO_SHIFT)
+#define I40E_GLPES_PFIP6TXNOROUTE(_i) (0x00012F00 + ((_i) * 4)) /* _i=0...15 */
+#define I40E_GLPES_PFIP6TXNOROUTE_MAX_INDEX 15
+#define I40E_GLPES_PFIP6TXNOROUTE_IP6TXNOROUTE_SHIFT 0
+#define I40E_GLPES_PFIP6TXNOROUTE_IP6TXNOROUTE_MASK (0xFFFFFF << I40E_GLPES_PFIP6TXNOROUTE_IP6TXNOROUTE_SHIFT)
+#define I40E_GLPES_PFIP6TXOCTSHI(_i) (0x00012404 + ((_i) * 8)) /* _i=0...15 */
+#define I40E_GLPES_PFIP6TXOCTSHI_MAX_INDEX 15
+#define I40E_GLPES_PFIP6TXOCTSHI_IP6TXOCTSHI_SHIFT 0
+#define I40E_GLPES_PFIP6TXOCTSHI_IP6TXOCTSHI_MASK (0xFFFF << I40E_GLPES_PFIP6TXOCTSHI_IP6TXOCTSHI_SHIFT)
+#define I40E_GLPES_PFIP6TXOCTSLO(_i) (0x00012400 + ((_i) * 8)) /* _i=0...15 */
+#define I40E_GLPES_PFIP6TXOCTSLO_MAX_INDEX 15
+#define I40E_GLPES_PFIP6TXOCTSLO_IP6TXOCTSLO_SHIFT 0
+#define I40E_GLPES_PFIP6TXOCTSLO_IP6TXOCTSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP6TXOCTSLO_IP6TXOCTSLO_SHIFT)
+#define I40E_GLPES_PFIP6TXPKTSHI(_i) (0x00012604 + ((_i) * 8)) /* _i=0...15 */
+#define I40E_GLPES_PFIP6TXPKTSHI_MAX_INDEX 15
+#define I40E_GLPES_PFIP6TXPKTSHI_IP6TXPKTSHI_SHIFT 0
+#define I40E_GLPES_PFIP6TXPKTSHI_IP6TXPKTSHI_MASK (0xFFFF << I40E_GLPES_PFIP6TXPKTSHI_IP6TXPKTSHI_SHIFT)
+#define I40E_GLPES_PFIP6TXPKTSLO(_i) (0x00012600 + ((_i) * 8)) /* _i=0...15 */
+#define I40E_GLPES_PFIP6TXPKTSLO_MAX_INDEX 15
+#define I40E_GLPES_PFIP6TXPKTSLO_IP6TXPKTSLO_SHIFT 0
+#define I40E_GLPES_PFIP6TXPKTSLO_IP6TXPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFIP6TXPKTSLO_IP6TXPKTSLO_SHIFT)
+#define I40E_GLPES_PFRDMARXRDSHI(_i) (0x00013E04 + ((_i) * 8)) /* _i=0...15 */
+#define I40E_GLPES_PFRDMARXRDSHI_MAX_INDEX 15
+#define I40E_GLPES_PFRDMARXRDSHI_RDMARXRDSHI_SHIFT 0
+#define I40E_GLPES_PFRDMARXRDSHI_RDMARXRDSHI_MASK (0xFFFF << I40E_GLPES_PFRDMARXRDSHI_RDMARXRDSHI_SHIFT)
+#define I40E_GLPES_PFRDMARXRDSLO(_i) (0x00013E00 + ((_i) * 8)) /* _i=0...15 */
+#define I40E_GLPES_PFRDMARXRDSLO_MAX_INDEX 15
+#define I40E_GLPES_PFRDMARXRDSLO_RDMARXRDSLO_SHIFT 0
+#define I40E_GLPES_PFRDMARXRDSLO_RDMARXRDSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFRDMARXRDSLO_RDMARXRDSLO_SHIFT)
+#define I40E_GLPES_PFRDMARXSNDSHI(_i) (0x00014004 + ((_i) * 8)) /* _i=0...15 */
+#define I40E_GLPES_PFRDMARXSNDSHI_MAX_INDEX 15
+#define I40E_GLPES_PFRDMARXSNDSHI_RDMARXSNDSHI_SHIFT 0
+#define I40E_GLPES_PFRDMARXSNDSHI_RDMARXSNDSHI_MASK (0xFFFF << I40E_GLPES_PFRDMARXSNDSHI_RDMARXSNDSHI_SHIFT)
+#define I40E_GLPES_PFRDMARXSNDSLO(_i) (0x00014000 + ((_i) * 8)) /* _i=0...15 */
+#define I40E_GLPES_PFRDMARXSNDSLO_MAX_INDEX 15
+#define I40E_GLPES_PFRDMARXSNDSLO_RDMARXSNDSLO_SHIFT 0
+#define I40E_GLPES_PFRDMARXSNDSLO_RDMARXSNDSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFRDMARXSNDSLO_RDMARXSNDSLO_SHIFT)
+#define I40E_GLPES_PFRDMARXWRSHI(_i) (0x00013C04 + ((_i) * 8)) /* _i=0...15 */
+#define I40E_GLPES_PFRDMARXWRSHI_MAX_INDEX 15
+#define I40E_GLPES_PFRDMARXWRSHI_RDMARXWRSHI_SHIFT 0
+#define I40E_GLPES_PFRDMARXWRSHI_RDMARXWRSHI_MASK (0xFFFF << I40E_GLPES_PFRDMARXWRSHI_RDMARXWRSHI_SHIFT)
+#define I40E_GLPES_PFRDMARXWRSLO(_i) (0x00013C00 + ((_i) * 8)) /* _i=0...15 */
+#define I40E_GLPES_PFRDMARXWRSLO_MAX_INDEX 15
+#define I40E_GLPES_PFRDMARXWRSLO_RDMARXWRSLO_SHIFT 0
+#define I40E_GLPES_PFRDMARXWRSLO_RDMARXWRSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFRDMARXWRSLO_RDMARXWRSLO_SHIFT)
+#define I40E_GLPES_PFRDMATXRDSHI(_i) (0x00014404 + ((_i) * 8)) /* _i=0...15 */
+#define I40E_GLPES_PFRDMATXRDSHI_MAX_INDEX 15
+#define I40E_GLPES_PFRDMATXRDSHI_RDMARXRDSHI_SHIFT 0
+#define I40E_GLPES_PFRDMATXRDSHI_RDMARXRDSHI_MASK (0xFFFF << I40E_GLPES_PFRDMATXRDSHI_RDMARXRDSHI_SHIFT)
+#define I40E_GLPES_PFRDMATXRDSLO(_i) (0x00014400 + ((_i) * 8)) /* _i=0...15 */
+#define I40E_GLPES_PFRDMATXRDSLO_MAX_INDEX 15
+#define I40E_GLPES_PFRDMATXRDSLO_RDMARXRDSLO_SHIFT 0
+#define I40E_GLPES_PFRDMATXRDSLO_RDMARXRDSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFRDMATXRDSLO_RDMARXRDSLO_SHIFT)
+#define I40E_GLPES_PFRDMATXSNDSHI(_i) (0x00014604 + ((_i) * 8)) /* _i=0...15 */
+#define I40E_GLPES_PFRDMATXSNDSHI_MAX_INDEX 15
+#define I40E_GLPES_PFRDMATXSNDSHI_RDMARXSNDSHI_SHIFT 0
+#define I40E_GLPES_PFRDMATXSNDSHI_RDMARXSNDSHI_MASK (0xFFFF << I40E_GLPES_PFRDMATXSNDSHI_RDMARXSNDSHI_SHIFT)
+#define I40E_GLPES_PFRDMATXSNDSLO(_i) (0x00014600 + ((_i) * 8)) /* _i=0...15 */
+#define I40E_GLPES_PFRDMATXSNDSLO_MAX_INDEX 15
+#define I40E_GLPES_PFRDMATXSNDSLO_RDMARXSNDSLO_SHIFT 0
+#define I40E_GLPES_PFRDMATXSNDSLO_RDMARXSNDSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFRDMATXSNDSLO_RDMARXSNDSLO_SHIFT)
+#define I40E_GLPES_PFRDMATXWRSHI(_i) (0x00014204 + ((_i) * 8)) /* _i=0...15 */
+#define I40E_GLPES_PFRDMATXWRSHI_MAX_INDEX 15
+#define I40E_GLPES_PFRDMATXWRSHI_RDMARXWRSHI_SHIFT 0
+#define I40E_GLPES_PFRDMATXWRSHI_RDMARXWRSHI_MASK (0xFFFF << I40E_GLPES_PFRDMATXWRSHI_RDMARXWRSHI_SHIFT)
+#define I40E_GLPES_PFRDMATXWRSLO(_i) (0x00014200 + ((_i) * 8)) /* _i=0...15 */
+#define I40E_GLPES_PFRDMATXWRSLO_MAX_INDEX 15
+#define I40E_GLPES_PFRDMATXWRSLO_RDMARXWRSLO_SHIFT 0
+#define I40E_GLPES_PFRDMATXWRSLO_RDMARXWRSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFRDMATXWRSLO_RDMARXWRSLO_SHIFT)
+#define I40E_GLPES_PFRDMAVBNDHI(_i) (0x00014804 + ((_i) * 8)) /* _i=0...15 */
+#define I40E_GLPES_PFRDMAVBNDHI_MAX_INDEX 15
+#define I40E_GLPES_PFRDMAVBNDHI_RDMAVBNDHI_SHIFT 0
+#define I40E_GLPES_PFRDMAVBNDHI_RDMAVBNDHI_MASK (0xFFFFFFFF << I40E_GLPES_PFRDMAVBNDHI_RDMAVBNDHI_SHIFT)
+#define I40E_GLPES_PFRDMAVBNDLO(_i) (0x00014800 + ((_i) * 8)) /* _i=0...15 */
+#define I40E_GLPES_PFRDMAVBNDLO_MAX_INDEX 15
+#define I40E_GLPES_PFRDMAVBNDLO_RDMAVBNDLO_SHIFT 0
+#define I40E_GLPES_PFRDMAVBNDLO_RDMAVBNDLO_MASK (0xFFFFFFFF << I40E_GLPES_PFRDMAVBNDLO_RDMAVBNDLO_SHIFT)
+#define I40E_GLPES_PFRDMAVINVHI(_i) (0x00014A04 + ((_i) * 8)) /* _i=0...15 */
+#define I40E_GLPES_PFRDMAVINVHI_MAX_INDEX 15
+#define I40E_GLPES_PFRDMAVINVHI_RDMAVINVHI_SHIFT 0
+#define I40E_GLPES_PFRDMAVINVHI_RDMAVINVHI_MASK (0xFFFFFFFF << I40E_GLPES_PFRDMAVINVHI_RDMAVINVHI_SHIFT)
+#define I40E_GLPES_PFRDMAVINVLO(_i) (0x00014A00 + ((_i) * 8)) /* _i=0...15 */
+#define I40E_GLPES_PFRDMAVINVLO_MAX_INDEX 15
+#define I40E_GLPES_PFRDMAVINVLO_RDMAVINVLO_SHIFT 0
+#define I40E_GLPES_PFRDMAVINVLO_RDMAVINVLO_MASK (0xFFFFFFFF << I40E_GLPES_PFRDMAVINVLO_RDMAVINVLO_SHIFT)
+#define I40E_GLPES_PFRXVLANERR(_i) (0x00010000 + ((_i) * 4)) /* _i=0...15 */
+#define I40E_GLPES_PFRXVLANERR_MAX_INDEX 15
+#define I40E_GLPES_PFRXVLANERR_RXVLANERR_SHIFT 0
+#define I40E_GLPES_PFRXVLANERR_RXVLANERR_MASK (0xFFFFFF << I40E_GLPES_PFRXVLANERR_RXVLANERR_SHIFT)
+#define I40E_GLPES_PFTCPRTXSEG(_i) (0x00013600 + ((_i) * 4)) /* _i=0...15 */
+#define I40E_GLPES_PFTCPRTXSEG_MAX_INDEX 15
+#define I40E_GLPES_PFTCPRTXSEG_TCPRTXSEG_SHIFT 0
+#define I40E_GLPES_PFTCPRTXSEG_TCPRTXSEG_MASK (0xFFFFFFFF << I40E_GLPES_PFTCPRTXSEG_TCPRTXSEG_SHIFT)
+#define I40E_GLPES_PFTCPRXOPTERR(_i) (0x00013200 + ((_i) * 4)) /* _i=0...15 */
+#define I40E_GLPES_PFTCPRXOPTERR_MAX_INDEX 15
+#define I40E_GLPES_PFTCPRXOPTERR_TCPRXOPTERR_SHIFT 0
+#define I40E_GLPES_PFTCPRXOPTERR_TCPRXOPTERR_MASK (0xFFFFFF << I40E_GLPES_PFTCPRXOPTERR_TCPRXOPTERR_SHIFT)
+#define I40E_GLPES_PFTCPRXPROTOERR(_i) (0x00013300 + ((_i) * 4))
+#define I40E_GLPES_PFTCPRXPROTOERR_MAX_INDEX 15
+#define I40E_GLPES_PFTCPRXPROTOERR_TCPRXPROTOERR_SHIFT 0
+#define I40E_GLPES_PFTCPRXPROTOERR_TCPRXPROTOERR_MASK (0xFFFFFF << I40E_GLPES_PFTCPRXPROTOERR_TCPRXPROTOERR_SHIFT)
+#define I40E_GLPES_PFTCPRXSEGSHI(_i) (0x00013004 + ((_i) * 8)) /* _i=0...15 */
+#define I40E_GLPES_PFTCPRXSEGSHI_MAX_INDEX 15
+#define I40E_GLPES_PFTCPRXSEGSHI_TCPRXSEGSHI_SHIFT 0
+#define I40E_GLPES_PFTCPRXSEGSHI_TCPRXSEGSHI_MASK (0xFFFF << I40E_GLPES_PFTCPRXSEGSHI_TCPRXSEGSHI_SHIFT)
+#define I40E_GLPES_PFTCPRXSEGSLO(_i) (0x00013000 + ((_i) * 8)) /* _i=0...15 */
+#define I40E_GLPES_PFTCPRXSEGSLO_MAX_INDEX 15
+#define I40E_GLPES_PFTCPRXSEGSLO_TCPRXSEGSLO_SHIFT 0
+#define I40E_GLPES_PFTCPRXSEGSLO_TCPRXSEGSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFTCPRXSEGSLO_TCPRXSEGSLO_SHIFT)
+#define I40E_GLPES_PFTCPTXSEGHI(_i) (0x00013404 + ((_i) * 8)) /* _i=0...15 */
+#define I40E_GLPES_PFTCPTXSEGHI_MAX_INDEX 15
+#define I40E_GLPES_PFTCPTXSEGHI_TCPTXSEGHI_SHIFT 0
+#define I40E_GLPES_PFTCPTXSEGHI_TCPTXSEGHI_MASK (0xFFFF << I40E_GLPES_PFTCPTXSEGHI_TCPTXSEGHI_SHIFT)
+#define I40E_GLPES_PFTCPTXSEGLO(_i) (0x00013400 + ((_i) * 8)) /* _i=0...15 */
+#define I40E_GLPES_PFTCPTXSEGLO_MAX_INDEX 15
+#define I40E_GLPES_PFTCPTXSEGLO_TCPTXSEGLO_SHIFT 0
+#define I40E_GLPES_PFTCPTXSEGLO_TCPTXSEGLO_MASK (0xFFFFFFFF << I40E_GLPES_PFTCPTXSEGLO_TCPTXSEGLO_SHIFT)
+#define I40E_GLPES_PFUDPRXPKTSHI(_i) (0x00013804 + ((_i) * 8)) /* _i=0...15 */
+#define I40E_GLPES_PFUDPRXPKTSHI_MAX_INDEX 15
+#define I40E_GLPES_PFUDPRXPKTSHI_UDPRXPKTSHI_SHIFT 0
+#define I40E_GLPES_PFUDPRXPKTSHI_UDPRXPKTSHI_MASK (0xFFFF << I40E_GLPES_PFUDPRXPKTSHI_UDPRXPKTSHI_SHIFT)
+#define I40E_GLPES_PFUDPRXPKTSLO(_i) (0x00013800 + ((_i) * 8)) /* _i=0...15 */
+#define I40E_GLPES_PFUDPRXPKTSLO_MAX_INDEX 15
+#define I40E_GLPES_PFUDPRXPKTSLO_UDPRXPKTSLO_SHIFT 0
+#define I40E_GLPES_PFUDPRXPKTSLO_UDPRXPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFUDPRXPKTSLO_UDPRXPKTSLO_SHIFT)
+#define I40E_GLPES_PFUDPTXPKTSHI(_i) (0x00013A04 + ((_i) * 8)) /* _i=0...15 */
+#define I40E_GLPES_PFUDPTXPKTSHI_MAX_INDEX 15
+#define I40E_GLPES_PFUDPTXPKTSHI_UDPTXPKTSHI_SHIFT 0
+#define I40E_GLPES_PFUDPTXPKTSHI_UDPTXPKTSHI_MASK (0xFFFF << I40E_GLPES_PFUDPTXPKTSHI_UDPTXPKTSHI_SHIFT)
+#define I40E_GLPES_PFUDPTXPKTSLO(_i) (0x00013A00 + ((_i) * 8)) /* _i=0...15 */
+#define I40E_GLPES_PFUDPTXPKTSLO_MAX_INDEX 15
+#define I40E_GLPES_PFUDPTXPKTSLO_UDPTXPKTSLO_SHIFT 0
+#define I40E_GLPES_PFUDPTXPKTSLO_UDPTXPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_PFUDPTXPKTSLO_UDPTXPKTSLO_SHIFT)
+#define I40E_GLPES_RDMARXMULTFPDUSHI 0x0001E014
+#define I40E_GLPES_RDMARXMULTFPDUSHI_RDMARXMULTFPDUSHI_SHIFT 0
+#define I40E_GLPES_RDMARXMULTFPDUSHI_RDMARXMULTFPDUSHI_MASK (0xFFFFFF << I40E_GLPES_RDMARXMULTFPDUSHI_RDMARXMULTFPDUSHI_SHIFT)
+#define I40E_GLPES_RDMARXMULTFPDUSLO 0x0001E010
+#define I40E_GLPES_RDMARXMULTFPDUSLO_RDMARXMULTFPDUSLO_SHIFT 0
+#define I40E_GLPES_RDMARXMULTFPDUSLO_RDMARXMULTFPDUSLO_MASK (0xFFFFFFFF << I40E_GLPES_RDMARXMULTFPDUSLO_RDMARXMULTFPDUSLO_SHIFT)
+#define I40E_GLPES_RDMARXOOODDPHI 0x0001E01C
+#define I40E_GLPES_RDMARXOOODDPHI_RDMARXOOODDPHI_SHIFT 0
+#define I40E_GLPES_RDMARXOOODDPHI_RDMARXOOODDPHI_MASK (0xFFFFFF << I40E_GLPES_RDMARXOOODDPHI_RDMARXOOODDPHI_SHIFT)
+#define I40E_GLPES_RDMARXOOODDPLO 0x0001E018
+#define I40E_GLPES_RDMARXOOODDPLO_RDMARXOOODDPLO_SHIFT 0
+#define I40E_GLPES_RDMARXOOODDPLO_RDMARXOOODDPLO_MASK (0xFFFFFFFF << I40E_GLPES_RDMARXOOODDPLO_RDMARXOOODDPLO_SHIFT)
+#define I40E_GLPES_RDMARXOOONOMARK 0x0001E004
+#define I40E_GLPES_RDMARXOOONOMARK_RDMAOOONOMARK_SHIFT 0
+#define I40E_GLPES_RDMARXOOONOMARK_RDMAOOONOMARK_MASK (0xFFFFFFFF << I40E_GLPES_RDMARXOOONOMARK_RDMAOOONOMARK_SHIFT)
+#define I40E_GLPES_RDMARXUNALIGN 0x0001E000
+#define I40E_GLPES_RDMARXUNALIGN_RDMRXAUNALIGN_SHIFT 0
+#define I40E_GLPES_RDMARXUNALIGN_RDMRXAUNALIGN_MASK (0xFFFFFFFF << I40E_GLPES_RDMARXUNALIGN_RDMRXAUNALIGN_SHIFT)
+#define I40E_GLPES_TCPRXFOURHOLEHI 0x0001E044
+#define I40E_GLPES_TCPRXFOURHOLEHI_TCPRXFOURHOLEHI_SHIFT 0
+#define I40E_GLPES_TCPRXFOURHOLEHI_TCPRXFOURHOLEHI_MASK (0xFFFFFF << I40E_GLPES_TCPRXFOURHOLEHI_TCPRXFOURHOLEHI_SHIFT)
+#define I40E_GLPES_TCPRXFOURHOLELO 0x0001E040
+#define I40E_GLPES_TCPRXFOURHOLELO_TCPRXFOURHOLELO_SHIFT 0
+#define I40E_GLPES_TCPRXFOURHOLELO_TCPRXFOURHOLELO_MASK (0xFFFFFFFF << I40E_GLPES_TCPRXFOURHOLELO_TCPRXFOURHOLELO_SHIFT)
+#define I40E_GLPES_TCPRXONEHOLEHI 0x0001E02C
+#define I40E_GLPES_TCPRXONEHOLEHI_TCPRXONEHOLEHI_SHIFT 0
+#define I40E_GLPES_TCPRXONEHOLEHI_TCPRXONEHOLEHI_MASK (0xFFFFFF << I40E_GLPES_TCPRXONEHOLEHI_TCPRXONEHOLEHI_SHIFT)
+#define I40E_GLPES_TCPRXONEHOLELO 0x0001E028
+#define I40E_GLPES_TCPRXONEHOLELO_TCPRXONEHOLELO_SHIFT 0
+#define I40E_GLPES_TCPRXONEHOLELO_TCPRXONEHOLELO_MASK (0xFFFFFFFF << I40E_GLPES_TCPRXONEHOLELO_TCPRXONEHOLELO_SHIFT)
+#define I40E_GLPES_TCPRXPUREACKHI 0x0001E024
+#define I40E_GLPES_TCPRXPUREACKHI_TCPRXPUREACKSHI_SHIFT 0
+#define I40E_GLPES_TCPRXPUREACKHI_TCPRXPUREACKSHI_MASK (0xFFFFFF << I40E_GLPES_TCPRXPUREACKHI_TCPRXPUREACKSHI_SHIFT)
+#define I40E_GLPES_TCPRXPUREACKSLO 0x0001E020
+#define I40E_GLPES_TCPRXPUREACKSLO_TCPRXPUREACKLO_SHIFT 0
+#define I40E_GLPES_TCPRXPUREACKSLO_TCPRXPUREACKLO_MASK (0xFFFFFFFF << I40E_GLPES_TCPRXPUREACKSLO_TCPRXPUREACKLO_SHIFT)
+#define I40E_GLPES_TCPRXTHREEHOLEHI 0x0001E03C
+#define I40E_GLPES_TCPRXTHREEHOLEHI_TCPRXTHREEHOLEHI_SHIFT 0
+#define I40E_GLPES_TCPRXTHREEHOLEHI_TCPRXTHREEHOLEHI_MASK (0xFFFFFF << I40E_GLPES_TCPRXTHREEHOLEHI_TCPRXTHREEHOLEHI_SHIFT)
+#define I40E_GLPES_TCPRXTHREEHOLELO 0x0001E038
+#define I40E_GLPES_TCPRXTHREEHOLELO_TCPRXTHREEHOLELO_SHIFT 0
+#define I40E_GLPES_TCPRXTHREEHOLELO_TCPRXTHREEHOLELO_MASK (0xFFFFFFFF << I40E_GLPES_TCPRXTHREEHOLELO_TCPRXTHREEHOLELO_SHIFT)
+#define I40E_GLPES_TCPRXTWOHOLEHI 0x0001E034
+#define I40E_GLPES_TCPRXTWOHOLEHI_TCPRXTWOHOLEHI_SHIFT 0
+#define I40E_GLPES_TCPRXTWOHOLEHI_TCPRXTWOHOLEHI_MASK (0xFFFFFF << I40E_GLPES_TCPRXTWOHOLEHI_TCPRXTWOHOLEHI_SHIFT)
+#define I40E_GLPES_TCPRXTWOHOLELO 0x0001E030
+#define I40E_GLPES_TCPRXTWOHOLELO_TCPRXTWOHOLELO_SHIFT 0
+#define I40E_GLPES_TCPRXTWOHOLELO_TCPRXTWOHOLELO_MASK (0xFFFFFFFF << I40E_GLPES_TCPRXTWOHOLELO_TCPRXTWOHOLELO_SHIFT)
+#define I40E_GLPES_TCPRXUNEXPERR 0x0001E008
+#define I40E_GLPES_TCPRXUNEXPERR_TCPRXUNEXPERR_SHIFT 0
+#define I40E_GLPES_TCPRXUNEXPERR_TCPRXUNEXPERR_MASK (0xFFFFFF << I40E_GLPES_TCPRXUNEXPERR_TCPRXUNEXPERR_SHIFT)
+#define I40E_GLPES_TCPTXRETRANSFASTHI 0x0001E04C
+#define I40E_GLPES_TCPTXRETRANSFASTHI_TCPTXRETRANSFASTHI_SHIFT 0
+#define I40E_GLPES_TCPTXRETRANSFASTHI_TCPTXRETRANSFASTHI_MASK (0xFFFFFF << I40E_GLPES_TCPTXRETRANSFASTHI_TCPTXRETRANSFASTHI_SHIFT)
+#define I40E_GLPES_TCPTXRETRANSFASTLO 0x0001E048
+#define I40E_GLPES_TCPTXRETRANSFASTLO_TCPTXRETRANSFASTLO_SHIFT 0
+#define I40E_GLPES_TCPTXRETRANSFASTLO_TCPTXRETRANSFASTLO_MASK (0xFFFFFFFF << I40E_GLPES_TCPTXRETRANSFASTLO_TCPTXRETRANSFASTLO_SHIFT)
+#define I40E_GLPES_TCPTXTOUTSFASTHI 0x0001E054
+#define I40E_GLPES_TCPTXTOUTSFASTHI_TCPTXTOUTSFASTHI_SHIFT 0
+#define I40E_GLPES_TCPTXTOUTSFASTHI_TCPTXTOUTSFASTHI_MASK (0xFFFFFF << I40E_GLPES_TCPTXTOUTSFASTHI_TCPTXTOUTSFASTHI_SHIFT)
+#define I40E_GLPES_TCPTXTOUTSFASTLO 0x0001E050
+#define I40E_GLPES_TCPTXTOUTSFASTLO_TCPTXTOUTSFASTLO_SHIFT 0
+#define I40E_GLPES_TCPTXTOUTSFASTLO_TCPTXTOUTSFASTLO_MASK (0xFFFFFFFF << I40E_GLPES_TCPTXTOUTSFASTLO_TCPTXTOUTSFASTLO_SHIFT)
+#define I40E_GLPES_TCPTXTOUTSHI 0x0001E05C
+#define I40E_GLPES_TCPTXTOUTSHI_TCPTXTOUTSHI_SHIFT 0
+#define I40E_GLPES_TCPTXTOUTSHI_TCPTXTOUTSHI_MASK (0xFFFFFF << I40E_GLPES_TCPTXTOUTSHI_TCPTXTOUTSHI_SHIFT)
+#define I40E_GLPES_TCPTXTOUTSLO 0x0001E058
+#define I40E_GLPES_TCPTXTOUTSLO_TCPTXTOUTSLO_SHIFT 0
+#define I40E_GLPES_TCPTXTOUTSLO_TCPTXTOUTSLO_MASK (0xFFFFFFFF << I40E_GLPES_TCPTXTOUTSLO_TCPTXTOUTSLO_SHIFT)
+#define I40E_GLPES_VFIP4RXDISCARD(_i) (0x00018600 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLPES_VFIP4RXDISCARD_MAX_INDEX 31
+#define I40E_GLPES_VFIP4RXDISCARD_IP4RXDISCARD_SHIFT 0
+#define I40E_GLPES_VFIP4RXDISCARD_IP4RXDISCARD_MASK (0xFFFFFFFF << I40E_GLPES_VFIP4RXDISCARD_IP4RXDISCARD_SHIFT)
+#define I40E_GLPES_VFIP4RXFRAGSHI(_i) (0x00018804 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLPES_VFIP4RXFRAGSHI_MAX_INDEX 31
+#define I40E_GLPES_VFIP4RXFRAGSHI_IP4RXFRAGSHI_SHIFT 0
+#define I40E_GLPES_VFIP4RXFRAGSHI_IP4RXFRAGSHI_MASK (0xFFFF << I40E_GLPES_VFIP4RXFRAGSHI_IP4RXFRAGSHI_SHIFT)
+#define I40E_GLPES_VFIP4RXFRAGSLO(_i) (0x00018800 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLPES_VFIP4RXFRAGSLO_MAX_INDEX 31
+#define I40E_GLPES_VFIP4RXFRAGSLO_IP4RXFRAGSLO_SHIFT 0
+#define I40E_GLPES_VFIP4RXFRAGSLO_IP4RXFRAGSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP4RXFRAGSLO_IP4RXFRAGSLO_SHIFT)
+#define I40E_GLPES_VFIP4RXMCOCTSHI(_i) (0x00018A04 + ((_i) * 4))
+#define I40E_GLPES_VFIP4RXMCOCTSHI_MAX_INDEX 31
+#define I40E_GLPES_VFIP4RXMCOCTSHI_IP4RXMCOCTSHI_SHIFT 0
+#define I40E_GLPES_VFIP4RXMCOCTSHI_IP4RXMCOCTSHI_MASK (0xFFFF << I40E_GLPES_VFIP4RXMCOCTSHI_IP4RXMCOCTSHI_SHIFT)
+#define I40E_GLPES_VFIP4RXMCOCTSLO(_i) (0x00018A00 + ((_i) * 4))
+#define I40E_GLPES_VFIP4RXMCOCTSLO_MAX_INDEX 31
+#define I40E_GLPES_VFIP4RXMCOCTSLO_IP4RXMCOCTSLO_SHIFT 0
+#define I40E_GLPES_VFIP4RXMCOCTSLO_IP4RXMCOCTSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP4RXMCOCTSLO_IP4RXMCOCTSLO_SHIFT)
+#define I40E_GLPES_VFIP4RXMCPKTSHI(_i) (0x00018C04 + ((_i) * 4))
+#define I40E_GLPES_VFIP4RXMCPKTSHI_MAX_INDEX 31
+#define I40E_GLPES_VFIP4RXMCPKTSHI_IP4RXMCPKTSHI_SHIFT 0
+#define I40E_GLPES_VFIP4RXMCPKTSHI_IP4RXMCPKTSHI_MASK (0xFFFF << I40E_GLPES_VFIP4RXMCPKTSHI_IP4RXMCPKTSHI_SHIFT)
+#define I40E_GLPES_VFIP4RXMCPKTSLO(_i) (0x00018C00 + ((_i) * 4))
+#define I40E_GLPES_VFIP4RXMCPKTSLO_MAX_INDEX 31
+#define I40E_GLPES_VFIP4RXMCPKTSLO_IP4RXMCPKTSLO_SHIFT 0
+#define I40E_GLPES_VFIP4RXMCPKTSLO_IP4RXMCPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP4RXMCPKTSLO_IP4RXMCPKTSLO_SHIFT)
+#define I40E_GLPES_VFIP4RXOCTSHI(_i) (0x00018204 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLPES_VFIP4RXOCTSHI_MAX_INDEX 31
+#define I40E_GLPES_VFIP4RXOCTSHI_IP4RXOCTSHI_SHIFT 0
+#define I40E_GLPES_VFIP4RXOCTSHI_IP4RXOCTSHI_MASK (0xFFFF << I40E_GLPES_VFIP4RXOCTSHI_IP4RXOCTSHI_SHIFT)
+#define I40E_GLPES_VFIP4RXOCTSLO(_i) (0x00018200 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLPES_VFIP4RXOCTSLO_MAX_INDEX 31
+#define I40E_GLPES_VFIP4RXOCTSLO_IP4RXOCTSLO_SHIFT 0
+#define I40E_GLPES_VFIP4RXOCTSLO_IP4RXOCTSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP4RXOCTSLO_IP4RXOCTSLO_SHIFT)
+#define I40E_GLPES_VFIP4RXPKTSHI(_i) (0x00018404 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLPES_VFIP4RXPKTSHI_MAX_INDEX 31
+#define I40E_GLPES_VFIP4RXPKTSHI_IP4RXPKTSHI_SHIFT 0
+#define I40E_GLPES_VFIP4RXPKTSHI_IP4RXPKTSHI_MASK (0xFFFF << I40E_GLPES_VFIP4RXPKTSHI_IP4RXPKTSHI_SHIFT)
+#define I40E_GLPES_VFIP4RXPKTSLO(_i) (0x00018400 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLPES_VFIP4RXPKTSLO_MAX_INDEX 31
+#define I40E_GLPES_VFIP4RXPKTSLO_IP4RXPKTSLO_SHIFT 0
+#define I40E_GLPES_VFIP4RXPKTSLO_IP4RXPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP4RXPKTSLO_IP4RXPKTSLO_SHIFT)
+#define I40E_GLPES_VFIP4RXTRUNC(_i) (0x00018700 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLPES_VFIP4RXTRUNC_MAX_INDEX 31
+#define I40E_GLPES_VFIP4RXTRUNC_IP4RXTRUNC_SHIFT 0
+#define I40E_GLPES_VFIP4RXTRUNC_IP4RXTRUNC_MASK (0xFFFFFFFF << I40E_GLPES_VFIP4RXTRUNC_IP4RXTRUNC_SHIFT)
+#define I40E_GLPES_VFIP4TXFRAGSHI(_i) (0x00019E04 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLPES_VFIP4TXFRAGSHI_MAX_INDEX 31
+#define I40E_GLPES_VFIP4TXFRAGSHI_IP4TXFRAGSHI_SHIFT 0
+#define I40E_GLPES_VFIP4TXFRAGSHI_IP4TXFRAGSHI_MASK (0xFFFF << I40E_GLPES_VFIP4TXFRAGSHI_IP4TXFRAGSHI_SHIFT)
+#define I40E_GLPES_VFIP4TXFRAGSLO(_i) (0x00019E00 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLPES_VFIP4TXFRAGSLO_MAX_INDEX 31
+#define I40E_GLPES_VFIP4TXFRAGSLO_IP4TXFRAGSLO_SHIFT 0
+#define I40E_GLPES_VFIP4TXFRAGSLO_IP4TXFRAGSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP4TXFRAGSLO_IP4TXFRAGSLO_SHIFT)
+#define I40E_GLPES_VFIP4TXMCOCTSHI(_i) (0x0001A004 + ((_i) * 4))
+#define I40E_GLPES_VFIP4TXMCOCTSHI_MAX_INDEX 31
+#define I40E_GLPES_VFIP4TXMCOCTSHI_IP4TXMCOCTSHI_SHIFT 0
+#define I40E_GLPES_VFIP4TXMCOCTSHI_IP4TXMCOCTSHI_MASK (0xFFFF << I40E_GLPES_VFIP4TXMCOCTSHI_IP4TXMCOCTSHI_SHIFT)
+#define I40E_GLPES_VFIP4TXMCOCTSLO(_i) (0x0001A000 + ((_i) * 4))
+#define I40E_GLPES_VFIP4TXMCOCTSLO_MAX_INDEX 31
+#define I40E_GLPES_VFIP4TXMCOCTSLO_IP4TXMCOCTSLO_SHIFT 0
+#define I40E_GLPES_VFIP4TXMCOCTSLO_IP4TXMCOCTSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP4TXMCOCTSLO_IP4TXMCOCTSLO_SHIFT)
+#define I40E_GLPES_VFIP4TXMCPKTSHI(_i) (0x0001A204 + ((_i) * 4))
+#define I40E_GLPES_VFIP4TXMCPKTSHI_MAX_INDEX 31
+#define I40E_GLPES_VFIP4TXMCPKTSHI_IP4TXMCPKTSHI_SHIFT 0
+#define I40E_GLPES_VFIP4TXMCPKTSHI_IP4TXMCPKTSHI_MASK (0xFFFF << I40E_GLPES_VFIP4TXMCPKTSHI_IP4TXMCPKTSHI_SHIFT)
+#define I40E_GLPES_VFIP4TXMCPKTSLO(_i) (0x0001A200 + ((_i) * 4))
+#define I40E_GLPES_VFIP4TXMCPKTSLO_MAX_INDEX 31
+#define I40E_GLPES_VFIP4TXMCPKTSLO_IP4TXMCPKTSLO_SHIFT 0
+#define I40E_GLPES_VFIP4TXMCPKTSLO_IP4TXMCPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP4TXMCPKTSLO_IP4TXMCPKTSLO_SHIFT)
+#define I40E_GLPES_VFIP4TXNOROUTE(_i) (0x0001AE00 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLPES_VFIP4TXNOROUTE_MAX_INDEX 31
+#define I40E_GLPES_VFIP4TXNOROUTE_IP4TXNOROUTE_SHIFT 0
+#define I40E_GLPES_VFIP4TXNOROUTE_IP4TXNOROUTE_MASK (0xFFFFFF << I40E_GLPES_VFIP4TXNOROUTE_IP4TXNOROUTE_SHIFT)
+#define I40E_GLPES_VFIP4TXOCTSHI(_i) (0x00019A04 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLPES_VFIP4TXOCTSHI_MAX_INDEX 31
+#define I40E_GLPES_VFIP4TXOCTSHI_IP4TXOCTSHI_SHIFT 0
+#define I40E_GLPES_VFIP4TXOCTSHI_IP4TXOCTSHI_MASK (0xFFFF << I40E_GLPES_VFIP4TXOCTSHI_IP4TXOCTSHI_SHIFT)
+#define I40E_GLPES_VFIP4TXOCTSLO(_i) (0x00019A00 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLPES_VFIP4TXOCTSLO_MAX_INDEX 31
+#define I40E_GLPES_VFIP4TXOCTSLO_IP4TXOCTSLO_SHIFT 0
+#define I40E_GLPES_VFIP4TXOCTSLO_IP4TXOCTSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP4TXOCTSLO_IP4TXOCTSLO_SHIFT)
+#define I40E_GLPES_VFIP4TXPKTSHI(_i) (0x00019C04 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLPES_VFIP4TXPKTSHI_MAX_INDEX 31
+#define I40E_GLPES_VFIP4TXPKTSHI_IP4TXPKTSHI_SHIFT 0
+#define I40E_GLPES_VFIP4TXPKTSHI_IP4TXPKTSHI_MASK (0xFFFF << I40E_GLPES_VFIP4TXPKTSHI_IP4TXPKTSHI_SHIFT)
+#define I40E_GLPES_VFIP4TXPKTSLO(_i) (0x00019C00 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLPES_VFIP4TXPKTSLO_MAX_INDEX 31
+#define I40E_GLPES_VFIP4TXPKTSLO_IP4TXPKTSLO_SHIFT 0
+#define I40E_GLPES_VFIP4TXPKTSLO_IP4TXPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP4TXPKTSLO_IP4TXPKTSLO_SHIFT)
+#define I40E_GLPES_VFIP6RXDISCARD(_i) (0x00019200 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLPES_VFIP6RXDISCARD_MAX_INDEX 31
+#define I40E_GLPES_VFIP6RXDISCARD_IP6RXDISCARD_SHIFT 0
+#define I40E_GLPES_VFIP6RXDISCARD_IP6RXDISCARD_MASK (0xFFFFFFFF << I40E_GLPES_VFIP6RXDISCARD_IP6RXDISCARD_SHIFT)
+#define I40E_GLPES_VFIP6RXFRAGSHI(_i) (0x00019404 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLPES_VFIP6RXFRAGSHI_MAX_INDEX 31
+#define I40E_GLPES_VFIP6RXFRAGSHI_IP6RXFRAGSHI_SHIFT 0
+#define I40E_GLPES_VFIP6RXFRAGSHI_IP6RXFRAGSHI_MASK (0xFFFF << I40E_GLPES_VFIP6RXFRAGSHI_IP6RXFRAGSHI_SHIFT)
+#define I40E_GLPES_VFIP6RXFRAGSLO(_i) (0x00019400 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLPES_VFIP6RXFRAGSLO_MAX_INDEX 31
+#define I40E_GLPES_VFIP6RXFRAGSLO_IP6RXFRAGSLO_SHIFT 0
+#define I40E_GLPES_VFIP6RXFRAGSLO_IP6RXFRAGSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP6RXFRAGSLO_IP6RXFRAGSLO_SHIFT)
+#define I40E_GLPES_VFIP6RXMCOCTSHI(_i) (0x00019604 + ((_i) * 4))
+#define I40E_GLPES_VFIP6RXMCOCTSHI_MAX_INDEX 31
+#define I40E_GLPES_VFIP6RXMCOCTSHI_IP6RXMCOCTSHI_SHIFT 0
+#define I40E_GLPES_VFIP6RXMCOCTSHI_IP6RXMCOCTSHI_MASK (0xFFFF << I40E_GLPES_VFIP6RXMCOCTSHI_IP6RXMCOCTSHI_SHIFT)
+#define I40E_GLPES_VFIP6RXMCOCTSLO(_i) (0x00019600 + ((_i) * 4))
+#define I40E_GLPES_VFIP6RXMCOCTSLO_MAX_INDEX 31
+#define I40E_GLPES_VFIP6RXMCOCTSLO_IP6RXMCOCTSLO_SHIFT 0
+#define I40E_GLPES_VFIP6RXMCOCTSLO_IP6RXMCOCTSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP6RXMCOCTSLO_IP6RXMCOCTSLO_SHIFT)
+#define I40E_GLPES_VFIP6RXMCPKTSHI(_i) (0x00019804 + ((_i) * 4))
+#define I40E_GLPES_VFIP6RXMCPKTSHI_MAX_INDEX 31
+#define I40E_GLPES_VFIP6RXMCPKTSHI_IP6RXMCPKTSHI_SHIFT 0
+#define I40E_GLPES_VFIP6RXMCPKTSHI_IP6RXMCPKTSHI_MASK (0xFFFF << I40E_GLPES_VFIP6RXMCPKTSHI_IP6RXMCPKTSHI_SHIFT)
+#define I40E_GLPES_VFIP6RXMCPKTSLO(_i) (0x00019800 + ((_i) * 4))
+#define I40E_GLPES_VFIP6RXMCPKTSLO_MAX_INDEX 31
+#define I40E_GLPES_VFIP6RXMCPKTSLO_IP6RXMCPKTSLO_SHIFT 0
+#define I40E_GLPES_VFIP6RXMCPKTSLO_IP6RXMCPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP6RXMCPKTSLO_IP6RXMCPKTSLO_SHIFT)
+#define I40E_GLPES_VFIP6RXOCTSHI(_i) (0x00018E04 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLPES_VFIP6RXOCTSHI_MAX_INDEX 31
+#define I40E_GLPES_VFIP6RXOCTSHI_IP6RXOCTSHI_SHIFT 0
+#define I40E_GLPES_VFIP6RXOCTSHI_IP6RXOCTSHI_MASK (0xFFFF << I40E_GLPES_VFIP6RXOCTSHI_IP6RXOCTSHI_SHIFT)
+#define I40E_GLPES_VFIP6RXOCTSLO(_i) (0x00018E00 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLPES_VFIP6RXOCTSLO_MAX_INDEX 31
+#define I40E_GLPES_VFIP6RXOCTSLO_IP6RXOCTSLO_SHIFT 0
+#define I40E_GLPES_VFIP6RXOCTSLO_IP6RXOCTSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP6RXOCTSLO_IP6RXOCTSLO_SHIFT)
+#define I40E_GLPES_VFIP6RXPKTSHI(_i) (0x00019004 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLPES_VFIP6RXPKTSHI_MAX_INDEX 31
+#define I40E_GLPES_VFIP6RXPKTSHI_IP6RXPKTSHI_SHIFT 0
+#define I40E_GLPES_VFIP6RXPKTSHI_IP6RXPKTSHI_MASK (0xFFFF << I40E_GLPES_VFIP6RXPKTSHI_IP6RXPKTSHI_SHIFT)
+#define I40E_GLPES_VFIP6RXPKTSLO(_i) (0x00019000 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLPES_VFIP6RXPKTSLO_MAX_INDEX 31
+#define I40E_GLPES_VFIP6RXPKTSLO_IP6RXPKTSLO_SHIFT 0
+#define I40E_GLPES_VFIP6RXPKTSLO_IP6RXPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP6RXPKTSLO_IP6RXPKTSLO_SHIFT)
+#define I40E_GLPES_VFIP6RXTRUNC(_i) (0x00019300 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLPES_VFIP6RXTRUNC_MAX_INDEX 31
+#define I40E_GLPES_VFIP6RXTRUNC_IP6RXTRUNC_SHIFT 0
+#define I40E_GLPES_VFIP6RXTRUNC_IP6RXTRUNC_MASK (0xFFFFFFFF << I40E_GLPES_VFIP6RXTRUNC_IP6RXTRUNC_SHIFT)
+#define I40E_GLPES_VFIP6TXFRAGSHI(_i) (0x0001A804 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLPES_VFIP6TXFRAGSHI_MAX_INDEX 31
+#define I40E_GLPES_VFIP6TXFRAGSHI_IP6TXFRAGSHI_SHIFT 0
+#define I40E_GLPES_VFIP6TXFRAGSHI_IP6TXFRAGSHI_MASK (0xFFFF << I40E_GLPES_VFIP6TXFRAGSHI_IP6TXFRAGSHI_SHIFT)
+#define I40E_GLPES_VFIP6TXFRAGSLO(_i) (0x0001A800 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLPES_VFIP6TXFRAGSLO_MAX_INDEX 31
+#define I40E_GLPES_VFIP6TXFRAGSLO_IP6TXFRAGSLO_SHIFT 0
+#define I40E_GLPES_VFIP6TXFRAGSLO_IP6TXFRAGSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP6TXFRAGSLO_IP6TXFRAGSLO_SHIFT)
+#define I40E_GLPES_VFIP6TXMCOCTSHI(_i) (0x0001AA04 + ((_i) * 4))
+#define I40E_GLPES_VFIP6TXMCOCTSHI_MAX_INDEX 31
+#define I40E_GLPES_VFIP6TXMCOCTSHI_IP6TXMCOCTSHI_SHIFT 0
+#define I40E_GLPES_VFIP6TXMCOCTSHI_IP6TXMCOCTSHI_MASK (0xFFFF << I40E_GLPES_VFIP6TXMCOCTSHI_IP6TXMCOCTSHI_SHIFT)
+#define I40E_GLPES_VFIP6TXMCOCTSLO(_i) (0x0001AA00 + ((_i) * 4))
+#define I40E_GLPES_VFIP6TXMCOCTSLO_MAX_INDEX 31
+#define I40E_GLPES_VFIP6TXMCOCTSLO_IP6TXMCOCTSLO_SHIFT 0
+#define I40E_GLPES_VFIP6TXMCOCTSLO_IP6TXMCOCTSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP6TXMCOCTSLO_IP6TXMCOCTSLO_SHIFT)
+#define I40E_GLPES_VFIP6TXMCPKTSHI(_i) (0x0001AC04 + ((_i) * 4))
+#define I40E_GLPES_VFIP6TXMCPKTSHI_MAX_INDEX 31
+#define I40E_GLPES_VFIP6TXMCPKTSHI_IP6TXMCPKTSHI_SHIFT 0
+#define I40E_GLPES_VFIP6TXMCPKTSHI_IP6TXMCPKTSHI_MASK (0xFFFF << I40E_GLPES_VFIP6TXMCPKTSHI_IP6TXMCPKTSHI_SHIFT)
+#define I40E_GLPES_VFIP6TXMCPKTSLO(_i) (0x0001AC00 + ((_i) * 4))
+#define I40E_GLPES_VFIP6TXMCPKTSLO_MAX_INDEX 31
+#define I40E_GLPES_VFIP6TXMCPKTSLO_IP6TXMCPKTSLO_SHIFT 0
+#define I40E_GLPES_VFIP6TXMCPKTSLO_IP6TXMCPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP6TXMCPKTSLO_IP6TXMCPKTSLO_SHIFT)
+#define I40E_GLPES_VFIP6TXNOROUTE(_i) (0x0001AF00 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLPES_VFIP6TXNOROUTE_MAX_INDEX 31
+#define I40E_GLPES_VFIP6TXNOROUTE_IP6TXNOROUTE_SHIFT 0
+#define I40E_GLPES_VFIP6TXNOROUTE_IP6TXNOROUTE_MASK (0xFFFFFF << I40E_GLPES_VFIP6TXNOROUTE_IP6TXNOROUTE_SHIFT)
+#define I40E_GLPES_VFIP6TXOCTSHI(_i) (0x0001A404 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLPES_VFIP6TXOCTSHI_MAX_INDEX 31
+#define I40E_GLPES_VFIP6TXOCTSHI_IP6TXOCTSHI_SHIFT 0
+#define I40E_GLPES_VFIP6TXOCTSHI_IP6TXOCTSHI_MASK (0xFFFF << I40E_GLPES_VFIP6TXOCTSHI_IP6TXOCTSHI_SHIFT)
+#define I40E_GLPES_VFIP6TXOCTSLO(_i) (0x0001A400 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLPES_VFIP6TXOCTSLO_MAX_INDEX 31
+#define I40E_GLPES_VFIP6TXOCTSLO_IP6TXOCTSLO_SHIFT 0
+#define I40E_GLPES_VFIP6TXOCTSLO_IP6TXOCTSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP6TXOCTSLO_IP6TXOCTSLO_SHIFT)
+#define I40E_GLPES_VFIP6TXPKTSHI(_i) (0x0001A604 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLPES_VFIP6TXPKTSHI_MAX_INDEX 31
+#define I40E_GLPES_VFIP6TXPKTSHI_IP6TXPKTSHI_SHIFT 0
+#define I40E_GLPES_VFIP6TXPKTSHI_IP6TXPKTSHI_MASK (0xFFFF << I40E_GLPES_VFIP6TXPKTSHI_IP6TXPKTSHI_SHIFT)
+#define I40E_GLPES_VFIP6TXPKTSLO(_i) (0x0001A600 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLPES_VFIP6TXPKTSLO_MAX_INDEX 31
+#define I40E_GLPES_VFIP6TXPKTSLO_IP6TXPKTSLO_SHIFT 0
+#define I40E_GLPES_VFIP6TXPKTSLO_IP6TXPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFIP6TXPKTSLO_IP6TXPKTSLO_SHIFT)
+#define I40E_GLPES_VFRDMARXRDSHI(_i) (0x0001BE04 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLPES_VFRDMARXRDSHI_MAX_INDEX 31
+#define I40E_GLPES_VFRDMARXRDSHI_RDMARXRDSHI_SHIFT 0
+#define I40E_GLPES_VFRDMARXRDSHI_RDMARXRDSHI_MASK (0xFFFF << I40E_GLPES_VFRDMARXRDSHI_RDMARXRDSHI_SHIFT)
+#define I40E_GLPES_VFRDMARXRDSLO(_i) (0x0001BE00 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLPES_VFRDMARXRDSLO_MAX_INDEX 31
+#define I40E_GLPES_VFRDMARXRDSLO_RDMARXRDSLO_SHIFT 0
+#define I40E_GLPES_VFRDMARXRDSLO_RDMARXRDSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFRDMARXRDSLO_RDMARXRDSLO_SHIFT)
+#define I40E_GLPES_VFRDMARXSNDSHI(_i) (0x0001C004 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLPES_VFRDMARXSNDSHI_MAX_INDEX 31
+#define I40E_GLPES_VFRDMARXSNDSHI_RDMARXSNDSHI_SHIFT 0
+#define I40E_GLPES_VFRDMARXSNDSHI_RDMARXSNDSHI_MASK (0xFFFF << I40E_GLPES_VFRDMARXSNDSHI_RDMARXSNDSHI_SHIFT)
+#define I40E_GLPES_VFRDMARXSNDSLO(_i) (0x0001C000 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLPES_VFRDMARXSNDSLO_MAX_INDEX 31
+#define I40E_GLPES_VFRDMARXSNDSLO_RDMARXSNDSLO_SHIFT 0
+#define I40E_GLPES_VFRDMARXSNDSLO_RDMARXSNDSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFRDMARXSNDSLO_RDMARXSNDSLO_SHIFT)
+#define I40E_GLPES_VFRDMARXWRSHI(_i) (0x0001BC04 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLPES_VFRDMARXWRSHI_MAX_INDEX 31
+#define I40E_GLPES_VFRDMARXWRSHI_RDMARXWRSHI_SHIFT 0
+#define I40E_GLPES_VFRDMARXWRSHI_RDMARXWRSHI_MASK (0xFFFF << I40E_GLPES_VFRDMARXWRSHI_RDMARXWRSHI_SHIFT)
+#define I40E_GLPES_VFRDMARXWRSLO(_i) (0x0001BC00 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLPES_VFRDMARXWRSLO_MAX_INDEX 31
+#define I40E_GLPES_VFRDMARXWRSLO_RDMARXWRSLO_SHIFT 0
+#define I40E_GLPES_VFRDMARXWRSLO_RDMARXWRSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFRDMARXWRSLO_RDMARXWRSLO_SHIFT)
+#define I40E_GLPES_VFRDMATXRDSHI(_i) (0x0001C404 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLPES_VFRDMATXRDSHI_MAX_INDEX 31
+#define I40E_GLPES_VFRDMATXRDSHI_RDMARXRDSHI_SHIFT 0
+#define I40E_GLPES_VFRDMATXRDSHI_RDMARXRDSHI_MASK (0xFFFF << I40E_GLPES_VFRDMATXRDSHI_RDMARXRDSHI_SHIFT)
+#define I40E_GLPES_VFRDMATXRDSLO(_i) (0x0001C400 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLPES_VFRDMATXRDSLO_MAX_INDEX 31
+#define I40E_GLPES_VFRDMATXRDSLO_RDMARXRDSLO_SHIFT 0
+#define I40E_GLPES_VFRDMATXRDSLO_RDMARXRDSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFRDMATXRDSLO_RDMARXRDSLO_SHIFT)
+#define I40E_GLPES_VFRDMATXSNDSHI(_i) (0x0001C604 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLPES_VFRDMATXSNDSHI_MAX_INDEX 31
+#define I40E_GLPES_VFRDMATXSNDSHI_RDMARXSNDSHI_SHIFT 0
+#define I40E_GLPES_VFRDMATXSNDSHI_RDMARXSNDSHI_MASK (0xFFFF << I40E_GLPES_VFRDMATXSNDSHI_RDMARXSNDSHI_SHIFT)
+#define I40E_GLPES_VFRDMATXSNDSLO(_i) (0x0001C600 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLPES_VFRDMATXSNDSLO_MAX_INDEX 31
+#define I40E_GLPES_VFRDMATXSNDSLO_RDMARXSNDSLO_SHIFT 0
+#define I40E_GLPES_VFRDMATXSNDSLO_RDMARXSNDSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFRDMATXSNDSLO_RDMARXSNDSLO_SHIFT)
+#define I40E_GLPES_VFRDMATXWRSHI(_i) (0x0001C204 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLPES_VFRDMATXWRSHI_MAX_INDEX 31
+#define I40E_GLPES_VFRDMATXWRSHI_RDMARXWRSHI_SHIFT 0
+#define I40E_GLPES_VFRDMATXWRSHI_RDMARXWRSHI_MASK (0xFFFF << I40E_GLPES_VFRDMATXWRSHI_RDMARXWRSHI_SHIFT)
+#define I40E_GLPES_VFRDMATXWRSLO(_i) (0x0001C200 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLPES_VFRDMATXWRSLO_MAX_INDEX 31
+#define I40E_GLPES_VFRDMATXWRSLO_RDMARXWRSLO_SHIFT 0
+#define I40E_GLPES_VFRDMATXWRSLO_RDMARXWRSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFRDMATXWRSLO_RDMARXWRSLO_SHIFT)
+#define I40E_GLPES_VFRDMAVBNDHI(_i) (0x0001C804 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLPES_VFRDMAVBNDHI_MAX_INDEX 31
+#define I40E_GLPES_VFRDMAVBNDHI_RDMAVBNDHI_SHIFT 0
+#define I40E_GLPES_VFRDMAVBNDHI_RDMAVBNDHI_MASK (0xFFFFFFFF << I40E_GLPES_VFRDMAVBNDHI_RDMAVBNDHI_SHIFT)
+#define I40E_GLPES_VFRDMAVBNDLO(_i) (0x0001C800 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLPES_VFRDMAVBNDLO_MAX_INDEX 31
+#define I40E_GLPES_VFRDMAVBNDLO_RDMAVBNDLO_SHIFT 0
+#define I40E_GLPES_VFRDMAVBNDLO_RDMAVBNDLO_MASK (0xFFFFFFFF << I40E_GLPES_VFRDMAVBNDLO_RDMAVBNDLO_SHIFT)
+#define I40E_GLPES_VFRDMAVINVHI(_i) (0x0001CA04 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLPES_VFRDMAVINVHI_MAX_INDEX 31
+#define I40E_GLPES_VFRDMAVINVHI_RDMAVINVHI_SHIFT 0
+#define I40E_GLPES_VFRDMAVINVHI_RDMAVINVHI_MASK (0xFFFFFFFF << I40E_GLPES_VFRDMAVINVHI_RDMAVINVHI_SHIFT)
+#define I40E_GLPES_VFRDMAVINVLO(_i) (0x0001CA00 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLPES_VFRDMAVINVLO_MAX_INDEX 31
+#define I40E_GLPES_VFRDMAVINVLO_RDMAVINVLO_SHIFT 0
+#define I40E_GLPES_VFRDMAVINVLO_RDMAVINVLO_MASK (0xFFFFFFFF << I40E_GLPES_VFRDMAVINVLO_RDMAVINVLO_SHIFT)
+#define I40E_GLPES_VFRXVLANERR(_i) (0x00018000 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLPES_VFRXVLANERR_MAX_INDEX 31
+#define I40E_GLPES_VFRXVLANERR_RXVLANERR_SHIFT 0
+#define I40E_GLPES_VFRXVLANERR_RXVLANERR_MASK (0xFFFFFF << I40E_GLPES_VFRXVLANERR_RXVLANERR_SHIFT)
+#define I40E_GLPES_VFTCPRTXSEG(_i) (0x0001B600 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLPES_VFTCPRTXSEG_MAX_INDEX 31
+#define I40E_GLPES_VFTCPRTXSEG_TCPRTXSEG_SHIFT 0
+#define I40E_GLPES_VFTCPRTXSEG_TCPRTXSEG_MASK (0xFFFFFFFF << I40E_GLPES_VFTCPRTXSEG_TCPRTXSEG_SHIFT)
+#define I40E_GLPES_VFTCPRXOPTERR(_i) (0x0001B200 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLPES_VFTCPRXOPTERR_MAX_INDEX 31
+#define I40E_GLPES_VFTCPRXOPTERR_TCPRXOPTERR_SHIFT 0
+#define I40E_GLPES_VFTCPRXOPTERR_TCPRXOPTERR_MASK (0xFFFFFF << I40E_GLPES_VFTCPRXOPTERR_TCPRXOPTERR_SHIFT)
+#define I40E_GLPES_VFTCPRXPROTOERR(_i) (0x0001B300 + ((_i) * 4))
+#define I40E_GLPES_VFTCPRXPROTOERR_MAX_INDEX 31
+#define I40E_GLPES_VFTCPRXPROTOERR_TCPRXPROTOERR_SHIFT 0
+#define I40E_GLPES_VFTCPRXPROTOERR_TCPRXPROTOERR_MASK (0xFFFFFF << I40E_GLPES_VFTCPRXPROTOERR_TCPRXPROTOERR_SHIFT)
+#define I40E_GLPES_VFTCPRXSEGSHI(_i) (0x0001B004 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLPES_VFTCPRXSEGSHI_MAX_INDEX 31
+#define I40E_GLPES_VFTCPRXSEGSHI_TCPRXSEGSHI_SHIFT 0
+#define I40E_GLPES_VFTCPRXSEGSHI_TCPRXSEGSHI_MASK (0xFFFF << I40E_GLPES_VFTCPRXSEGSHI_TCPRXSEGSHI_SHIFT)
+#define I40E_GLPES_VFTCPRXSEGSLO(_i) (0x0001B000 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLPES_VFTCPRXSEGSLO_MAX_INDEX 31
+#define I40E_GLPES_VFTCPRXSEGSLO_TCPRXSEGSLO_SHIFT 0
+#define I40E_GLPES_VFTCPRXSEGSLO_TCPRXSEGSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFTCPRXSEGSLO_TCPRXSEGSLO_SHIFT)
+#define I40E_GLPES_VFTCPTXSEGHI(_i) (0x0001B404 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLPES_VFTCPTXSEGHI_MAX_INDEX 31
+#define I40E_GLPES_VFTCPTXSEGHI_TCPTXSEGHI_SHIFT 0
+#define I40E_GLPES_VFTCPTXSEGHI_TCPTXSEGHI_MASK (0xFFFF << I40E_GLPES_VFTCPTXSEGHI_TCPTXSEGHI_SHIFT)
+#define I40E_GLPES_VFTCPTXSEGLO(_i) (0x0001B400 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLPES_VFTCPTXSEGLO_MAX_INDEX 31
+#define I40E_GLPES_VFTCPTXSEGLO_TCPTXSEGLO_SHIFT 0
+#define I40E_GLPES_VFTCPTXSEGLO_TCPTXSEGLO_MASK (0xFFFFFFFF << I40E_GLPES_VFTCPTXSEGLO_TCPTXSEGLO_SHIFT)
+#define I40E_GLPES_VFUDPRXPKTSHI(_i) (0x0001B804 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLPES_VFUDPRXPKTSHI_MAX_INDEX 31
+#define I40E_GLPES_VFUDPRXPKTSHI_UDPRXPKTSHI_SHIFT 0
+#define I40E_GLPES_VFUDPRXPKTSHI_UDPRXPKTSHI_MASK (0xFFFF << I40E_GLPES_VFUDPRXPKTSHI_UDPRXPKTSHI_SHIFT)
+#define I40E_GLPES_VFUDPRXPKTSLO(_i) (0x0001B800 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLPES_VFUDPRXPKTSLO_MAX_INDEX 31
+#define I40E_GLPES_VFUDPRXPKTSLO_UDPRXPKTSLO_SHIFT 0
+#define I40E_GLPES_VFUDPRXPKTSLO_UDPRXPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFUDPRXPKTSLO_UDPRXPKTSLO_SHIFT)
+#define I40E_GLPES_VFUDPTXPKTSHI(_i) (0x0001BA04 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLPES_VFUDPTXPKTSHI_MAX_INDEX 31
+#define I40E_GLPES_VFUDPTXPKTSHI_UDPTXPKTSHI_SHIFT 0
+#define I40E_GLPES_VFUDPTXPKTSHI_UDPTXPKTSHI_MASK (0xFFFF << I40E_GLPES_VFUDPTXPKTSHI_UDPTXPKTSHI_SHIFT)
+#define I40E_GLPES_VFUDPTXPKTSLO(_i) (0x0001BA00 + ((_i) * 4)) /* _i=0...31 */
+#define I40E_GLPES_VFUDPTXPKTSLO_MAX_INDEX 31
+#define I40E_GLPES_VFUDPTXPKTSLO_UDPTXPKTSLO_SHIFT 0
+#define I40E_GLPES_VFUDPTXPKTSLO_UDPTXPKTSLO_MASK (0xFFFFFFFF << I40E_GLPES_VFUDPTXPKTSLO_UDPTXPKTSLO_SHIFT)
+#define I40E_GLPM_DMACR 0x000881F4
+#define I40E_GLPM_DMACR_DMACWT_SHIFT 0
+#define I40E_GLPM_DMACR_DMACWT_MASK (0xFFFF << I40E_GLPM_DMACR_DMACWT_SHIFT)
+#define I40E_GLPM_DMACR_EXIT_DC_SHIFT 29
+#define I40E_GLPM_DMACR_EXIT_DC_MASK (0x1 << I40E_GLPM_DMACR_EXIT_DC_SHIFT)
+#define I40E_GLPM_DMACR_LX_COALESCING_INDICATION_SHIFT 30
+#define I40E_GLPM_DMACR_LX_COALESCING_INDICATION_MASK (0x1 << I40E_GLPM_DMACR_LX_COALESCING_INDICATION_SHIFT)
+#define I40E_GLPM_DMACR_DMAC_EN_SHIFT 31
+#define I40E_GLPM_DMACR_DMAC_EN_MASK (0x1 << I40E_GLPM_DMACR_DMAC_EN_SHIFT)
+#define I40E_GLPM_LTRC 0x000BE500
+#define I40E_GLPM_LTRC_SLTRV_SHIFT 0
+#define I40E_GLPM_LTRC_SLTRV_MASK (0x3FF << I40E_GLPM_LTRC_SLTRV_SHIFT)
+#define I40E_GLPM_LTRC_SSCALE_SHIFT 10
+#define I40E_GLPM_LTRC_SSCALE_MASK (0x7 << I40E_GLPM_LTRC_SSCALE_SHIFT)
+#define I40E_GLPM_LTRC_LTRS_REQUIREMENT_SHIFT 15
+#define I40E_GLPM_LTRC_LTRS_REQUIREMENT_MASK (0x1 << I40E_GLPM_LTRC_LTRS_REQUIREMENT_SHIFT)
+#define I40E_GLPM_LTRC_NSLTRV_SHIFT 16
+#define I40E_GLPM_LTRC_NSLTRV_MASK (0x3FF << I40E_GLPM_LTRC_NSLTRV_SHIFT)
+#define I40E_GLPM_LTRC_NSSCALE_SHIFT 26
+#define I40E_GLPM_LTRC_NSSCALE_MASK (0x7 << I40E_GLPM_LTRC_NSSCALE_SHIFT)
+#define I40E_GLPM_LTRC_LTR_SEND_SHIFT 30
+#define I40E_GLPM_LTRC_LTR_SEND_MASK (0x1 << I40E_GLPM_LTRC_LTR_SEND_SHIFT)
+#define I40E_GLPM_LTRC_LTRNS_REQUIREMENT_SHIFT 31
+#define I40E_GLPM_LTRC_LTRNS_REQUIREMENT_MASK (0x1u << I40E_GLPM_LTRC_LTRNS_REQUIREMENT_SHIFT) /* unsigned: left-shifting a signed 1 into bit 31 is UB (C11 6.5.7) */
+#define I40E_PRTPM_EEE_STAT 0x001E4320
+#define I40E_PRTPM_EEE_STAT_EEE_NEG_SHIFT 29
+#define I40E_PRTPM_EEE_STAT_EEE_NEG_MASK (0x1 << I40E_PRTPM_EEE_STAT_EEE_NEG_SHIFT)
+#define I40E_PRTPM_EEE_STAT_RX_LPI_STATUS_SHIFT 30
+#define I40E_PRTPM_EEE_STAT_RX_LPI_STATUS_MASK (0x1 << I40E_PRTPM_EEE_STAT_RX_LPI_STATUS_SHIFT)
+#define I40E_PRTPM_EEE_STAT_TX_LPI_STATUS_SHIFT 31
+#define I40E_PRTPM_EEE_STAT_TX_LPI_STATUS_MASK (0x1u << I40E_PRTPM_EEE_STAT_TX_LPI_STATUS_SHIFT) /* unsigned: left-shifting a signed 1 into bit 31 is UB (C11 6.5.7) */
+#define I40E_PRTPM_EEEC 0x001E4380
+#define I40E_PRTPM_EEEC_TW_WAKE_MIN_SHIFT 16
+#define I40E_PRTPM_EEEC_TW_WAKE_MIN_MASK (0x3F << I40E_PRTPM_EEEC_TW_WAKE_MIN_SHIFT)
+#define I40E_PRTPM_EEEC_TX_LU_LPI_DLY_SHIFT 24
+#define I40E_PRTPM_EEEC_TX_LU_LPI_DLY_MASK (0x3 << I40E_PRTPM_EEEC_TX_LU_LPI_DLY_SHIFT)
+#define I40E_PRTPM_EEEC_TEEE_DLY_SHIFT 26
+#define I40E_PRTPM_EEEC_TEEE_DLY_MASK (0x3F << I40E_PRTPM_EEEC_TEEE_DLY_SHIFT)
+#define I40E_PRTPM_EEEFWD 0x001E4400
+#define I40E_PRTPM_EEEFWD_EEE_FW_CONFIG_DONE_SHIFT 31
+#define I40E_PRTPM_EEEFWD_EEE_FW_CONFIG_DONE_MASK (0x1u << I40E_PRTPM_EEEFWD_EEE_FW_CONFIG_DONE_SHIFT) /* unsigned: left-shifting a signed 1 into bit 31 is UB (C11 6.5.7) */
+#define I40E_PRTPM_EEER 0x001E4360
+#define I40E_PRTPM_EEER_TW_SYSTEM_SHIFT 0
+#define I40E_PRTPM_EEER_TW_SYSTEM_MASK (0xFFFF << I40E_PRTPM_EEER_TW_SYSTEM_SHIFT)
+#define I40E_PRTPM_EEER_TX_LPI_EN_SHIFT 16
+#define I40E_PRTPM_EEER_TX_LPI_EN_MASK (0x1 << I40E_PRTPM_EEER_TX_LPI_EN_SHIFT)
+#define I40E_PRTPM_EEETXC 0x001E43E0
+#define I40E_PRTPM_EEETXC_TW_PHY_SHIFT 0
+#define I40E_PRTPM_EEETXC_TW_PHY_MASK (0xFFFF << I40E_PRTPM_EEETXC_TW_PHY_SHIFT)
+#define I40E_PRTPM_GC 0x000B8140
+#define I40E_PRTPM_GC_EMP_LINK_ON_SHIFT 0
+#define I40E_PRTPM_GC_EMP_LINK_ON_MASK (0x1 << I40E_PRTPM_GC_EMP_LINK_ON_SHIFT)
+#define I40E_PRTPM_GC_MNG_VETO_SHIFT 1
+#define I40E_PRTPM_GC_MNG_VETO_MASK (0x1 << I40E_PRTPM_GC_MNG_VETO_SHIFT)
+#define I40E_PRTPM_GC_RATD_SHIFT 2
+#define I40E_PRTPM_GC_RATD_MASK (0x1 << I40E_PRTPM_GC_RATD_SHIFT)
+#define I40E_PRTPM_GC_LCDMP_SHIFT 3
+#define I40E_PRTPM_GC_LCDMP_MASK (0x1 << I40E_PRTPM_GC_LCDMP_SHIFT)
+#define I40E_PRTPM_GC_LPLU_ASSERTED_SHIFT 31
+#define I40E_PRTPM_GC_LPLU_ASSERTED_MASK (0x1u << I40E_PRTPM_GC_LPLU_ASSERTED_SHIFT) /* unsigned: left-shifting a signed 1 into bit 31 is UB (C11 6.5.7) */
+#define I40E_PRTPM_HPTC 0x000AC800
+#define I40E_PRTPM_HPTC_HIGH_PRI_TC_SHIFT 0
+#define I40E_PRTPM_HPTC_HIGH_PRI_TC_MASK (0xFF << I40E_PRTPM_HPTC_HIGH_PRI_TC_SHIFT)
+#define I40E_PRTPM_RLPIC 0x001E43A0
+#define I40E_PRTPM_RLPIC_ERLPIC_SHIFT 0
+#define I40E_PRTPM_RLPIC_ERLPIC_MASK (0xFFFFFFFF << I40E_PRTPM_RLPIC_ERLPIC_SHIFT)
+#define I40E_PRTPM_TLPIC 0x001E43C0
+#define I40E_PRTPM_TLPIC_ETLPIC_SHIFT 0
+#define I40E_PRTPM_TLPIC_ETLPIC_MASK (0xFFFFFFFF << I40E_PRTPM_TLPIC_ETLPIC_SHIFT)
+#define I40E_GLRPB_DPSS 0x000AC828
+#define I40E_GLRPB_DPSS_DPS_TCN_SHIFT 0
+#define I40E_GLRPB_DPSS_DPS_TCN_MASK (0xFFFFF << I40E_GLRPB_DPSS_DPS_TCN_SHIFT)
+#define I40E_GLRPB_GHW 0x000AC830
+#define I40E_GLRPB_GHW_GHW_SHIFT 0
+#define I40E_GLRPB_GHW_GHW_MASK (0xFFFFF << I40E_GLRPB_GHW_GHW_SHIFT)
+#define I40E_GLRPB_GLW 0x000AC834
+#define I40E_GLRPB_GLW_GLW_SHIFT 0
+#define I40E_GLRPB_GLW_GLW_MASK (0xFFFFF << I40E_GLRPB_GLW_GLW_SHIFT)
+#define I40E_GLRPB_PHW 0x000AC844
+#define I40E_GLRPB_PHW_PHW_SHIFT 0
+#define I40E_GLRPB_PHW_PHW_MASK (0xFFFFF << I40E_GLRPB_PHW_PHW_SHIFT)
+#define I40E_GLRPB_PLW 0x000AC848
+#define I40E_GLRPB_PLW_PLW_SHIFT 0
+#define I40E_GLRPB_PLW_PLW_MASK (0xFFFFF << I40E_GLRPB_PLW_PLW_SHIFT)
+#define I40E_PRTRPB_DHW(_i) (0x000AC100 + ((_i) * 32)) /* _i=0...7 */
+#define I40E_PRTRPB_DHW_MAX_INDEX 7
+#define I40E_PRTRPB_DHW_DHW_TCN_SHIFT 0
+#define I40E_PRTRPB_DHW_DHW_TCN_MASK (0xFFFFF << I40E_PRTRPB_DHW_DHW_TCN_SHIFT)
+#define I40E_PRTRPB_DLW(_i) (0x000AC220 + ((_i) * 32)) /* _i=0...7 */
+#define I40E_PRTRPB_DLW_MAX_INDEX 7
+#define I40E_PRTRPB_DLW_DLW_TCN_SHIFT 0
+#define I40E_PRTRPB_DLW_DLW_TCN_MASK (0xFFFFF << I40E_PRTRPB_DLW_DLW_TCN_SHIFT)
+#define I40E_PRTRPB_DPS(_i) (0x000AC320 + ((_i) * 32)) /* _i=0...7 */
+#define I40E_PRTRPB_DPS_MAX_INDEX 7
+#define I40E_PRTRPB_DPS_DPS_TCN_SHIFT 0
+#define I40E_PRTRPB_DPS_DPS_TCN_MASK (0xFFFFF << I40E_PRTRPB_DPS_DPS_TCN_SHIFT)
+#define I40E_PRTRPB_SHT(_i) (0x000AC480 + ((_i) * 32)) /* _i=0...7 */
+#define I40E_PRTRPB_SHT_MAX_INDEX 7
+#define I40E_PRTRPB_SHT_SHT_TCN_SHIFT 0
+#define I40E_PRTRPB_SHT_SHT_TCN_MASK (0xFFFFF << I40E_PRTRPB_SHT_SHT_TCN_SHIFT)
+#define I40E_PRTRPB_SHW 0x000AC580
+#define I40E_PRTRPB_SHW_SHW_SHIFT 0
+#define I40E_PRTRPB_SHW_SHW_MASK (0xFFFFF << I40E_PRTRPB_SHW_SHW_SHIFT)
+#define I40E_PRTRPB_SLT(_i) (0x000AC5A0 + ((_i) * 32)) /* _i=0...7 */
+#define I40E_PRTRPB_SLT_MAX_INDEX 7
+#define I40E_PRTRPB_SLT_SLT_TCN_SHIFT 0
+#define I40E_PRTRPB_SLT_SLT_TCN_MASK (0xFFFFF << I40E_PRTRPB_SLT_SLT_TCN_SHIFT)
+#define I40E_PRTRPB_SLW 0x000AC6A0
+#define I40E_PRTRPB_SLW_SLW_SHIFT 0
+#define I40E_PRTRPB_SLW_SLW_MASK (0xFFFFF << I40E_PRTRPB_SLW_SLW_SHIFT)
+#define I40E_PRTRPB_SPS 0x000AC7C0
+#define I40E_PRTRPB_SPS_SPS_SHIFT 0
+#define I40E_PRTRPB_SPS_SPS_MASK (0xFFFFF << I40E_PRTRPB_SPS_SPS_SHIFT)
+#define I40E_GLQF_APBVT(_i) (0x00260000 + ((_i) * 4)) /* _i=0...2047 */
+#define I40E_GLQF_APBVT_MAX_INDEX 2047
+#define I40E_GLQF_APBVT_APBVT_SHIFT 0
+#define I40E_GLQF_APBVT_APBVT_MASK (0xFFFFFFFF << I40E_GLQF_APBVT_APBVT_SHIFT)
+#define I40E_GLQF_CTL 0x00269BA4
+#define I40E_GLQF_CTL_HTOEP_SHIFT 1
+#define I40E_GLQF_CTL_HTOEP_MASK (0x1 << I40E_GLQF_CTL_HTOEP_SHIFT)
+#define I40E_GLQF_CTL_HTOEP_FCOE_SHIFT 2
+#define I40E_GLQF_CTL_HTOEP_FCOE_MASK (0x1 << I40E_GLQF_CTL_HTOEP_FCOE_SHIFT)
+#define I40E_GLQF_CTL_PCNT_ALLOC_SHIFT 3
+#define I40E_GLQF_CTL_PCNT_ALLOC_MASK (0x7 << I40E_GLQF_CTL_PCNT_ALLOC_SHIFT)
+#define I40E_GLQF_CTL_DDPLPEN_SHIFT 7
+#define I40E_GLQF_CTL_DDPLPEN_MASK (0x1 << I40E_GLQF_CTL_DDPLPEN_SHIFT)
+#define I40E_GLQF_CTL_MAXPEBLEN_SHIFT 8
+#define I40E_GLQF_CTL_MAXPEBLEN_MASK (0x7 << I40E_GLQF_CTL_MAXPEBLEN_SHIFT)
+#define I40E_GLQF_CTL_MAXFCBLEN_SHIFT 11
+#define I40E_GLQF_CTL_MAXFCBLEN_MASK (0x7 << I40E_GLQF_CTL_MAXFCBLEN_SHIFT)
+#define I40E_GLQF_CTL_MAXFDBLEN_SHIFT 14
+#define I40E_GLQF_CTL_MAXFDBLEN_MASK (0x7 << I40E_GLQF_CTL_MAXFDBLEN_SHIFT)
+#define I40E_GLQF_CTL_FDBEST_SHIFT 17
+#define I40E_GLQF_CTL_FDBEST_MASK (0xFF << I40E_GLQF_CTL_FDBEST_SHIFT)
+#define I40E_GLQF_CTL_PROGPRIO_SHIFT 25
+#define I40E_GLQF_CTL_PROGPRIO_MASK (0x1 << I40E_GLQF_CTL_PROGPRIO_SHIFT)
+#define I40E_GLQF_CTL_INVALPRIO_SHIFT 26
+#define I40E_GLQF_CTL_INVALPRIO_MASK (0x1 << I40E_GLQF_CTL_INVALPRIO_SHIFT)
+#define I40E_GLQF_CTL_IGNORE_IP_SHIFT 27
+#define I40E_GLQF_CTL_IGNORE_IP_MASK (0x1 << I40E_GLQF_CTL_IGNORE_IP_SHIFT)
+#define I40E_GLQF_FDCNT_0 0x00269BAC
+#define I40E_GLQF_FDCNT_0_GUARANT_CNT_SHIFT 0
+#define I40E_GLQF_FDCNT_0_GUARANT_CNT_MASK (0x1FFF << I40E_GLQF_FDCNT_0_GUARANT_CNT_SHIFT)
+#define I40E_GLQF_FDCNT_0_BESTCNT_SHIFT 13
+#define I40E_GLQF_FDCNT_0_BESTCNT_MASK (0x1FFF << I40E_GLQF_FDCNT_0_BESTCNT_SHIFT)
+#define I40E_GLQF_HSYM(_i) (0x00269D00 + ((_i) * 4)) /* _i=0...63 */
+#define I40E_GLQF_HSYM_MAX_INDEX 63
+#define I40E_GLQF_HSYM_SYMH_ENA_SHIFT 0
+#define I40E_GLQF_HSYM_SYMH_ENA_MASK (0x1 << I40E_GLQF_HSYM_SYMH_ENA_SHIFT)
+#define I40E_GLQF_PCNT(_i) (0x00266800 + ((_i) * 4)) /* _i=0...511 */
+#define I40E_GLQF_PCNT_MAX_INDEX 511
+#define I40E_GLQF_PCNT_PCNT_SHIFT 0
+#define I40E_GLQF_PCNT_PCNT_MASK (0xFFFFFFFF << I40E_GLQF_PCNT_PCNT_SHIFT)
+#define I40E_GLQF_SWAP(_i, _j) (0x00267E00 + ((_i) * 4 + (_j) * 8)) /* _i=0...1, _j=0...63 */
+#define I40E_GLQF_SWAP_MAX_INDEX 1
+#define I40E_GLQF_SWAP_OFF0_SRC0_SHIFT 0
+#define I40E_GLQF_SWAP_OFF0_SRC0_MASK (0x3F << I40E_GLQF_SWAP_OFF0_SRC0_SHIFT)
+#define I40E_GLQF_SWAP_OFF0_SRC1_SHIFT 6
+#define I40E_GLQF_SWAP_OFF0_SRC1_MASK (0x3F << I40E_GLQF_SWAP_OFF0_SRC1_SHIFT)
+#define I40E_GLQF_SWAP_FLEN0_SHIFT 12
+#define I40E_GLQF_SWAP_FLEN0_MASK (0xF << I40E_GLQF_SWAP_FLEN0_SHIFT)
+#define I40E_GLQF_SWAP_OFF1_SRC0_SHIFT 16
+#define I40E_GLQF_SWAP_OFF1_SRC0_MASK (0x3F << I40E_GLQF_SWAP_OFF1_SRC0_SHIFT)
+#define I40E_GLQF_SWAP_OFF1_SRC1_SHIFT 22
+#define I40E_GLQF_SWAP_OFF1_SRC1_MASK (0x3F << I40E_GLQF_SWAP_OFF1_SRC1_SHIFT)
+#define I40E_GLQF_SWAP_FLEN1_SHIFT 28
+#define I40E_GLQF_SWAP_FLEN1_MASK (0xFu << I40E_GLQF_SWAP_FLEN1_SHIFT) /* unsigned: 0xF << 28 sets bit 31; signed left shift into the sign bit is UB (C11 6.5.7) */
+#define I40E_PFQF_CTL_0 0x001C0AC0
+#define I40E_PFQF_CTL_0_PEHSIZE_SHIFT 0
+#define I40E_PFQF_CTL_0_PEHSIZE_MASK (0x1F << I40E_PFQF_CTL_0_PEHSIZE_SHIFT)
+#define I40E_PFQF_CTL_0_PEDSIZE_SHIFT 5
+#define I40E_PFQF_CTL_0_PEDSIZE_MASK (0x1F << I40E_PFQF_CTL_0_PEDSIZE_SHIFT)
+#define I40E_PFQF_CTL_0_PFFCHSIZE_SHIFT 10
+#define I40E_PFQF_CTL_0_PFFCHSIZE_MASK (0xF << I40E_PFQF_CTL_0_PFFCHSIZE_SHIFT)
+#define I40E_PFQF_CTL_0_PFFCDSIZE_SHIFT 14
+#define I40E_PFQF_CTL_0_PFFCDSIZE_MASK (0x3 << I40E_PFQF_CTL_0_PFFCDSIZE_SHIFT)
+#define I40E_PFQF_CTL_0_HASHLUTSIZE_SHIFT 16
+#define I40E_PFQF_CTL_0_HASHLUTSIZE_MASK (0x1 << I40E_PFQF_CTL_0_HASHLUTSIZE_SHIFT)
+#define I40E_PFQF_CTL_0_FD_ENA_SHIFT 17
+#define I40E_PFQF_CTL_0_FD_ENA_MASK (0x1 << I40E_PFQF_CTL_0_FD_ENA_SHIFT)
+#define I40E_PFQF_CTL_0_ETYPE_ENA_SHIFT 18
+#define I40E_PFQF_CTL_0_ETYPE_ENA_MASK (0x1 << I40E_PFQF_CTL_0_ETYPE_ENA_SHIFT)
+#define I40E_PFQF_CTL_0_MACVLAN_ENA_SHIFT 19
+#define I40E_PFQF_CTL_0_MACVLAN_ENA_MASK (0x1 << I40E_PFQF_CTL_0_MACVLAN_ENA_SHIFT)
+#define I40E_PFQF_CTL_0_VFFCHSIZE_SHIFT 20
+#define I40E_PFQF_CTL_0_VFFCHSIZE_MASK (0xF << I40E_PFQF_CTL_0_VFFCHSIZE_SHIFT)
+#define I40E_PFQF_CTL_0_VFFCDSIZE_SHIFT 24
+#define I40E_PFQF_CTL_0_VFFCDSIZE_MASK (0x3 << I40E_PFQF_CTL_0_VFFCDSIZE_SHIFT)
+#define I40E_PFQF_CTL_1 0x00245D80
+#define I40E_PFQF_CTL_1_CLEARFDTABLE_SHIFT 0
+#define I40E_PFQF_CTL_1_CLEARFDTABLE_MASK (0x1 << I40E_PFQF_CTL_1_CLEARFDTABLE_SHIFT)
+#define I40E_PFQF_FDALLOC 0x00246280
+#define I40E_PFQF_FDALLOC_FDALLOC_SHIFT 0
+#define I40E_PFQF_FDALLOC_FDALLOC_MASK (0xFF << I40E_PFQF_FDALLOC_FDALLOC_SHIFT)
+#define I40E_PFQF_FDALLOC_FDBEST_SHIFT 8
+#define I40E_PFQF_FDALLOC_FDBEST_MASK (0xFF << I40E_PFQF_FDALLOC_FDBEST_SHIFT)
+#define I40E_PFQF_FDSTAT 0x00246380
+#define I40E_PFQF_FDSTAT_GUARANT_CNT_SHIFT 0
+#define I40E_PFQF_FDSTAT_GUARANT_CNT_MASK (0x1FFF << I40E_PFQF_FDSTAT_GUARANT_CNT_SHIFT)
+#define I40E_PFQF_FDSTAT_BEST_CNT_SHIFT 16
+#define I40E_PFQF_FDSTAT_BEST_CNT_MASK (0x1FFF << I40E_PFQF_FDSTAT_BEST_CNT_SHIFT)
+#define I40E_PFQF_HENA(_i) (0x00245900 + ((_i) * 128)) /* _i=0...1 */
+#define I40E_PFQF_HENA_MAX_INDEX 1
+#define I40E_PFQF_HENA_PTYPE_ENA_SHIFT 0
+#define I40E_PFQF_HENA_PTYPE_ENA_MASK (0xFFFFFFFF << I40E_PFQF_HENA_PTYPE_ENA_SHIFT)
+#define I40E_PFQF_HKEY(_i) (0x00244800 + ((_i) * 128)) /* _i=0...12 */
+#define I40E_PFQF_HKEY_MAX_INDEX 12
+#define I40E_PFQF_HKEY_KEY_0_SHIFT 0
+#define I40E_PFQF_HKEY_KEY_0_MASK (0xFF << I40E_PFQF_HKEY_KEY_0_SHIFT)
+#define I40E_PFQF_HKEY_KEY_1_SHIFT 8
+#define I40E_PFQF_HKEY_KEY_1_MASK (0xFF << I40E_PFQF_HKEY_KEY_1_SHIFT)
+#define I40E_PFQF_HKEY_KEY_2_SHIFT 16
+#define I40E_PFQF_HKEY_KEY_2_MASK (0xFF << I40E_PFQF_HKEY_KEY_2_SHIFT)
+#define I40E_PFQF_HKEY_KEY_3_SHIFT 24
+#define I40E_PFQF_HKEY_KEY_3_MASK (0xFFu << I40E_PFQF_HKEY_KEY_3_SHIFT) /* unsigned: 0xFF << 24 sets bit 31; signed left shift into the sign bit is UB (C11 6.5.7) */
+#define I40E_PFQF_HLUT(_i) (0x00240000 + ((_i) * 128)) /* _i=0...127 */
+#define I40E_PFQF_HLUT_MAX_INDEX 127
+#define I40E_PFQF_HLUT_LUT0_SHIFT 0
+#define I40E_PFQF_HLUT_LUT0_MASK (0x3F << I40E_PFQF_HLUT_LUT0_SHIFT)
+#define I40E_PFQF_HLUT_LUT1_SHIFT 8
+#define I40E_PFQF_HLUT_LUT1_MASK (0x3F << I40E_PFQF_HLUT_LUT1_SHIFT)
+#define I40E_PFQF_HLUT_LUT2_SHIFT 16
+#define I40E_PFQF_HLUT_LUT2_MASK (0x3F << I40E_PFQF_HLUT_LUT2_SHIFT)
+#define I40E_PFQF_HLUT_LUT3_SHIFT 24
+#define I40E_PFQF_HLUT_LUT3_MASK (0x3F << I40E_PFQF_HLUT_LUT3_SHIFT)
+#define I40E_PFQF_HREGION(_i) (0x00245400 + ((_i) * 128)) /* _i=0...7 */
+#define I40E_PFQF_HREGION_MAX_INDEX 7
+#define I40E_PFQF_HREGION_OVERRIDE_ENA_0_SHIFT 0
+#define I40E_PFQF_HREGION_OVERRIDE_ENA_0_MASK (0x1 << I40E_PFQF_HREGION_OVERRIDE_ENA_0_SHIFT)
+#define I40E_PFQF_HREGION_REGION_0_SHIFT 1
+#define I40E_PFQF_HREGION_REGION_0_MASK (0x7 << I40E_PFQF_HREGION_REGION_0_SHIFT)
+#define I40E_PFQF_HREGION_OVERRIDE_ENA_1_SHIFT 4
+#define I40E_PFQF_HREGION_OVERRIDE_ENA_1_MASK (0x1 << I40E_PFQF_HREGION_OVERRIDE_ENA_1_SHIFT)
+#define I40E_PFQF_HREGION_REGION_1_SHIFT 5
+#define I40E_PFQF_HREGION_REGION_1_MASK (0x7 << I40E_PFQF_HREGION_REGION_1_SHIFT)
+#define I40E_PFQF_HREGION_OVERRIDE_ENA_2_SHIFT 8
+#define I40E_PFQF_HREGION_OVERRIDE_ENA_2_MASK (0x1 << I40E_PFQF_HREGION_OVERRIDE_ENA_2_SHIFT)
+#define I40E_PFQF_HREGION_REGION_2_SHIFT 9
+#define I40E_PFQF_HREGION_REGION_2_MASK (0x7 << I40E_PFQF_HREGION_REGION_2_SHIFT)
+#define I40E_PFQF_HREGION_OVERRIDE_ENA_3_SHIFT 12
+#define I40E_PFQF_HREGION_OVERRIDE_ENA_3_MASK (0x1 << I40E_PFQF_HREGION_OVERRIDE_ENA_3_SHIFT)
+#define I40E_PFQF_HREGION_REGION_3_SHIFT 13
+#define I40E_PFQF_HREGION_REGION_3_MASK (0x7 << I40E_PFQF_HREGION_REGION_3_SHIFT)
+#define I40E_PFQF_HREGION_OVERRIDE_ENA_4_SHIFT 16
+#define I40E_PFQF_HREGION_OVERRIDE_ENA_4_MASK (0x1 << I40E_PFQF_HREGION_OVERRIDE_ENA_4_SHIFT)
+#define I40E_PFQF_HREGION_REGION_4_SHIFT 17
+#define I40E_PFQF_HREGION_REGION_4_MASK (0x7 << I40E_PFQF_HREGION_REGION_4_SHIFT)
+#define I40E_PFQF_HREGION_OVERRIDE_ENA_5_SHIFT 20
+#define I40E_PFQF_HREGION_OVERRIDE_ENA_5_MASK (0x1 << I40E_PFQF_HREGION_OVERRIDE_ENA_5_SHIFT)
+#define I40E_PFQF_HREGION_REGION_5_SHIFT 21
+#define I40E_PFQF_HREGION_REGION_5_MASK (0x7 << I40E_PFQF_HREGION_REGION_5_SHIFT)
+#define I40E_PFQF_HREGION_OVERRIDE_ENA_6_SHIFT 24
+#define I40E_PFQF_HREGION_OVERRIDE_ENA_6_MASK (0x1 << I40E_PFQF_HREGION_OVERRIDE_ENA_6_SHIFT)
+#define I40E_PFQF_HREGION_REGION_6_SHIFT 25
+#define I40E_PFQF_HREGION_REGION_6_MASK (0x7 << I40E_PFQF_HREGION_REGION_6_SHIFT)
+#define I40E_PFQF_HREGION_OVERRIDE_ENA_7_SHIFT 28
+#define I40E_PFQF_HREGION_OVERRIDE_ENA_7_MASK (0x1 << I40E_PFQF_HREGION_OVERRIDE_ENA_7_SHIFT)
+#define I40E_PFQF_HREGION_REGION_7_SHIFT 29
+#define I40E_PFQF_HREGION_REGION_7_MASK (0x7u << I40E_PFQF_HREGION_REGION_7_SHIFT) /* unsigned: 0x7 << 29 sets bit 31; signed left shift into the sign bit is UB (C11 6.5.7) */
+#define I40E_PRTQF_CTL_0 0x00256E60
+#define I40E_PRTQF_CTL_0_HSYM_ENA_SHIFT 0
+#define I40E_PRTQF_CTL_0_HSYM_ENA_MASK (0x1 << I40E_PRTQF_CTL_0_HSYM_ENA_SHIFT)
+#define I40E_PRTQF_FD_FLXINSET(_i) (0x00253800 + ((_i) * 32)) /* _i=0...63 */
+#define I40E_PRTQF_FD_FLXINSET_MAX_INDEX 63
+#define I40E_PRTQF_FD_FLXINSET_INSET_SHIFT 0
+#define I40E_PRTQF_FD_FLXINSET_INSET_MASK (0xFF << I40E_PRTQF_FD_FLXINSET_INSET_SHIFT)
+#define I40E_PRTQF_FD_MSK(_i, _j) (0x00252000 + ((_i) * 64 + (_j) * 32)) /* _i=0...63, _j=0...1 */
+#define I40E_PRTQF_FD_MSK_MAX_INDEX 63
+#define I40E_PRTQF_FD_MSK_MASK_SHIFT 0
+#define I40E_PRTQF_FD_MSK_MASK_MASK (0xFFFF << I40E_PRTQF_FD_MSK_MASK_SHIFT)
+#define I40E_PRTQF_FD_MSK_OFFSET_SHIFT 16
+#define I40E_PRTQF_FD_MSK_OFFSET_MASK (0x3F << I40E_PRTQF_FD_MSK_OFFSET_SHIFT)
+#define I40E_PRTQF_FLX_PIT(_i) (0x00255200 + ((_i) * 32)) /* _i=0...8 */
+#define I40E_PRTQF_FLX_PIT_MAX_INDEX 8
+#define I40E_PRTQF_FLX_PIT_SOURCE_OFF_SHIFT 0
+#define I40E_PRTQF_FLX_PIT_SOURCE_OFF_MASK (0x3F << I40E_PRTQF_FLX_PIT_SOURCE_OFF_SHIFT)
+#define I40E_PRTQF_FLX_PIT_FSIZE_SHIFT 6
+#define I40E_PRTQF_FLX_PIT_FSIZE_MASK (0xF << I40E_PRTQF_FLX_PIT_FSIZE_SHIFT)
+#define I40E_PRTQF_FLX_PIT_DEST_OFF_SHIFT 10
+#define I40E_PRTQF_FLX_PIT_DEST_OFF_MASK (0x3F << I40E_PRTQF_FLX_PIT_DEST_OFF_SHIFT)
+#define I40E_VFQF_HENA1(_i, _VF) (0x00230800 + ((_i) * 1024 + (_VF) * 4))
+#define I40E_VFQF_HENA1_MAX_INDEX 1
+#define I40E_VFQF_HENA1_PTYPE_ENA_SHIFT 0
+#define I40E_VFQF_HENA1_PTYPE_ENA_MASK (0xFFFFFFFF << I40E_VFQF_HENA1_PTYPE_ENA_SHIFT)
+#define I40E_VFQF_HKEY1(_i, _VF) (0x00228000 + ((_i) * 1024 + (_VF) * 4)) /* _i=0...12, _VF=0...127 */
+#define I40E_VFQF_HKEY1_MAX_INDEX 12
+#define I40E_VFQF_HKEY1_KEY_0_SHIFT 0
+#define I40E_VFQF_HKEY1_KEY_0_MASK (0xFF << I40E_VFQF_HKEY1_KEY_0_SHIFT)
+#define I40E_VFQF_HKEY1_KEY_1_SHIFT 8
+#define I40E_VFQF_HKEY1_KEY_1_MASK (0xFF << I40E_VFQF_HKEY1_KEY_1_SHIFT)
+#define I40E_VFQF_HKEY1_KEY_2_SHIFT 16
+#define I40E_VFQF_HKEY1_KEY_2_MASK (0xFF << I40E_VFQF_HKEY1_KEY_2_SHIFT)
+#define I40E_VFQF_HKEY1_KEY_3_SHIFT 24
+#define I40E_VFQF_HKEY1_KEY_3_MASK (0xFFu << I40E_VFQF_HKEY1_KEY_3_SHIFT) /* unsigned: 0xFF << 24 sets bit 31; signed left shift into the sign bit is UB (C11 6.5.7) */
+#define I40E_VFQF_HLUT1(_i, _VF) (0x00220000 + ((_i) * 1024 + (_VF) * 4)) /* _i=0...15, _VF=0...127 */
+#define I40E_VFQF_HLUT1_MAX_INDEX 15
+#define I40E_VFQF_HLUT1_LUT0_SHIFT 0
+#define I40E_VFQF_HLUT1_LUT0_MASK (0xF << I40E_VFQF_HLUT1_LUT0_SHIFT)
+#define I40E_VFQF_HLUT1_LUT1_SHIFT 8
+#define I40E_VFQF_HLUT1_LUT1_MASK (0xF << I40E_VFQF_HLUT1_LUT1_SHIFT)
+#define I40E_VFQF_HLUT1_LUT2_SHIFT 16
+#define I40E_VFQF_HLUT1_LUT2_MASK (0xF << I40E_VFQF_HLUT1_LUT2_SHIFT)
+#define I40E_VFQF_HLUT1_LUT3_SHIFT 24
+#define I40E_VFQF_HLUT1_LUT3_MASK (0xF << I40E_VFQF_HLUT1_LUT3_SHIFT)
+#define I40E_VFQF_HREGION1(_i, _VF) (0x0022E000 + ((_i) * 1024 + (_VF) * 4))
+#define I40E_VFQF_HREGION1_MAX_INDEX 7
+#define I40E_VFQF_HREGION1_OVERRIDE_ENA_0_SHIFT 0
+#define I40E_VFQF_HREGION1_OVERRIDE_ENA_0_MASK (0x1 << I40E_VFQF_HREGION1_OVERRIDE_ENA_0_SHIFT)
+#define I40E_VFQF_HREGION1_REGION_0_SHIFT 1
+#define I40E_VFQF_HREGION1_REGION_0_MASK (0x7 << I40E_VFQF_HREGION1_REGION_0_SHIFT)
+#define I40E_VFQF_HREGION1_OVERRIDE_ENA_1_SHIFT 4
+#define I40E_VFQF_HREGION1_OVERRIDE_ENA_1_MASK (0x1 << I40E_VFQF_HREGION1_OVERRIDE_ENA_1_SHIFT)
+#define I40E_VFQF_HREGION1_REGION_1_SHIFT 5
+#define I40E_VFQF_HREGION1_REGION_1_MASK (0x7 << I40E_VFQF_HREGION1_REGION_1_SHIFT)
+#define I40E_VFQF_HREGION1_OVERRIDE_ENA_2_SHIFT 8
+#define I40E_VFQF_HREGION1_OVERRIDE_ENA_2_MASK (0x1 << I40E_VFQF_HREGION1_OVERRIDE_ENA_2_SHIFT)
+#define I40E_VFQF_HREGION1_REGION_2_SHIFT 9
+#define I40E_VFQF_HREGION1_REGION_2_MASK (0x7 << I40E_VFQF_HREGION1_REGION_2_SHIFT)
+#define I40E_VFQF_HREGION1_OVERRIDE_ENA_3_SHIFT 12
+#define I40E_VFQF_HREGION1_OVERRIDE_ENA_3_MASK (0x1 << I40E_VFQF_HREGION1_OVERRIDE_ENA_3_SHIFT)
+#define I40E_VFQF_HREGION1_REGION_3_SHIFT 13
+#define I40E_VFQF_HREGION1_REGION_3_MASK (0x7 << I40E_VFQF_HREGION1_REGION_3_SHIFT)
+#define I40E_VFQF_HREGION1_OVERRIDE_ENA_4_SHIFT 16
+#define I40E_VFQF_HREGION1_OVERRIDE_ENA_4_MASK (0x1 << I40E_VFQF_HREGION1_OVERRIDE_ENA_4_SHIFT)
+#define I40E_VFQF_HREGION1_REGION_4_SHIFT 17
+#define I40E_VFQF_HREGION1_REGION_4_MASK (0x7 << I40E_VFQF_HREGION1_REGION_4_SHIFT)
+#define I40E_VFQF_HREGION1_OVERRIDE_ENA_5_SHIFT 20
+#define I40E_VFQF_HREGION1_OVERRIDE_ENA_5_MASK (0x1 << I40E_VFQF_HREGION1_OVERRIDE_ENA_5_SHIFT)
+#define I40E_VFQF_HREGION1_REGION_5_SHIFT 21
+#define I40E_VFQF_HREGION1_REGION_5_MASK (0x7 << I40E_VFQF_HREGION1_REGION_5_SHIFT)
+#define I40E_VFQF_HREGION1_OVERRIDE_ENA_6_SHIFT 24
+#define I40E_VFQF_HREGION1_OVERRIDE_ENA_6_MASK (0x1 << I40E_VFQF_HREGION1_OVERRIDE_ENA_6_SHIFT)
+#define I40E_VFQF_HREGION1_REGION_6_SHIFT 25
+#define I40E_VFQF_HREGION1_REGION_6_MASK (0x7 << I40E_VFQF_HREGION1_REGION_6_SHIFT)
+#define I40E_VFQF_HREGION1_OVERRIDE_ENA_7_SHIFT 28
+#define I40E_VFQF_HREGION1_OVERRIDE_ENA_7_MASK (0x1 << I40E_VFQF_HREGION1_OVERRIDE_ENA_7_SHIFT)
+#define I40E_VFQF_HREGION1_REGION_7_SHIFT 29
+#define I40E_VFQF_HREGION1_REGION_7_MASK (0x7u << I40E_VFQF_HREGION1_REGION_7_SHIFT) /* unsigned: 0x7 << 29 sets bit 31; signed left shift into the sign bit is UB (C11 6.5.7) */
+#define I40E_VPQF_CTL(_VF) (0x001C0000 + ((_VF) * 4)) /* _i=0...127 */
+#define I40E_VPQF_CTL_MAX_INDEX 127
+#define I40E_VPQF_CTL_PEHSIZE_SHIFT 0
+#define I40E_VPQF_CTL_PEHSIZE_MASK (0x1F << I40E_VPQF_CTL_PEHSIZE_SHIFT)
+#define I40E_VPQF_CTL_PEDSIZE_SHIFT 5
+#define I40E_VPQF_CTL_PEDSIZE_MASK (0x1F << I40E_VPQF_CTL_PEDSIZE_SHIFT)
+#define I40E_VPQF_CTL_FCHSIZE_SHIFT 10
+#define I40E_VPQF_CTL_FCHSIZE_MASK (0xF << I40E_VPQF_CTL_FCHSIZE_SHIFT)
+#define I40E_VPQF_CTL_FCDSIZE_SHIFT 14
+#define I40E_VPQF_CTL_FCDSIZE_MASK (0x3 << I40E_VPQF_CTL_FCDSIZE_SHIFT)
+#define I40E_VSIQF_CTL(_VSI) (0x0020D800 + ((_VSI) * 4)) /* _i=0...383 */
+#define I40E_VSIQF_CTL_MAX_INDEX 383
+#define I40E_VSIQF_CTL_FCOE_ENA_SHIFT 0
+#define I40E_VSIQF_CTL_FCOE_ENA_MASK (0x1 << I40E_VSIQF_CTL_FCOE_ENA_SHIFT)
+#define I40E_VSIQF_CTL_PETCP_ENA_SHIFT 1
+#define I40E_VSIQF_CTL_PETCP_ENA_MASK (0x1 << I40E_VSIQF_CTL_PETCP_ENA_SHIFT)
+#define I40E_VSIQF_CTL_PEUUDP_ENA_SHIFT 2
+#define I40E_VSIQF_CTL_PEUUDP_ENA_MASK (0x1 << I40E_VSIQF_CTL_PEUUDP_ENA_SHIFT)
+#define I40E_VSIQF_CTL_PEMUDP_ENA_SHIFT 3
+#define I40E_VSIQF_CTL_PEMUDP_ENA_MASK (0x1 << I40E_VSIQF_CTL_PEMUDP_ENA_SHIFT)
+#define I40E_VSIQF_CTL_PEUFRAG_ENA_SHIFT 4
+#define I40E_VSIQF_CTL_PEUFRAG_ENA_MASK (0x1 << I40E_VSIQF_CTL_PEUFRAG_ENA_SHIFT)
+#define I40E_VSIQF_CTL_PEMFRAG_ENA_SHIFT 5
+#define I40E_VSIQF_CTL_PEMFRAG_ENA_MASK (0x1 << I40E_VSIQF_CTL_PEMFRAG_ENA_SHIFT)
+#define I40E_VSIQF_TCREGION(_i, _VSI) (0x00206000 + ((_i) * 2048 + (_VSI) * 4))
+#define I40E_VSIQF_TCREGION_MAX_INDEX 7
+#define I40E_VSIQF_TCREGION_TC_OFFSET_SHIFT 0
+#define I40E_VSIQF_TCREGION_TC_OFFSET_MASK (0x1FF << I40E_VSIQF_TCREGION_TC_OFFSET_SHIFT)
+#define I40E_VSIQF_TCREGION_TC_SIZE_SHIFT 9
+#define I40E_VSIQF_TCREGION_TC_SIZE_MASK (0x7 << I40E_VSIQF_TCREGION_TC_SIZE_SHIFT)
+#define I40E_VSIQF_TCREGION_TC_OFFSET2_SHIFT 16
+#define I40E_VSIQF_TCREGION_TC_OFFSET2_MASK (0x1FF << I40E_VSIQF_TCREGION_TC_OFFSET2_SHIFT)
+#define I40E_VSIQF_TCREGION_TC_SIZE2_SHIFT 25
+#define I40E_VSIQF_TCREGION_TC_SIZE2_MASK (0x7 << I40E_VSIQF_TCREGION_TC_SIZE2_SHIFT)
+#define I40E_GL_FCOECRC(_i) (0x00314d80 + ((_i) * 8)) /* _i=0...143 */
+#define I40E_GL_FCOECRC_MAX_INDEX 143
+#define I40E_GL_FCOECRC_FCOECRC_SHIFT 0
+#define I40E_GL_FCOECRC_FCOECRC_MASK (0xFFFFFFFF << I40E_GL_FCOECRC_FCOECRC_SHIFT)
+#define I40E_GL_FCOEDDPC(_i) (0x00314480 + ((_i) * 8)) /* _i=0...143 */
+#define I40E_GL_FCOEDDPC_MAX_INDEX 143
+#define I40E_GL_FCOEDDPC_FCOEDDPC_SHIFT 0
+#define I40E_GL_FCOEDDPC_FCOEDDPC_MASK (0xFFFFFFFF << I40E_GL_FCOEDDPC_FCOEDDPC_SHIFT)
+#define I40E_GL_FCOEDDPEC(_i) (0x00314900 + ((_i) * 8)) /* _i=0...143 */
+#define I40E_GL_FCOEDDPEC_MAX_INDEX 143
+#define I40E_GL_FCOEDDPEC_CFOEDDPEC_SHIFT 0
+#define I40E_GL_FCOEDDPEC_CFOEDDPEC_MASK (0xFFFFFFFF << I40E_GL_FCOEDDPEC_CFOEDDPEC_SHIFT)
+#define I40E_GL_FCOEDIFEC(_i) (0x00318480 + ((_i) * 8)) /* _i=0...143 */
+#define I40E_GL_FCOEDIFEC_MAX_INDEX 143
+#define I40E_GL_FCOEDIFEC_FCOEDIFRC_SHIFT 0
+#define I40E_GL_FCOEDIFEC_FCOEDIFRC_MASK (0xFFFFFFFF << I40E_GL_FCOEDIFEC_FCOEDIFRC_SHIFT)
+#define I40E_GL_FCOEDIFRC(_i) (0x00318000 + ((_i) * 8)) /* _i=0...143 */
+#define I40E_GL_FCOEDIFRC_MAX_INDEX 143
+#define I40E_GL_FCOEDIFRC_FCOEDIFRC_SHIFT 0
+#define I40E_GL_FCOEDIFRC_FCOEDIFRC_MASK (0xFFFFFFFF << I40E_GL_FCOEDIFRC_FCOEDIFRC_SHIFT)
+#define I40E_GL_FCOEDIFTCL(_i) (0x00354000 + ((_i) * 8)) /* _i=0...143 */
+#define I40E_GL_FCOEDIFTCL_MAX_INDEX 143
+#define I40E_GL_FCOEDIFTCL_FCOEDIFTC_SHIFT 0
+#define I40E_GL_FCOEDIFTCL_FCOEDIFTC_MASK (0xFFFFFFFF << I40E_GL_FCOEDIFTCL_FCOEDIFTC_SHIFT)
+#define I40E_GL_FCOEDIXAC(_i) (0x0031c000 + ((_i) * 8)) /* _i=0...143 */
+#define I40E_GL_FCOEDIXAC_MAX_INDEX 143
+#define I40E_GL_FCOEDIXAC_FCOEDIXAC_SHIFT 0
+#define I40E_GL_FCOEDIXAC_FCOEDIXAC_MASK (0xFFFFFFFF << I40E_GL_FCOEDIXAC_FCOEDIXAC_SHIFT)
+#define I40E_GL_FCOEDIXEC(_i) (0x0034c000 + ((_i) * 8)) /* _i=0...143 */
+#define I40E_GL_FCOEDIXEC_MAX_INDEX 143
+#define I40E_GL_FCOEDIXEC_FCOEDIXEC_SHIFT 0
+#define I40E_GL_FCOEDIXEC_FCOEDIXEC_MASK (0xFFFFFFFF << I40E_GL_FCOEDIXEC_FCOEDIXEC_SHIFT)
+#define I40E_GL_FCOEDIXVC(_i) (0x00350000 + ((_i) * 8)) /* _i=0...143 */
+#define I40E_GL_FCOEDIXVC_MAX_INDEX 143
+#define I40E_GL_FCOEDIXVC_FCOEDIXVC_SHIFT 0
+#define I40E_GL_FCOEDIXVC_FCOEDIXVC_MASK (0xFFFFFFFF << I40E_GL_FCOEDIXVC_FCOEDIXVC_SHIFT)
+#define I40E_GL_FCOEDWRCH(_i) (0x00320004 + ((_i) * 8)) /* _i=0...143 */
+#define I40E_GL_FCOEDWRCH_MAX_INDEX 143
+#define I40E_GL_FCOEDWRCH_FCOEDWRCH_SHIFT 0
+#define I40E_GL_FCOEDWRCH_FCOEDWRCH_MASK (0xFFFF << I40E_GL_FCOEDWRCH_FCOEDWRCH_SHIFT)
+#define I40E_GL_FCOEDWRCL(_i) (0x00320000 + ((_i) * 8)) /* _i=0...143 */
+#define I40E_GL_FCOEDWRCL_MAX_INDEX 143
+#define I40E_GL_FCOEDWRCL_FCOEDWRCL_SHIFT 0
+#define I40E_GL_FCOEDWRCL_FCOEDWRCL_MASK (0xFFFFFFFF << I40E_GL_FCOEDWRCL_FCOEDWRCL_SHIFT)
+#define I40E_GL_FCOEDWTCH(_i) (0x00348084 + ((_i) * 8)) /* _i=0...143 */
+#define I40E_GL_FCOEDWTCH_MAX_INDEX 143
+#define I40E_GL_FCOEDWTCH_FCOEDWTCH_SHIFT 0
+#define I40E_GL_FCOEDWTCH_FCOEDWTCH_MASK (0xFFFF << I40E_GL_FCOEDWTCH_FCOEDWTCH_SHIFT)
+#define I40E_GL_FCOEDWTCL(_i) (0x00348080 + ((_i) * 8)) /* _i=0...143 */
+#define I40E_GL_FCOEDWTCL_MAX_INDEX 143
+#define I40E_GL_FCOEDWTCL_FCOEDWTCL_SHIFT 0
+#define I40E_GL_FCOEDWTCL_FCOEDWTCL_MASK (0xFFFFFFFF << I40E_GL_FCOEDWTCL_FCOEDWTCL_SHIFT)
+#define I40E_GL_FCOELAST(_i) (0x00314000 + ((_i) * 8)) /* _i=0...143 */
+#define I40E_GL_FCOELAST_MAX_INDEX 143
+#define I40E_GL_FCOELAST_FCOELAST_SHIFT 0
+#define I40E_GL_FCOELAST_FCOELAST_MASK (0xFFFFFFFF << I40E_GL_FCOELAST_FCOELAST_SHIFT)
+#define I40E_GL_FCOEPRC(_i) (0x00315200 + ((_i) * 8)) /* _i=0...143 */
+#define I40E_GL_FCOEPRC_MAX_INDEX 143
+#define I40E_GL_FCOEPRC_FCOEPRC_SHIFT 0
+#define I40E_GL_FCOEPRC_FCOEPRC_MASK (0xFFFFFFFF << I40E_GL_FCOEPRC_FCOEPRC_SHIFT)
+#define I40E_GL_FCOEPTC(_i) (0x00344C00 + ((_i) * 8)) /* _i=0...143 */
+#define I40E_GL_FCOEPTC_MAX_INDEX 143
+#define I40E_GL_FCOEPTC_FCOEPTC_SHIFT 0
+#define I40E_GL_FCOEPTC_FCOEPTC_MASK (0xFFFFFFFF << I40E_GL_FCOEPTC_FCOEPTC_SHIFT)
+#define I40E_GL_FCOERPDC(_i) (0x00324000 + ((_i) * 8)) /* _i=0...143 */
+#define I40E_GL_FCOERPDC_MAX_INDEX 143
+#define I40E_GL_FCOERPDC_FCOERPDC_SHIFT 0
+#define I40E_GL_FCOERPDC_FCOERPDC_MASK (0xFFFFFFFF << I40E_GL_FCOERPDC_FCOERPDC_SHIFT)
+#define I40E_GLPRT_BPRCH(_i) (0x003005E4 + ((_i) * 8)) /* _i=0...3 */
+#define I40E_GLPRT_BPRCH_MAX_INDEX 3
+#define I40E_GLPRT_BPRCH_UPRCH_SHIFT 0
+#define I40E_GLPRT_BPRCH_UPRCH_MASK (0xFFFF << I40E_GLPRT_BPRCH_UPRCH_SHIFT)
+#define I40E_GLPRT_BPRCL(_i) (0x003005E0 + ((_i) * 8)) /* _i=0...3 */
+#define I40E_GLPRT_BPRCL_MAX_INDEX 3
+#define I40E_GLPRT_BPRCL_UPRCH_SHIFT 0
+#define I40E_GLPRT_BPRCL_UPRCH_MASK (0xFFFFFFFF << I40E_GLPRT_BPRCL_UPRCH_SHIFT)
+#define I40E_GLPRT_BPTCH(_i) (0x00300A04 + ((_i) * 8)) /* _i=0...3 */
+#define I40E_GLPRT_BPTCH_MAX_INDEX 3
+#define I40E_GLPRT_BPTCH_UPRCH_SHIFT 0
+#define I40E_GLPRT_BPTCH_UPRCH_MASK (0xFFFF << I40E_GLPRT_BPTCH_UPRCH_SHIFT)
+#define I40E_GLPRT_BPTCL(_i) (0x00300A00 + ((_i) * 8)) /* _i=0...3 */
+#define I40E_GLPRT_BPTCL_MAX_INDEX 3
+#define I40E_GLPRT_BPTCL_UPRCH_SHIFT 0
+#define I40E_GLPRT_BPTCL_UPRCH_MASK (0xFFFFFFFF << I40E_GLPRT_BPTCL_UPRCH_SHIFT)
+#define I40E_GLPRT_CRCERRS(_i) (0x00300080 + ((_i) * 8)) /* _i=0...3 */
+#define I40E_GLPRT_CRCERRS_MAX_INDEX 3
+#define I40E_GLPRT_CRCERRS_CRCERRS_SHIFT 0
+#define I40E_GLPRT_CRCERRS_CRCERRS_MASK (0xFFFFFFFF << I40E_GLPRT_CRCERRS_CRCERRS_SHIFT)
+#define I40E_GLPRT_GORCH(_i) (0x00300004 + ((_i) * 8)) /* _i=0...3 */
+#define I40E_GLPRT_GORCH_MAX_INDEX 3
+#define I40E_GLPRT_GORCH_GORCH_SHIFT 0
+#define I40E_GLPRT_GORCH_GORCH_MASK (0xFFFF << I40E_GLPRT_GORCH_GORCH_SHIFT)
+#define I40E_GLPRT_GORCL(_i) (0x00300000 + ((_i) * 8)) /* _i=0...3 */
+#define I40E_GLPRT_GORCL_MAX_INDEX 3
+#define I40E_GLPRT_GORCL_GORCL_SHIFT 0
+#define I40E_GLPRT_GORCL_GORCL_MASK (0xFFFFFFFF << I40E_GLPRT_GORCL_GORCL_SHIFT)
+#define I40E_GLPRT_GOTCH(_i) (0x00300684 + ((_i) * 8)) /* _i=0...3 */
+#define I40E_GLPRT_GOTCH_MAX_INDEX 3
+#define I40E_GLPRT_GOTCH_GOTCH_SHIFT 0
+#define I40E_GLPRT_GOTCH_GOTCH_MASK (0xFFFF << I40E_GLPRT_GOTCH_GOTCH_SHIFT)
+#define I40E_GLPRT_GOTCL(_i) (0x00300680 + ((_i) * 8)) /* _i=0...3 */
+#define I40E_GLPRT_GOTCL_MAX_INDEX 3
+#define I40E_GLPRT_GOTCL_GOTCL_SHIFT 0
+#define I40E_GLPRT_GOTCL_GOTCL_MASK (0xFFFFFFFF << I40E_GLPRT_GOTCL_GOTCL_SHIFT)
+#define I40E_GLPRT_ILLERRC(_i) (0x003000E0 + ((_i) * 8)) /* _i=0...3 */
+#define I40E_GLPRT_ILLERRC_MAX_INDEX 3
+#define I40E_GLPRT_ILLERRC_ILLERRC_SHIFT 0
+#define I40E_GLPRT_ILLERRC_ILLERRC_MASK (0xFFFFFFFF << I40E_GLPRT_ILLERRC_ILLERRC_SHIFT)
+#define I40E_GLPRT_LDPC(_i) (0x00300620 + ((_i) * 8)) /* _i=0...3 */
+#define I40E_GLPRT_LDPC_MAX_INDEX 3
+#define I40E_GLPRT_LDPC_LDPC_SHIFT 0
+#define I40E_GLPRT_LDPC_LDPC_MASK (0xFFFFFFFF << I40E_GLPRT_LDPC_LDPC_SHIFT)
+#define I40E_GLPRT_LXOFFRXC(_i) (0x00300160 + ((_i) * 8)) /* _i=0...3 */
+#define I40E_GLPRT_LXOFFRXC_MAX_INDEX 3
+#define I40E_GLPRT_LXOFFRXC_LXOFFRXCNT_SHIFT 0
+#define I40E_GLPRT_LXOFFRXC_LXOFFRXCNT_MASK (0xFFFFFFFF << I40E_GLPRT_LXOFFRXC_LXOFFRXCNT_SHIFT)
+#define I40E_GLPRT_LXOFFTXC(_i) (0x003009A0 + ((_i) * 8)) /* _i=0...3 */
+#define I40E_GLPRT_LXOFFTXC_MAX_INDEX 3
+#define I40E_GLPRT_LXOFFTXC_LXOFFTXC_SHIFT 0
+#define I40E_GLPRT_LXOFFTXC_LXOFFTXC_MASK (0xFFFFFFFF << I40E_GLPRT_LXOFFTXC_LXOFFTXC_SHIFT)
+#define I40E_GLPRT_LXONRXC(_i) (0x00300140 + ((_i) * 8)) /* _i=0...3 */
+#define I40E_GLPRT_LXONRXC_MAX_INDEX 3
+#define I40E_GLPRT_LXONRXC_LXONRXCNT_SHIFT 0
+#define I40E_GLPRT_LXONRXC_LXONRXCNT_MASK (0xFFFFFFFF << I40E_GLPRT_LXONRXC_LXONRXCNT_SHIFT)
+#define I40E_GLPRT_LXONTXC(_i) (0x00300980 + ((_i) * 8)) /* _i=0...3 */
+#define I40E_GLPRT_LXONTXC_MAX_INDEX 3
+#define I40E_GLPRT_LXONTXC_LXONTXC_SHIFT 0
+#define I40E_GLPRT_LXONTXC_LXONTXC_MASK (0xFFFFFFFF << I40E_GLPRT_LXONTXC_LXONTXC_SHIFT)
+#define I40E_GLPRT_MLFC(_i) (0x00300020 + ((_i) * 8)) /* _i=0...3 */
+#define I40E_GLPRT_MLFC_MAX_INDEX 3
+#define I40E_GLPRT_MLFC_MLFC_SHIFT 0
+#define I40E_GLPRT_MLFC_MLFC_MASK (0xFFFFFFFF << I40E_GLPRT_MLFC_MLFC_SHIFT)
+#define I40E_GLPRT_MPRCH(_i) (0x003005C4 + ((_i) * 8)) /* _i=0...3 */
+#define I40E_GLPRT_MPRCH_MAX_INDEX 3
+#define I40E_GLPRT_MPRCH_MPRCH_SHIFT 0
+#define I40E_GLPRT_MPRCH_MPRCH_MASK (0xFFFF << I40E_GLPRT_MPRCH_MPRCH_SHIFT)
+#define I40E_GLPRT_MPRCL(_i) (0x003005C0 + ((_i) * 8)) /* _i=0...3 */
+#define I40E_GLPRT_MPRCL_MAX_INDEX 3
+#define I40E_GLPRT_MPRCL_MPRCL_SHIFT 0
+#define I40E_GLPRT_MPRCL_MPRCL_MASK (0xFFFFFFFF << I40E_GLPRT_MPRCL_MPRCL_SHIFT)
+#define I40E_GLPRT_MPTCH(_i) (0x003009E4 + ((_i) * 8)) /* _i=0...3 */
+#define I40E_GLPRT_MPTCH_MAX_INDEX 3
+#define I40E_GLPRT_MPTCH_MPTCH_SHIFT 0
+#define I40E_GLPRT_MPTCH_MPTCH_MASK (0xFFFF << I40E_GLPRT_MPTCH_MPTCH_SHIFT)
+#define I40E_GLPRT_MPTCL(_i) (0x003009E0 + ((_i) * 8)) /* _i=0...3 */
+#define I40E_GLPRT_MPTCL_MAX_INDEX 3
+#define I40E_GLPRT_MPTCL_MPTCL_SHIFT 0
+#define I40E_GLPRT_MPTCL_MPTCL_MASK (0xFFFFFFFF << I40E_GLPRT_MPTCL_MPTCL_SHIFT)
+#define I40E_GLPRT_MRFC(_i) (0x00300040 + ((_i) * 8)) /* _i=0...3 */
+#define I40E_GLPRT_MRFC_MAX_INDEX 3
+#define I40E_GLPRT_MRFC_MRFC_SHIFT 0
+#define I40E_GLPRT_MRFC_MRFC_MASK (0xFFFFFFFF << I40E_GLPRT_MRFC_MRFC_SHIFT)
+#define I40E_GLPRT_PRC1023H(_i) (0x00300504 + ((_i) * 8)) /* _i=0...3 */
+#define I40E_GLPRT_PRC1023H_MAX_INDEX 3
+#define I40E_GLPRT_PRC1023H_PRC1023H_SHIFT 0
+#define I40E_GLPRT_PRC1023H_PRC1023H_MASK (0xFFFF << I40E_GLPRT_PRC1023H_PRC1023H_SHIFT)
+#define I40E_GLPRT_PRC1023L(_i) (0x00300500 + ((_i) * 8)) /* _i=0...3 */
+#define I40E_GLPRT_PRC1023L_MAX_INDEX 3
+#define I40E_GLPRT_PRC1023L_PRC1023L_SHIFT 0
+#define I40E_GLPRT_PRC1023L_PRC1023L_MASK (0xFFFFFFFF << I40E_GLPRT_PRC1023L_PRC1023L_SHIFT)
+#define I40E_GLPRT_PRC127H(_i) (0x003004A4 + ((_i) * 8)) /* _i=0...3 */
+#define I40E_GLPRT_PRC127H_MAX_INDEX 3
+#define I40E_GLPRT_PRC127H_PRC127H_SHIFT 0
+#define I40E_GLPRT_PRC127H_PRC127H_MASK (0xFFFF << I40E_GLPRT_PRC127H_PRC127H_SHIFT)
+#define I40E_GLPRT_PRC127L(_i) (0x003004A0 + ((_i) * 8)) /* _i=0...3 */
+#define I40E_GLPRT_PRC127L_MAX_INDEX 3
+#define I40E_GLPRT_PRC127L_PRC127L_SHIFT 0
+#define I40E_GLPRT_PRC127L_PRC127L_MASK (0xFFFFFFFF << I40E_GLPRT_PRC127L_PRC127L_SHIFT)
+#define I40E_GLPRT_PRC1522H(_i) (0x00300524 + ((_i) * 8)) /* _i=0...3 */
+#define I40E_GLPRT_PRC1522H_MAX_INDEX 3
+#define I40E_GLPRT_PRC1522H_PRC1522H_SHIFT 0
+#define I40E_GLPRT_PRC1522H_PRC1522H_MASK (0xFFFF << I40E_GLPRT_PRC1522H_PRC1522H_SHIFT)
+#define I40E_GLPRT_PRC1522L(_i) (0x00300520 + ((_i) * 8)) /* _i=0...3 */
+#define I40E_GLPRT_PRC1522L_MAX_INDEX 3
+#define I40E_GLPRT_PRC1522L_PRC1522L_SHIFT 0
+#define I40E_GLPRT_PRC1522L_PRC1522L_MASK (0xFFFFFFFF << I40E_GLPRT_PRC1522L_PRC1522L_SHIFT)
+#define I40E_GLPRT_PRC255H(_i) (0x003004C4 + ((_i) * 8)) /* _i=0...3 */
+#define I40E_GLPRT_PRC255H_MAX_INDEX 3
+#define I40E_GLPRT_PRC255H_PRTPRC255H_SHIFT 0
+#define I40E_GLPRT_PRC255H_PRTPRC255H_MASK (0xFFFF << I40E_GLPRT_PRC255H_PRTPRC255H_SHIFT)
+#define I40E_GLPRT_PRC255L(_i) (0x003004C0 + ((_i) * 8)) /* _i=0...3 */
+#define I40E_GLPRT_PRC255L_MAX_INDEX 3
+#define I40E_GLPRT_PRC255L_PRC255L_SHIFT 0
+#define I40E_GLPRT_PRC255L_PRC255L_MASK (0xFFFFFFFF << I40E_GLPRT_PRC255L_PRC255L_SHIFT)
+#define I40E_GLPRT_PRC511H(_i) (0x003004E4 + ((_i) * 8)) /* _i=0...3 */
+#define I40E_GLPRT_PRC511H_MAX_INDEX 3
+#define I40E_GLPRT_PRC511H_PRC511H_SHIFT 0
+#define I40E_GLPRT_PRC511H_PRC511H_MASK (0xFFFF << I40E_GLPRT_PRC511H_PRC511H_SHIFT)
+#define I40E_GLPRT_PRC511L(_i) (0x003004E0 + ((_i) * 8)) /* _i=0...3 */
+#define I40E_GLPRT_PRC511L_MAX_INDEX 3
+#define I40E_GLPRT_PRC511L_PRC511L_SHIFT 0
+#define I40E_GLPRT_PRC511L_PRC511L_MASK (0xFFFFFFFF << I40E_GLPRT_PRC511L_PRC511L_SHIFT)
+#define I40E_GLPRT_PRC64H(_i) (0x00300484 + ((_i) * 8)) /* _i=0...3 */
+#define I40E_GLPRT_PRC64H_MAX_INDEX 3
+#define I40E_GLPRT_PRC64H_PRC64H_SHIFT 0
+#define I40E_GLPRT_PRC64H_PRC64H_MASK (0xFFFF << I40E_GLPRT_PRC64H_PRC64H_SHIFT)
+#define I40E_GLPRT_PRC64L(_i) (0x00300480 + ((_i) * 8)) /* _i=0...3 */
+#define I40E_GLPRT_PRC64L_MAX_INDEX 3
+#define I40E_GLPRT_PRC64L_PRC64L_SHIFT 0
+#define I40E_GLPRT_PRC64L_PRC64L_MASK (0xFFFFFFFF << I40E_GLPRT_PRC64L_PRC64L_SHIFT)
+#define I40E_GLPRT_PRC9522H(_i) (0x00300544 + ((_i) * 8)) /* _i=0...3 */
+#define I40E_GLPRT_PRC9522H_MAX_INDEX 3
+#define I40E_GLPRT_PRC9522H_PRC1522H_SHIFT 0
+#define I40E_GLPRT_PRC9522H_PRC1522H_MASK (0xFFFF << I40E_GLPRT_PRC9522H_PRC1522H_SHIFT)
+#define I40E_GLPRT_PRC9522L(_i) (0x00300540 + ((_i) * 8)) /* _i=0...3 */
+#define I40E_GLPRT_PRC9522L_MAX_INDEX 3
+#define I40E_GLPRT_PRC9522L_PRC1522L_SHIFT 0
+#define I40E_GLPRT_PRC9522L_PRC1522L_MASK (0xFFFFFFFF << I40E_GLPRT_PRC9522L_PRC1522L_SHIFT)
+#define I40E_GLPRT_PTC1023H(_i) (0x00300724 + ((_i) * 8)) /* _i=0...3 */
+#define I40E_GLPRT_PTC1023H_MAX_INDEX 3
+#define I40E_GLPRT_PTC1023H_PTC1023H_SHIFT 0
+#define I40E_GLPRT_PTC1023H_PTC1023H_MASK (0xFFFF << I40E_GLPRT_PTC1023H_PTC1023H_SHIFT)
+#define I40E_GLPRT_PTC1023L(_i) (0x00300720 + ((_i) * 8)) /* _i=0...3 */
+#define I40E_GLPRT_PTC1023L_MAX_INDEX 3
+#define I40E_GLPRT_PTC1023L_PTC1023L_SHIFT 0
+#define I40E_GLPRT_PTC1023L_PTC1023L_MASK (0xFFFFFFFF << I40E_GLPRT_PTC1023L_PTC1023L_SHIFT)
+#define I40E_GLPRT_PTC127H(_i) (0x003006C4 + ((_i) * 8)) /* _i=0...3 */
+#define I40E_GLPRT_PTC127H_MAX_INDEX 3
+#define I40E_GLPRT_PTC127H_PTC127H_SHIFT 0
+#define I40E_GLPRT_PTC127H_PTC127H_MASK (0xFFFF << I40E_GLPRT_PTC127H_PTC127H_SHIFT)
+#define I40E_GLPRT_PTC127L(_i) (0x003006C0 + ((_i) * 8)) /* _i=0...3 */
+#define I40E_GLPRT_PTC127L_MAX_INDEX 3
+#define I40E_GLPRT_PTC127L_PTC127L_SHIFT 0
+#define I40E_GLPRT_PTC127L_PTC127L_MASK (0xFFFFFFFF << I40E_GLPRT_PTC127L_PTC127L_SHIFT)
+#define I40E_GLPRT_PTC1522H(_i) (0x00300744 + ((_i) * 8)) /* _i=0...3 */
+#define I40E_GLPRT_PTC1522H_MAX_INDEX 3
+#define I40E_GLPRT_PTC1522H_PTC1522H_SHIFT 0
+#define I40E_GLPRT_PTC1522H_PTC1522H_MASK (0xFFFF << I40E_GLPRT_PTC1522H_PTC1522H_SHIFT)
+#define I40E_GLPRT_PTC1522L(_i) (0x00300740 + ((_i) * 8)) /* _i=0...3 */
+#define I40E_GLPRT_PTC1522L_MAX_INDEX 3
+#define I40E_GLPRT_PTC1522L_PTC1522L_SHIFT 0
+#define I40E_GLPRT_PTC1522L_PTC1522L_MASK (0xFFFFFFFF << I40E_GLPRT_PTC1522L_PTC1522L_SHIFT)
+#define I40E_GLPRT_PTC255H(_i) (0x003006E4 + ((_i) * 8)) /* _i=0...3 */
+#define I40E_GLPRT_PTC255H_MAX_INDEX 3
+#define I40E_GLPRT_PTC255H_PTC255H_SHIFT 0
+#define I40E_GLPRT_PTC255H_PTC255H_MASK (0xFFFF << I40E_GLPRT_PTC255H_PTC255H_SHIFT)
+#define I40E_GLPRT_PTC255L(_i) (0x003006E0 + ((_i) * 8)) /* _i=0...3 */
+#define I40E_GLPRT_PTC255L_MAX_INDEX 3
+#define I40E_GLPRT_PTC255L_PTC255L_SHIFT 0
+#define I40E_GLPRT_PTC255L_PTC255L_MASK (0xFFFFFFFF << I40E_GLPRT_PTC255L_PTC255L_SHIFT)
+#define I40E_GLPRT_PTC511H(_i) (0x00300704 + ((_i) * 8)) /* _i=0...3 */
+#define I40E_GLPRT_PTC511H_MAX_INDEX 3
+#define I40E_GLPRT_PTC511H_PTC511H_SHIFT 0
+#define I40E_GLPRT_PTC511H_PTC511H_MASK (0xFFFF << I40E_GLPRT_PTC511H_PTC511H_SHIFT)
+#define I40E_GLPRT_PTC511L(_i) (0x00300700 + ((_i) * 8)) /* _i=0...3 */
+#define I40E_GLPRT_PTC511L_MAX_INDEX 3
+#define I40E_GLPRT_PTC511L_PTC511L_SHIFT 0
+#define I40E_GLPRT_PTC511L_PTC511L_MASK (0xFFFFFFFF << I40E_GLPRT_PTC511L_PTC511L_SHIFT)
+#define I40E_GLPRT_PTC64H(_i) (0x003006A4 + ((_i) * 8)) /* _i=0...3 */
+#define I40E_GLPRT_PTC64H_MAX_INDEX 3
+#define I40E_GLPRT_PTC64H_PTC64H_SHIFT 0
+#define I40E_GLPRT_PTC64H_PTC64H_MASK (0xFFFF << I40E_GLPRT_PTC64H_PTC64H_SHIFT)
+#define I40E_GLPRT_PTC64L(_i) (0x003006A0 + ((_i) * 8)) /* _i=0...3 */
+#define I40E_GLPRT_PTC64L_MAX_INDEX 3
+#define I40E_GLPRT_PTC64L_PTC64L_SHIFT 0
+#define I40E_GLPRT_PTC64L_PTC64L_MASK (0xFFFFFFFF << I40E_GLPRT_PTC64L_PTC64L_SHIFT)
+#define I40E_GLPRT_PTC9522H(_i) (0x00300764 + ((_i) * 8)) /* _i=0...3 */
+#define I40E_GLPRT_PTC9522H_MAX_INDEX 3
+#define I40E_GLPRT_PTC9522H_PTC9522H_SHIFT 0
+#define I40E_GLPRT_PTC9522H_PTC9522H_MASK (0xFFFF << I40E_GLPRT_PTC9522H_PTC9522H_SHIFT)
+#define I40E_GLPRT_PTC9522L(_i) (0x00300760 + ((_i) * 8)) /* _i=0...3 */
+#define I40E_GLPRT_PTC9522L_MAX_INDEX 3
+#define I40E_GLPRT_PTC9522L_PTC9522L_SHIFT 0
+#define I40E_GLPRT_PTC9522L_PTC9522L_MASK (0xFFFFFFFF << I40E_GLPRT_PTC9522L_PTC9522L_SHIFT)
+#define I40E_GLPRT_PXOFFRXC(_i, _j) (0x00300280 + ((_i) * 8 + (_j) * 32)) /* _i=0...3; _j: per-priority index, stride 32 -- range not shown here, presumably 0...7, TODO confirm */
+#define I40E_GLPRT_PXOFFRXC_MAX_INDEX 3
+#define I40E_GLPRT_PXOFFRXC_PRPXOFFRXCNT_SHIFT 0
+#define I40E_GLPRT_PXOFFRXC_PRPXOFFRXCNT_MASK (0xFFFFFFFF << I40E_GLPRT_PXOFFRXC_PRPXOFFRXCNT_SHIFT)
+#define I40E_GLPRT_PXOFFTXC(_i, _j) (0x00300880 + ((_i) * 8 + (_j) * 32)) /* _i=0...3; _j: per-priority index, stride 32 -- range not shown here, presumably 0...7, TODO confirm */
+#define I40E_GLPRT_PXOFFTXC_MAX_INDEX 3
+#define I40E_GLPRT_PXOFFTXC_PRPXOFFTXCNT_SHIFT 0
+#define I40E_GLPRT_PXOFFTXC_PRPXOFFTXCNT_MASK (0xFFFFFFFF << I40E_GLPRT_PXOFFTXC_PRPXOFFTXCNT_SHIFT)
+#define I40E_GLPRT_PXONRXC(_i, _j) (0x00300180 + ((_i) * 8 + (_j) * 32)) /* _i=0...3; _j: per-priority index, stride 32 -- range not shown here, presumably 0...7, TODO confirm */
+#define I40E_GLPRT_PXONRXC_MAX_INDEX 3
+#define I40E_GLPRT_PXONRXC_PRPXONRXCNT_SHIFT 0
+#define I40E_GLPRT_PXONRXC_PRPXONRXCNT_MASK (0xFFFFFFFF << I40E_GLPRT_PXONRXC_PRPXONRXCNT_SHIFT)
+#define I40E_GLPRT_PXONTXC(_i, _j) (0x00300780 + ((_i) * 8 + (_j) * 32)) /* _i=0...3; _j: per-priority index, stride 32 -- range not shown here, presumably 0...7, TODO confirm */
+#define I40E_GLPRT_PXONTXC_MAX_INDEX 3
+#define I40E_GLPRT_PXONTXC_PRPXONTXC_SHIFT 0
+#define I40E_GLPRT_PXONTXC_PRPXONTXC_MASK (0xFFFFFFFF << I40E_GLPRT_PXONTXC_PRPXONTXC_SHIFT)
+#define I40E_GLPRT_RDPC(_i) (0x00300600 + ((_i) * 8)) /* _i=0...3 */
+#define I40E_GLPRT_RDPC_MAX_INDEX 3
+#define I40E_GLPRT_RDPC_RDPC_SHIFT 0
+#define I40E_GLPRT_RDPC_RDPC_MASK (0xFFFFFFFF << I40E_GLPRT_RDPC_RDPC_SHIFT)
+#define I40E_GLPRT_RFC(_i) (0x00300560 + ((_i) * 8)) /* _i=0...3 */
+#define I40E_GLPRT_RFC_MAX_INDEX 3
+#define I40E_GLPRT_RFC_RFC_SHIFT 0
+#define I40E_GLPRT_RFC_RFC_MASK (0xFFFFFFFF << I40E_GLPRT_RFC_RFC_SHIFT)
+#define I40E_GLPRT_RJC(_i) (0x00300580 + ((_i) * 8)) /* _i=0...3 */
+#define I40E_GLPRT_RJC_MAX_INDEX 3
+#define I40E_GLPRT_RJC_RJC_SHIFT 0
+#define I40E_GLPRT_RJC_RJC_MASK (0xFFFFFFFF << I40E_GLPRT_RJC_RJC_SHIFT)
+#define I40E_GLPRT_RLEC(_i) (0x003000A0 + ((_i) * 8)) /* _i=0...3 */
+#define I40E_GLPRT_RLEC_MAX_INDEX 3
+#define I40E_GLPRT_RLEC_RLEC_SHIFT 0
+#define I40E_GLPRT_RLEC_RLEC_MASK (0xFFFFFFFF << I40E_GLPRT_RLEC_RLEC_SHIFT)
+#define I40E_GLPRT_ROC(_i) (0x00300120 + ((_i) * 8)) /* _i=0...3 */
+#define I40E_GLPRT_ROC_MAX_INDEX 3
+#define I40E_GLPRT_ROC_ROC_SHIFT 0
+#define I40E_GLPRT_ROC_ROC_MASK (0xFFFFFFFF << I40E_GLPRT_ROC_ROC_SHIFT)
+#define I40E_GLPRT_RUC(_i) (0x00300100 + ((_i) * 8)) /* _i=0...3 */
+#define I40E_GLPRT_RUC_MAX_INDEX 3
+#define I40E_GLPRT_RUC_RUC_SHIFT 0
+#define I40E_GLPRT_RUC_RUC_MASK (0xFFFFFFFF << I40E_GLPRT_RUC_RUC_SHIFT)
+#define I40E_GLPRT_RUPP(_i) (0x00300660 + ((_i) * 8)) /* _i=0...3 */
+#define I40E_GLPRT_RUPP_MAX_INDEX 3
+#define I40E_GLPRT_RUPP_RUPP_SHIFT 0
+#define I40E_GLPRT_RUPP_RUPP_MASK (0xFFFFFFFF << I40E_GLPRT_RUPP_RUPP_SHIFT)
+#define I40E_GLPRT_RXON2OFFCNT(_i, _j) (0x00300380 + ((_i) * 8 + (_j) * 32)) /* _i=0...3; _j: per-priority index, stride 32 -- range not shown here, presumably 0...7, TODO confirm */
+#define I40E_GLPRT_RXON2OFFCNT_MAX_INDEX 3
+#define I40E_GLPRT_RXON2OFFCNT_PRRXON2OFFCNT_SHIFT 0
+#define I40E_GLPRT_RXON2OFFCNT_PRRXON2OFFCNT_MASK (0xFFFFFFFF << I40E_GLPRT_RXON2OFFCNT_PRRXON2OFFCNT_SHIFT)
+#define I40E_GLPRT_STDC(_i) (0x00300640 + ((_i) * 8)) /* _i=0...3 */
+#define I40E_GLPRT_STDC_MAX_INDEX 3
+#define I40E_GLPRT_STDC_STDC_SHIFT 0
+#define I40E_GLPRT_STDC_STDC_MASK (0xFFFFFFFF << I40E_GLPRT_STDC_STDC_SHIFT)
+#define I40E_GLPRT_TDOLD(_i) (0x00300A20 + ((_i) * 8)) /* _i=0...3 */
+#define I40E_GLPRT_TDOLD_MAX_INDEX 3
+#define I40E_GLPRT_TDOLD_GLPRT_TDOLD_SHIFT 0
+#define I40E_GLPRT_TDOLD_GLPRT_TDOLD_MASK (0xFFFFFFFF << I40E_GLPRT_TDOLD_GLPRT_TDOLD_SHIFT)
+#define I40E_GLPRT_TDPC(_i) (0x00375400 + ((_i) * 8)) /* _i=0...3 */
+#define I40E_GLPRT_TDPC_MAX_INDEX 3
+#define I40E_GLPRT_TDPC_TDPC_SHIFT 0
+#define I40E_GLPRT_TDPC_TDPC_MASK (0xFFFFFFFF << I40E_GLPRT_TDPC_TDPC_SHIFT)
+#define I40E_GLPRT_UPRCH(_i) (0x003005A4 + ((_i) * 8)) /* _i=0...3 */
+#define I40E_GLPRT_UPRCH_MAX_INDEX 3
+#define I40E_GLPRT_UPRCH_UPRCH_SHIFT 0
+#define I40E_GLPRT_UPRCH_UPRCH_MASK (0xFFFF << I40E_GLPRT_UPRCH_UPRCH_SHIFT)
+#define I40E_GLPRT_UPRCL(_i) (0x003005A0 + ((_i) * 8)) /* _i=0...3 */
+#define I40E_GLPRT_UPRCL_MAX_INDEX 3
+#define I40E_GLPRT_UPRCL_UPRCL_SHIFT 0
+#define I40E_GLPRT_UPRCL_UPRCL_MASK (0xFFFFFFFF << I40E_GLPRT_UPRCL_UPRCL_SHIFT)
+#define I40E_GLPRT_UPTCH(_i) (0x003009C4 + ((_i) * 8)) /* _i=0...3 */
+#define I40E_GLPRT_UPTCH_MAX_INDEX 3
+#define I40E_GLPRT_UPTCH_UPTCH_SHIFT 0
+#define I40E_GLPRT_UPTCH_UPTCH_MASK (0xFFFF << I40E_GLPRT_UPTCH_UPTCH_SHIFT)
+#define I40E_GLPRT_UPTCL(_i) (0x003009C0 + ((_i) * 8)) /* _i=0...3 */
+#define I40E_GLPRT_UPTCL_MAX_INDEX 3
+#define I40E_GLPRT_UPTCL_VUPTCH_SHIFT 0
+#define I40E_GLPRT_UPTCL_VUPTCH_MASK (0xFFFFFFFF << I40E_GLPRT_UPTCL_VUPTCH_SHIFT)
+#define I40E_GLSW_BPRCH(_i) (0x00370104 + ((_i) * 8)) /* _i=0...15 */
+#define I40E_GLSW_BPRCH_MAX_INDEX 15
+#define I40E_GLSW_BPRCH_BPRCH_SHIFT 0
+#define I40E_GLSW_BPRCH_BPRCH_MASK (0xFFFF << I40E_GLSW_BPRCH_BPRCH_SHIFT)
+#define I40E_GLSW_BPRCL(_i) (0x00370100 + ((_i) * 8)) /* _i=0...15 */
+#define I40E_GLSW_BPRCL_MAX_INDEX 15
+#define I40E_GLSW_BPRCL_BPRCL_SHIFT 0
+#define I40E_GLSW_BPRCL_BPRCL_MASK (0xFFFFFFFF << I40E_GLSW_BPRCL_BPRCL_SHIFT)
+#define I40E_GLSW_BPTCH(_i) (0x00340104 + ((_i) * 8)) /* _i=0...15 */
+#define I40E_GLSW_BPTCH_MAX_INDEX 15
+#define I40E_GLSW_BPTCH_BPTCH_SHIFT 0
+#define I40E_GLSW_BPTCH_BPTCH_MASK (0xFFFF << I40E_GLSW_BPTCH_BPTCH_SHIFT)
+#define I40E_GLSW_BPTCL(_i) (0x00340100 + ((_i) * 8)) /* _i=0...15 */
+#define I40E_GLSW_BPTCL_MAX_INDEX 15
+#define I40E_GLSW_BPTCL_BPTCL_SHIFT 0
+#define I40E_GLSW_BPTCL_BPTCL_MASK (0xFFFFFFFF << I40E_GLSW_BPTCL_BPTCL_SHIFT)
+#define I40E_GLSW_GORCH(_i) (0x0035C004 + ((_i) * 8)) /* _i=0...15 */
+#define I40E_GLSW_GORCH_MAX_INDEX 15
+#define I40E_GLSW_GORCH_GORCH_SHIFT 0
+#define I40E_GLSW_GORCH_GORCH_MASK (0xFFFF << I40E_GLSW_GORCH_GORCH_SHIFT)
+#define I40E_GLSW_GORCL(_i) (0x0035c000 + ((_i) * 8)) /* _i=0...15 */
+#define I40E_GLSW_GORCL_MAX_INDEX 15
+#define I40E_GLSW_GORCL_GORCL_SHIFT 0
+#define I40E_GLSW_GORCL_GORCL_MASK (0xFFFFFFFF << I40E_GLSW_GORCL_GORCL_SHIFT)
+#define I40E_GLSW_GOTCH(_i) (0x0032C004 + ((_i) * 8)) /* _i=0...15 */
+#define I40E_GLSW_GOTCH_MAX_INDEX 15
+#define I40E_GLSW_GOTCH_GOTCH_SHIFT 0
+#define I40E_GLSW_GOTCH_GOTCH_MASK (0xFFFF << I40E_GLSW_GOTCH_GOTCH_SHIFT)
+#define I40E_GLSW_GOTCL(_i) (0x0032c000 + ((_i) * 8)) /* _i=0...15 */
+#define I40E_GLSW_GOTCL_MAX_INDEX 15
+#define I40E_GLSW_GOTCL_GOTCL_SHIFT 0
+#define I40E_GLSW_GOTCL_GOTCL_MASK (0xFFFFFFFF << I40E_GLSW_GOTCL_GOTCL_SHIFT)
+#define I40E_GLSW_MPRCH(_i) (0x00370084 + ((_i) * 8)) /* _i=0...15 */
+#define I40E_GLSW_MPRCH_MAX_INDEX 15
+#define I40E_GLSW_MPRCH_MPRCH_SHIFT 0
+#define I40E_GLSW_MPRCH_MPRCH_MASK (0xFFFF << I40E_GLSW_MPRCH_MPRCH_SHIFT)
+#define I40E_GLSW_MPRCL(_i) (0x00370080 + ((_i) * 8)) /* _i=0...15 */
+#define I40E_GLSW_MPRCL_MAX_INDEX 15
+#define I40E_GLSW_MPRCL_MPRCL_SHIFT 0
+#define I40E_GLSW_MPRCL_MPRCL_MASK (0xFFFFFFFF << I40E_GLSW_MPRCL_MPRCL_SHIFT)
+#define I40E_GLSW_MPTCH(_i) (0x00340084 + ((_i) * 8)) /* _i=0...15 */
+#define I40E_GLSW_MPTCH_MAX_INDEX 15
+#define I40E_GLSW_MPTCH_MPTCH_SHIFT 0
+#define I40E_GLSW_MPTCH_MPTCH_MASK (0xFFFF << I40E_GLSW_MPTCH_MPTCH_SHIFT)
+#define I40E_GLSW_MPTCL(_i) (0x00340080 + ((_i) * 8)) /* _i=0...15 */
+#define I40E_GLSW_MPTCL_MAX_INDEX 15
+#define I40E_GLSW_MPTCL_MPTCL_SHIFT 0
+#define I40E_GLSW_MPTCL_MPTCL_MASK (0xFFFFFFFF << I40E_GLSW_MPTCL_MPTCL_SHIFT)
+#define I40E_GLSW_RUPP(_i) (0x00370180 + ((_i) * 8)) /* _i=0...15 */
+#define I40E_GLSW_RUPP_MAX_INDEX 15
+#define I40E_GLSW_RUPP_RUPP_SHIFT 0
+#define I40E_GLSW_RUPP_RUPP_MASK (0xFFFFFFFF << I40E_GLSW_RUPP_RUPP_SHIFT)
+#define I40E_GLSW_TDPC(_i) (0x00348000 + ((_i) * 8)) /* _i=0...15 */
+#define I40E_GLSW_TDPC_MAX_INDEX 15
+#define I40E_GLSW_TDPC_TDPC_SHIFT 0
+#define I40E_GLSW_TDPC_TDPC_MASK (0xFFFFFFFF << I40E_GLSW_TDPC_TDPC_SHIFT)
+#define I40E_GLSW_UPRCH(_i) (0x00370004 + ((_i) * 8)) /* _i=0...15 */
+#define I40E_GLSW_UPRCH_MAX_INDEX 15
+#define I40E_GLSW_UPRCH_UPRCH_SHIFT 0
+#define I40E_GLSW_UPRCH_UPRCH_MASK (0xFFFF << I40E_GLSW_UPRCH_UPRCH_SHIFT)
+#define I40E_GLSW_UPRCL(_i) (0x00370000 + ((_i) * 8)) /* _i=0...15 */
+#define I40E_GLSW_UPRCL_MAX_INDEX 15
+#define I40E_GLSW_UPRCL_UPRCL_SHIFT 0
+#define I40E_GLSW_UPRCL_UPRCL_MASK (0xFFFFFFFF << I40E_GLSW_UPRCL_UPRCL_SHIFT)
+#define I40E_GLSW_UPTCH(_i) (0x00340004 + ((_i) * 8)) /* _i=0...15 */
+#define I40E_GLSW_UPTCH_MAX_INDEX 15
+#define I40E_GLSW_UPTCH_UPTCH_SHIFT 0
+#define I40E_GLSW_UPTCH_UPTCH_MASK (0xFFFF << I40E_GLSW_UPTCH_UPTCH_SHIFT)
+#define I40E_GLSW_UPTCL(_i) (0x00340000 + ((_i) * 8)) /* _i=0...15 */
+#define I40E_GLSW_UPTCL_MAX_INDEX 15
+#define I40E_GLSW_UPTCL_UPTCL_SHIFT 0
+#define I40E_GLSW_UPTCL_UPTCL_MASK (0xFFFFFFFF << I40E_GLSW_UPTCL_UPTCL_SHIFT)
+#define I40E_GLV_BPRCH(_i) (0x0036D804 + ((_i) * 8)) /* _i=0...383 */
+#define I40E_GLV_BPRCH_MAX_INDEX 383
+#define I40E_GLV_BPRCH_BPRCH_SHIFT 0
+#define I40E_GLV_BPRCH_BPRCH_MASK (0xFFFF << I40E_GLV_BPRCH_BPRCH_SHIFT)
+#define I40E_GLV_BPRCL(_i) (0x0036d800 + ((_i) * 8)) /* _i=0...383 */
+#define I40E_GLV_BPRCL_MAX_INDEX 383
+#define I40E_GLV_BPRCL_BPRCL_SHIFT 0
+#define I40E_GLV_BPRCL_BPRCL_MASK (0xFFFFFFFF << I40E_GLV_BPRCL_BPRCL_SHIFT)
+#define I40E_GLV_BPTCH(_i) (0x0033D804 + ((_i) * 8)) /* _i=0...383 */
+#define I40E_GLV_BPTCH_MAX_INDEX 383
+#define I40E_GLV_BPTCH_BPTCH_SHIFT 0
+#define I40E_GLV_BPTCH_BPTCH_MASK (0xFFFF << I40E_GLV_BPTCH_BPTCH_SHIFT)
+#define I40E_GLV_BPTCL(_i) (0x0033d800 + ((_i) * 8)) /* _i=0...383 */
+#define I40E_GLV_BPTCL_MAX_INDEX 383
+#define I40E_GLV_BPTCL_BPTCL_SHIFT 0
+#define I40E_GLV_BPTCL_BPTCL_MASK (0xFFFFFFFF << I40E_GLV_BPTCL_BPTCL_SHIFT)
+#define I40E_GLV_GORCH(_i) (0x00358004 + ((_i) * 8)) /* _i=0...383 */
+#define I40E_GLV_GORCH_MAX_INDEX 383
+#define I40E_GLV_GORCH_GORCH_SHIFT 0
+#define I40E_GLV_GORCH_GORCH_MASK (0xFFFF << I40E_GLV_GORCH_GORCH_SHIFT)
+#define I40E_GLV_GORCL(_i) (0x00358000 + ((_i) * 8)) /* _i=0...383 */
+#define I40E_GLV_GORCL_MAX_INDEX 383
+#define I40E_GLV_GORCL_GORCL_SHIFT 0
+#define I40E_GLV_GORCL_GORCL_MASK (0xFFFFFFFF << I40E_GLV_GORCL_GORCL_SHIFT)
+#define I40E_GLV_GOTCH(_i) (0x00328004 + ((_i) * 8)) /* _i=0...383 */
+#define I40E_GLV_GOTCH_MAX_INDEX 383
+#define I40E_GLV_GOTCH_GOTCH_SHIFT 0
+#define I40E_GLV_GOTCH_GOTCH_MASK (0xFFFF << I40E_GLV_GOTCH_GOTCH_SHIFT)
+#define I40E_GLV_GOTCL(_i) (0x00328000 + ((_i) * 8)) /* _i=0...383 */
+#define I40E_GLV_GOTCL_MAX_INDEX 383
+#define I40E_GLV_GOTCL_GOTCL_SHIFT 0
+#define I40E_GLV_GOTCL_GOTCL_MASK (0xFFFFFFFF << I40E_GLV_GOTCL_GOTCL_SHIFT)
+#define I40E_GLV_MPRCH(_i) (0x0036CC04 + ((_i) * 8)) /* _i=0...383 */
+#define I40E_GLV_MPRCH_MAX_INDEX 383
+#define I40E_GLV_MPRCH_MPRCH_SHIFT 0
+#define I40E_GLV_MPRCH_MPRCH_MASK (0xFFFF << I40E_GLV_MPRCH_MPRCH_SHIFT)
+#define I40E_GLV_MPRCL(_i) (0x0036cc00 + ((_i) * 8)) /* _i=0...383 */
+#define I40E_GLV_MPRCL_MAX_INDEX 383
+#define I40E_GLV_MPRCL_MPRCL_SHIFT 0
+#define I40E_GLV_MPRCL_MPRCL_MASK (0xFFFFFFFF << I40E_GLV_MPRCL_MPRCL_SHIFT)
+#define I40E_GLV_MPTCH(_i) (0x0033CC04 + ((_i) * 8)) /* _i=0...383 */
+#define I40E_GLV_MPTCH_MAX_INDEX 383
+#define I40E_GLV_MPTCH_MPTCH_SHIFT 0
+#define I40E_GLV_MPTCH_MPTCH_MASK (0xFFFF << I40E_GLV_MPTCH_MPTCH_SHIFT)
+#define I40E_GLV_MPTCL(_i) (0x0033cc00 + ((_i) * 8)) /* _i=0...383 */
+#define I40E_GLV_MPTCL_MAX_INDEX 383
+#define I40E_GLV_MPTCL_MPTCL_SHIFT 0
+#define I40E_GLV_MPTCL_MPTCL_MASK (0xFFFFFFFF << I40E_GLV_MPTCL_MPTCL_SHIFT)
+#define I40E_GLV_RDPC(_i) (0x00310000 + ((_i) * 8)) /* _i=0...383 */
+#define I40E_GLV_RDPC_MAX_INDEX 383
+#define I40E_GLV_RDPC_RDPC_SHIFT 0
+#define I40E_GLV_RDPC_RDPC_MASK (0xFFFFFFFF << I40E_GLV_RDPC_RDPC_SHIFT)
+#define I40E_GLV_RUPP(_i) (0x0036E400 + ((_i) * 8)) /* _i=0...383 */
+#define I40E_GLV_RUPP_MAX_INDEX 383
+#define I40E_GLV_RUPP_RUPP_SHIFT 0
+#define I40E_GLV_RUPP_RUPP_MASK (0xFFFFFFFF << I40E_GLV_RUPP_RUPP_SHIFT)
+#define I40E_GLV_TEPC(_VSI) (0x00344000 + ((_VSI) * 8)) /* _VSI=0...383 */
+#define I40E_GLV_TEPC_MAX_INDEX 383
+#define I40E_GLV_TEPC_TEPC_SHIFT 0
+#define I40E_GLV_TEPC_TEPC_MASK (0xFFFFFFFF << I40E_GLV_TEPC_TEPC_SHIFT)
+#define I40E_GLV_UPRCH(_i) (0x0036C004 + ((_i) * 8)) /* _i=0...383 */
+#define I40E_GLV_UPRCH_MAX_INDEX 383
+#define I40E_GLV_UPRCH_UPRCH_SHIFT 0
+#define I40E_GLV_UPRCH_UPRCH_MASK (0xFFFF << I40E_GLV_UPRCH_UPRCH_SHIFT)
+#define I40E_GLV_UPRCL(_i) (0x0036c000 + ((_i) * 8)) /* _i=0...383 */
+#define I40E_GLV_UPRCL_MAX_INDEX 383
+#define I40E_GLV_UPRCL_UPRCL_SHIFT 0
+#define I40E_GLV_UPRCL_UPRCL_MASK (0xFFFFFFFF << I40E_GLV_UPRCL_UPRCL_SHIFT)
+#define I40E_GLV_UPTCH(_i) (0x0033C004 + ((_i) * 8)) /* _i=0...383 */
+#define I40E_GLV_UPTCH_MAX_INDEX 383
+#define I40E_GLV_UPTCH_GLVUPTCH_SHIFT 0
+#define I40E_GLV_UPTCH_GLVUPTCH_MASK (0xFFFF << I40E_GLV_UPTCH_GLVUPTCH_SHIFT)
+#define I40E_GLV_UPTCL(_i) (0x0033c000 + ((_i) * 8)) /* _i=0...383 */
+#define I40E_GLV_UPTCL_MAX_INDEX 383
+#define I40E_GLV_UPTCL_UPTCL_SHIFT 0
+#define I40E_GLV_UPTCL_UPTCL_MASK (0xFFFFFFFF << I40E_GLV_UPTCL_UPTCL_SHIFT)
+#define I40E_GLVEBTC_RBCH(_i, _j) (0x00364004 + ((_i) * 8 + (_j) * 64)) /* _i=0...7, _j=0...15 */
+#define I40E_GLVEBTC_RBCH_MAX_INDEX 7
+#define I40E_GLVEBTC_RBCH_TCBCH_SHIFT 0
+#define I40E_GLVEBTC_RBCH_TCBCH_MASK (0xFFFF << I40E_GLVEBTC_RBCH_TCBCH_SHIFT)
+#define I40E_GLVEBTC_RBCL(_i, _j) (0x00364000 + ((_i) * 8 + (_j) * 64)) /* _i=0...7, _j=0...15 */
+#define I40E_GLVEBTC_RBCL_MAX_INDEX 7
+#define I40E_GLVEBTC_RBCL_TCBCL_SHIFT 0
+#define I40E_GLVEBTC_RBCL_TCBCL_MASK (0xFFFFFFFF << I40E_GLVEBTC_RBCL_TCBCL_SHIFT)
+#define I40E_GLVEBTC_RPCH(_i, _j) (0x00368004 + ((_i) * 8 + (_j) * 64)) /* _i=0...7, _j=0...15 */
+#define I40E_GLVEBTC_RPCH_MAX_INDEX 7
+#define I40E_GLVEBTC_RPCH_TCPCH_SHIFT 0
+#define I40E_GLVEBTC_RPCH_TCPCH_MASK (0xFFFF << I40E_GLVEBTC_RPCH_TCPCH_SHIFT)
+#define I40E_GLVEBTC_RPCL(_i, _j) (0x00368000 + ((_i) * 8 + (_j) * 64)) /* _i=0...7, _j=0...15 */
+#define I40E_GLVEBTC_RPCL_MAX_INDEX 7
+#define I40E_GLVEBTC_RPCL_TCPCL_SHIFT 0
+#define I40E_GLVEBTC_RPCL_TCPCL_MASK (0xFFFFFFFF << I40E_GLVEBTC_RPCL_TCPCL_SHIFT)
+#define I40E_GLVEBTC_TBCH(_i, _j) (0x00334004 + ((_i) * 8 + (_j) * 64)) /* _i=0...7, _j=0...15 */
+#define I40E_GLVEBTC_TBCH_MAX_INDEX 7
+#define I40E_GLVEBTC_TBCH_TCBCH_SHIFT 0
+#define I40E_GLVEBTC_TBCH_TCBCH_MASK (0xFFFF << I40E_GLVEBTC_TBCH_TCBCH_SHIFT)
+#define I40E_GLVEBTC_TBCL(_i, _j) (0x00334000 + ((_i) * 8 + (_j) * 64)) /* _i=0...7, _j=0...15 */
+#define I40E_GLVEBTC_TBCL_MAX_INDEX 7
+#define I40E_GLVEBTC_TBCL_TCBCL_SHIFT 0
+#define I40E_GLVEBTC_TBCL_TCBCL_MASK (0xFFFFFFFF << I40E_GLVEBTC_TBCL_TCBCL_SHIFT)
+#define I40E_GLVEBTC_TPCH(_i, _j) (0x00338004 + ((_i) * 8 + (_j) * 64)) /* _i=0...7, _j=0...15 */
+#define I40E_GLVEBTC_TPCH_MAX_INDEX 7
+#define I40E_GLVEBTC_TPCH_TCPCH_SHIFT 0
+#define I40E_GLVEBTC_TPCH_TCPCH_MASK (0xFFFF << I40E_GLVEBTC_TPCH_TCPCH_SHIFT)
+#define I40E_GLVEBTC_TPCL(_i, _j) (0x00338000 + ((_i) * 8 + (_j) * 64)) /* _i=0...7, _j=0...15 */
+#define I40E_GLVEBTC_TPCL_MAX_INDEX 7
+#define I40E_GLVEBTC_TPCL_TCPCL_SHIFT 0
+#define I40E_GLVEBTC_TPCL_TCPCL_MASK (0xFFFFFFFF << I40E_GLVEBTC_TPCL_TCPCL_SHIFT)
+#define I40E_GLVEBVL_BPCH(_i) (0x00374804 + ((_i) * 8)) /* _i=0...127 */
+#define I40E_GLVEBVL_BPCH_MAX_INDEX 127
+#define I40E_GLVEBVL_BPCH_VLBPCH_SHIFT 0
+#define I40E_GLVEBVL_BPCH_VLBPCH_MASK (0xFFFF << I40E_GLVEBVL_BPCH_VLBPCH_SHIFT)
+#define I40E_GLVEBVL_BPCL(_i) (0x00374800 + ((_i) * 8)) /* _i=0...127 */
+#define I40E_GLVEBVL_BPCL_MAX_INDEX 127
+#define I40E_GLVEBVL_BPCL_VLBPCL_SHIFT 0
+#define I40E_GLVEBVL_BPCL_VLBPCL_MASK (0xFFFFFFFF << I40E_GLVEBVL_BPCL_VLBPCL_SHIFT)
+#define I40E_GLVEBVL_GORCH(_i) (0x00360004 + ((_i) * 8)) /* _i=0...127 */
+#define I40E_GLVEBVL_GORCH_MAX_INDEX 127
+#define I40E_GLVEBVL_GORCH_VLBCH_SHIFT 0
+#define I40E_GLVEBVL_GORCH_VLBCH_MASK (0xFFFF << I40E_GLVEBVL_GORCH_VLBCH_SHIFT)
+#define I40E_GLVEBVL_GORCL(_i) (0x00360000 + ((_i) * 8)) /* _i=0...127 */
+#define I40E_GLVEBVL_GORCL_MAX_INDEX 127
+#define I40E_GLVEBVL_GORCL_VLBCL_SHIFT 0
+#define I40E_GLVEBVL_GORCL_VLBCL_MASK (0xFFFFFFFF << I40E_GLVEBVL_GORCL_VLBCL_SHIFT)
+#define I40E_GLVEBVL_GOTCH(_i) (0x00330004 + ((_i) * 8)) /* _i=0...127 */
+#define I40E_GLVEBVL_GOTCH_MAX_INDEX 127
+#define I40E_GLVEBVL_GOTCH_VLBCH_SHIFT 0
+#define I40E_GLVEBVL_GOTCH_VLBCH_MASK (0xFFFF << I40E_GLVEBVL_GOTCH_VLBCH_SHIFT)
+#define I40E_GLVEBVL_GOTCL(_i) (0x00330000 + ((_i) * 8)) /* _i=0...127 */
+#define I40E_GLVEBVL_GOTCL_MAX_INDEX 127
+#define I40E_GLVEBVL_GOTCL_VLBCL_SHIFT 0
+#define I40E_GLVEBVL_GOTCL_VLBCL_MASK (0xFFFFFFFF << I40E_GLVEBVL_GOTCL_VLBCL_SHIFT)
+#define I40E_GLVEBVL_MPCH(_i) (0x00374404 + ((_i) * 8)) /* _i=0...127 */
+#define I40E_GLVEBVL_MPCH_MAX_INDEX 127
+#define I40E_GLVEBVL_MPCH_VLMPCH_SHIFT 0
+#define I40E_GLVEBVL_MPCH_VLMPCH_MASK (0xFFFF << I40E_GLVEBVL_MPCH_VLMPCH_SHIFT)
+#define I40E_GLVEBVL_MPCL(_i) (0x00374400 + ((_i) * 8)) /* _i=0...127 */
+#define I40E_GLVEBVL_MPCL_MAX_INDEX 127
+#define I40E_GLVEBVL_MPCL_VLMPCL_SHIFT 0
+#define I40E_GLVEBVL_MPCL_VLMPCL_MASK (0xFFFFFFFF << I40E_GLVEBVL_MPCL_VLMPCL_SHIFT)
+#define I40E_GLVEBVL_UPCH(_i) (0x00374004 + ((_i) * 8)) /* _i=0...127 */
+#define I40E_GLVEBVL_UPCH_MAX_INDEX 127
+#define I40E_GLVEBVL_UPCH_VLUPCH_SHIFT 0
+#define I40E_GLVEBVL_UPCH_VLUPCH_MASK (0xFFFF << I40E_GLVEBVL_UPCH_VLUPCH_SHIFT)
+#define I40E_GLVEBVL_UPCL(_i) (0x00374000 + ((_i) * 8)) /* _i=0...127 */
+#define I40E_GLVEBVL_UPCL_MAX_INDEX 127
+#define I40E_GLVEBVL_UPCL_VLUPCL_SHIFT 0
+#define I40E_GLVEBVL_UPCL_VLUPCL_MASK (0xFFFFFFFF << I40E_GLVEBVL_UPCL_VLUPCL_SHIFT)
+#define I40E_GL_MTG_FLU_MSK_H 0x00269F4C
+#define I40E_GL_MTG_FLU_MSK_H_MASK_HIGH_SHIFT 0
+#define I40E_GL_MTG_FLU_MSK_H_MASK_HIGH_MASK (0xFFFF << I40E_GL_MTG_FLU_MSK_H_MASK_HIGH_SHIFT)
+#define I40E_GL_MTG_FLU_MSK_L 0x00269F44
+#define I40E_GL_MTG_FLU_MSK_L_MASK_LOW_SHIFT 0
+#define I40E_GL_MTG_FLU_MSK_L_MASK_LOW_MASK (0xFFFFFFFF << I40E_GL_MTG_FLU_MSK_L_MASK_LOW_SHIFT)
+#define I40E_GL_SWR_DEF_ACT(_i) (0x0026CF00 + ((_i) * 4)) /* _i=0...25 */
+#define I40E_GL_SWR_DEF_ACT_MAX_INDEX 25
+#define I40E_GL_SWR_DEF_ACT_DEF_ACTION_SHIFT 0
+#define I40E_GL_SWR_DEF_ACT_DEF_ACTION_MASK (0xFFFFFFFF << I40E_GL_SWR_DEF_ACT_DEF_ACTION_SHIFT)
+#define I40E_GL_SWR_DEF_ACT_EN 0x0026CF84
+#define I40E_GL_SWR_DEF_ACT_EN_DEF_ACT_EN_BITMAP_SHIFT 0
+#define I40E_GL_SWR_DEF_ACT_EN_DEF_ACT_EN_BITMAP_MASK (0xFFFFFFFF << I40E_GL_SWR_DEF_ACT_EN_DEF_ACT_EN_BITMAP_SHIFT)
+#define I40E_PRT_MSCCNT 0x00256BA0
+#define I40E_PRT_MSCCNT_CCOUNT_SHIFT 0
+#define I40E_PRT_MSCCNT_CCOUNT_MASK (0x1FFFFFF << I40E_PRT_MSCCNT_CCOUNT_SHIFT)
+#define I40E_PRT_SCSTS 0x00256C20
+#define I40E_PRT_SCSTS_BSCA_SHIFT 0
+#define I40E_PRT_SCSTS_BSCA_MASK (0x1 << I40E_PRT_SCSTS_BSCA_SHIFT)
+#define I40E_PRT_SCSTS_BSCAP_SHIFT 1
+#define I40E_PRT_SCSTS_BSCAP_MASK (0x1 << I40E_PRT_SCSTS_BSCAP_SHIFT)
+#define I40E_PRT_SCSTS_MSCA_SHIFT 2
+#define I40E_PRT_SCSTS_MSCA_MASK (0x1 << I40E_PRT_SCSTS_MSCA_SHIFT)
+#define I40E_PRT_SCSTS_MSCAP_SHIFT 3
+#define I40E_PRT_SCSTS_MSCAP_MASK (0x1 << I40E_PRT_SCSTS_MSCAP_SHIFT)
+#define I40E_PRT_SWT_BSCCNT 0x00256C60
+#define I40E_PRT_SWT_BSCCNT_CCOUNT_SHIFT 0
+#define I40E_PRT_SWT_BSCCNT_CCOUNT_MASK (0x1FFFFFF << I40E_PRT_SWT_BSCCNT_CCOUNT_SHIFT)
+#define I40E_PRTTSYN_ADJ 0x001E4280
+#define I40E_PRTTSYN_ADJ_TSYNADJ_SHIFT 0
+#define I40E_PRTTSYN_ADJ_TSYNADJ_MASK (0x7FFFFFFF << I40E_PRTTSYN_ADJ_TSYNADJ_SHIFT)
+#define I40E_PRTTSYN_ADJ_SIGN_SHIFT 31
+#define I40E_PRTTSYN_ADJ_SIGN_MASK (0x1 << I40E_PRTTSYN_ADJ_SIGN_SHIFT)
+#define I40E_PRTTSYN_AUX_0(_i) (0x001E42A0 + ((_i) * 32)) /* _i=0...1 */
+#define I40E_PRTTSYN_AUX_0_MAX_INDEX 1
+#define I40E_PRTTSYN_AUX_0_OUT_ENA_SHIFT 0
+#define I40E_PRTTSYN_AUX_0_OUT_ENA_MASK (0x1 << I40E_PRTTSYN_AUX_0_OUT_ENA_SHIFT)
+#define I40E_PRTTSYN_AUX_0_OUTMOD_SHIFT 1
+#define I40E_PRTTSYN_AUX_0_OUTMOD_MASK (0x3 << I40E_PRTTSYN_AUX_0_OUTMOD_SHIFT)
+#define I40E_PRTTSYN_AUX_0_OUTLVL_SHIFT 3
+#define I40E_PRTTSYN_AUX_0_OUTLVL_MASK (0x1 << I40E_PRTTSYN_AUX_0_OUTLVL_SHIFT)
+#define I40E_PRTTSYN_AUX_0_PULSEW_SHIFT 8
+#define I40E_PRTTSYN_AUX_0_PULSEW_MASK (0xF << I40E_PRTTSYN_AUX_0_PULSEW_SHIFT)
+#define I40E_PRTTSYN_AUX_0_EVNTLVL_SHIFT 16
+#define I40E_PRTTSYN_AUX_0_EVNTLVL_MASK (0x3 << I40E_PRTTSYN_AUX_0_EVNTLVL_SHIFT)
+#define I40E_PRTTSYN_AUX_1(_i) (0x001E42E0 + ((_i) * 32)) /* _i=0...1 */
+#define I40E_PRTTSYN_AUX_1_MAX_INDEX 1
+#define I40E_PRTTSYN_AUX_1_INSTNT_SHIFT 0
+#define I40E_PRTTSYN_AUX_1_INSTNT_MASK (0x1 << I40E_PRTTSYN_AUX_1_INSTNT_SHIFT)
+#define I40E_PRTTSYN_AUX_1_SAMPLE_TIME_SHIFT 1
+#define I40E_PRTTSYN_AUX_1_SAMPLE_TIME_MASK (0x1 << I40E_PRTTSYN_AUX_1_SAMPLE_TIME_SHIFT)
+#define I40E_PRTTSYN_CLKO(_i) (0x001E4240 + ((_i) * 32)) /* _i=0...1 */
+#define I40E_PRTTSYN_CLKO_MAX_INDEX 1
+#define I40E_PRTTSYN_CLKO_TSYNCLKO_SHIFT 0
+#define I40E_PRTTSYN_CLKO_TSYNCLKO_MASK (0xFFFFFFFF << I40E_PRTTSYN_CLKO_TSYNCLKO_SHIFT)
+#define I40E_PRTTSYN_CTL0 0x001E4200
+#define I40E_PRTTSYN_CTL0_CLEAR_TSYNTIMER_SHIFT 0
+#define I40E_PRTTSYN_CTL0_CLEAR_TSYNTIMER_MASK (0x1 << I40E_PRTTSYN_CTL0_CLEAR_TSYNTIMER_SHIFT)
+#define I40E_PRTTSYN_CTL0_TXTIME_INT_ENA_SHIFT 1
+#define I40E_PRTTSYN_CTL0_TXTIME_INT_ENA_MASK (0x1 << I40E_PRTTSYN_CTL0_TXTIME_INT_ENA_SHIFT)
+#define I40E_PRTTSYN_CTL0_EVENT_INT_ENA_SHIFT 2
+#define I40E_PRTTSYN_CTL0_EVENT_INT_ENA_MASK (0x1 << I40E_PRTTSYN_CTL0_EVENT_INT_ENA_SHIFT)
+#define I40E_PRTTSYN_CTL0_TGT_INT_ENA_SHIFT 3
+#define I40E_PRTTSYN_CTL0_TGT_INT_ENA_MASK (0x1 << I40E_PRTTSYN_CTL0_TGT_INT_ENA_SHIFT)
+#define I40E_PRTTSYN_CTL0_PF_ID_SHIFT 8
+#define I40E_PRTTSYN_CTL0_PF_ID_MASK (0xF << I40E_PRTTSYN_CTL0_PF_ID_SHIFT)
+#define I40E_PRTTSYN_CTL0_TSYNACT_SHIFT 12
+#define I40E_PRTTSYN_CTL0_TSYNACT_MASK (0x3 << I40E_PRTTSYN_CTL0_TSYNACT_SHIFT)
+#define I40E_PRTTSYN_CTL0_TSYNENA_SHIFT 31
+#define I40E_PRTTSYN_CTL0_TSYNENA_MASK (0x1 << I40E_PRTTSYN_CTL0_TSYNENA_SHIFT)
+#define I40E_PRTTSYN_CTL1 0x00085020
+#define I40E_PRTTSYN_CTL1_V1MESSTYPE0_SHIFT 0
+#define I40E_PRTTSYN_CTL1_V1MESSTYPE0_MASK (0xFF << I40E_PRTTSYN_CTL1_V1MESSTYPE0_SHIFT)
+#define I40E_PRTTSYN_CTL1_V1MESSTYPE1_SHIFT 8
+#define I40E_PRTTSYN_CTL1_V1MESSTYPE1_MASK (0xFF << I40E_PRTTSYN_CTL1_V1MESSTYPE1_SHIFT)
+#define I40E_PRTTSYN_CTL1_V2MESSTYPE0_SHIFT 16
+#define I40E_PRTTSYN_CTL1_V2MESSTYPE0_MASK (0xF << I40E_PRTTSYN_CTL1_V2MESSTYPE0_SHIFT)
+#define I40E_PRTTSYN_CTL1_V2MESSTYPE1_SHIFT 20
+#define I40E_PRTTSYN_CTL1_V2MESSTYPE1_MASK (0xF << I40E_PRTTSYN_CTL1_V2MESSTYPE1_SHIFT)
+#define I40E_PRTTSYN_CTL1_TSYNTYPE_SHIFT 24
+#define I40E_PRTTSYN_CTL1_TSYNTYPE_MASK (0x3 << I40E_PRTTSYN_CTL1_TSYNTYPE_SHIFT)
+#define I40E_PRTTSYN_CTL1_UDP_ENA_SHIFT 26
+#define I40E_PRTTSYN_CTL1_UDP_ENA_MASK (0x3 << I40E_PRTTSYN_CTL1_UDP_ENA_SHIFT)
+#define I40E_PRTTSYN_CTL1_TSYNENA_SHIFT 31
+#define I40E_PRTTSYN_CTL1_TSYNENA_MASK (0x1 << I40E_PRTTSYN_CTL1_TSYNENA_SHIFT)
+#define I40E_PRTTSYN_EVNT_H(_i) (0x001E40C0 + ((_i) * 32)) /* _i=0...1 */
+#define I40E_PRTTSYN_EVNT_H_MAX_INDEX 1
+#define I40E_PRTTSYN_EVNT_H_TSYNEVNT_H_SHIFT 0
+#define I40E_PRTTSYN_EVNT_H_TSYNEVNT_H_MASK (0xFFFFFFFF << I40E_PRTTSYN_EVNT_H_TSYNEVNT_H_SHIFT)
+#define I40E_PRTTSYN_EVNT_L(_i) (0x001E4080 + ((_i) * 32)) /* _i=0...1 */
+#define I40E_PRTTSYN_EVNT_L_MAX_INDEX 1
+#define I40E_PRTTSYN_EVNT_L_TSYNEVNT_L_SHIFT 0
+#define I40E_PRTTSYN_EVNT_L_TSYNEVNT_L_MASK (0xFFFFFFFF << I40E_PRTTSYN_EVNT_L_TSYNEVNT_L_SHIFT)
+#define I40E_PRTTSYN_INC_H 0x001E4060
+#define I40E_PRTTSYN_INC_H_TSYNINC_H_SHIFT 0
+#define I40E_PRTTSYN_INC_H_TSYNINC_H_MASK (0x3F << I40E_PRTTSYN_INC_H_TSYNINC_H_SHIFT)
+#define I40E_PRTTSYN_INC_L 0x001E4040
+#define I40E_PRTTSYN_INC_L_TSYNINC_L_SHIFT 0
+#define I40E_PRTTSYN_INC_L_TSYNINC_L_MASK (0xFFFFFFFF << I40E_PRTTSYN_INC_L_TSYNINC_L_SHIFT)
+#define I40E_PRTTSYN_RXTIME_H(_i) (0x00085040 + ((_i) * 32)) /* _i=0...3 */
+#define I40E_PRTTSYN_RXTIME_H_MAX_INDEX 3
+#define I40E_PRTTSYN_RXTIME_H_RXTIEM_H_SHIFT 0
+#define I40E_PRTTSYN_RXTIME_H_RXTIEM_H_MASK (0xFFFFFFFF << I40E_PRTTSYN_RXTIME_H_RXTIEM_H_SHIFT)
+#define I40E_PRTTSYN_RXTIME_L(_i) (0x000850C0 + ((_i) * 32)) /* _i=0...3 */
+#define I40E_PRTTSYN_RXTIME_L_MAX_INDEX 3
+#define I40E_PRTTSYN_RXTIME_L_RXTIEM_L_SHIFT 0
+#define I40E_PRTTSYN_RXTIME_L_RXTIEM_L_MASK (0xFFFFFFFF << I40E_PRTTSYN_RXTIME_L_RXTIEM_L_SHIFT)
+#define I40E_PRTTSYN_STAT_0 0x001E4220
+#define I40E_PRTTSYN_STAT_0_EVENT0_SHIFT 0
+#define I40E_PRTTSYN_STAT_0_EVENT0_MASK (0x1 << I40E_PRTTSYN_STAT_0_EVENT0_SHIFT)
+#define I40E_PRTTSYN_STAT_0_EVENT1_SHIFT 1
+#define I40E_PRTTSYN_STAT_0_EVENT1_MASK (0x1 << I40E_PRTTSYN_STAT_0_EVENT1_SHIFT)
+#define I40E_PRTTSYN_STAT_0_TGT0_SHIFT 2
+#define I40E_PRTTSYN_STAT_0_TGT0_MASK (0x1 << I40E_PRTTSYN_STAT_0_TGT0_SHIFT)
+#define I40E_PRTTSYN_STAT_0_TGT1_SHIFT 3
+#define I40E_PRTTSYN_STAT_0_TGT1_MASK (0x1 << I40E_PRTTSYN_STAT_0_TGT1_SHIFT)
+#define I40E_PRTTSYN_STAT_0_TXTIME_SHIFT 4
+#define I40E_PRTTSYN_STAT_0_TXTIME_MASK (0x1 << I40E_PRTTSYN_STAT_0_TXTIME_SHIFT)
+#define I40E_PRTTSYN_STAT_1 0x00085140
+#define I40E_PRTTSYN_STAT_1_RXT0_SHIFT 0
+#define I40E_PRTTSYN_STAT_1_RXT0_MASK (0x1 << I40E_PRTTSYN_STAT_1_RXT0_SHIFT)
+#define I40E_PRTTSYN_STAT_1_RXT1_SHIFT 1
+#define I40E_PRTTSYN_STAT_1_RXT1_MASK (0x1 << I40E_PRTTSYN_STAT_1_RXT1_SHIFT)
+#define I40E_PRTTSYN_STAT_1_RXT2_SHIFT 2
+#define I40E_PRTTSYN_STAT_1_RXT2_MASK (0x1 << I40E_PRTTSYN_STAT_1_RXT2_SHIFT)
+#define I40E_PRTTSYN_STAT_1_RXT3_SHIFT 3
+#define I40E_PRTTSYN_STAT_1_RXT3_MASK (0x1 << I40E_PRTTSYN_STAT_1_RXT3_SHIFT)
+#define I40E_PRTTSYN_TGT_H(_i) (0x001E4180 + ((_i) * 32)) /* _i=0...1 */
+#define I40E_PRTTSYN_TGT_H_MAX_INDEX 1
+#define I40E_PRTTSYN_TGT_H_TSYNTGTT_H_SHIFT 0
+#define I40E_PRTTSYN_TGT_H_TSYNTGTT_H_MASK (0xFFFFFFFF << I40E_PRTTSYN_TGT_H_TSYNTGTT_H_SHIFT)
+#define I40E_PRTTSYN_TGT_L(_i) (0x001E4140 + ((_i) * 32)) /* _i=0...1 */
+#define I40E_PRTTSYN_TGT_L_MAX_INDEX 1
+#define I40E_PRTTSYN_TGT_L_TSYNTGTT_L_SHIFT 0
+#define I40E_PRTTSYN_TGT_L_TSYNTGTT_L_MASK (0xFFFFFFFF << I40E_PRTTSYN_TGT_L_TSYNTGTT_L_SHIFT)
+#define I40E_PRTTSYN_TIME_H 0x001E4120
+#define I40E_PRTTSYN_TIME_H_TSYNTIME_H_SHIFT 0
+#define I40E_PRTTSYN_TIME_H_TSYNTIME_H_MASK (0xFFFFFFFF << I40E_PRTTSYN_TIME_H_TSYNTIME_H_SHIFT)
+#define I40E_PRTTSYN_TIME_L 0x001E4100
+#define I40E_PRTTSYN_TIME_L_TSYNTIME_L_SHIFT 0
+#define I40E_PRTTSYN_TIME_L_TSYNTIME_L_MASK (0xFFFFFFFF << I40E_PRTTSYN_TIME_L_TSYNTIME_L_SHIFT)
+#define I40E_PRTTSYN_TXTIME_H 0x001E41E0
+#define I40E_PRTTSYN_TXTIME_H_TXTIEM_H_SHIFT 0
+#define I40E_PRTTSYN_TXTIME_H_TXTIEM_H_MASK (0xFFFFFFFF << I40E_PRTTSYN_TXTIME_H_TXTIEM_H_SHIFT)
+#define I40E_PRTTSYN_TXTIME_L 0x001E41C0
+#define I40E_PRTTSYN_TXTIME_L_TXTIEM_L_SHIFT 0
+#define I40E_PRTTSYN_TXTIME_L_TXTIEM_L_MASK (0xFFFFFFFF << I40E_PRTTSYN_TXTIME_L_TXTIEM_L_SHIFT)
+#define I40E_GLSCD_QUANTA 0x000B2080
+#define I40E_GLSCD_QUANTA_TSCDQUANTA_SHIFT 0
+#define I40E_GLSCD_QUANTA_TSCDQUANTA_MASK (0x7 << I40E_GLSCD_QUANTA_TSCDQUANTA_SHIFT)
+#define I40E_GL_MDET_RX 0x0012A510
+#define I40E_GL_MDET_RX_FUNCTION_SHIFT 0
+#define I40E_GL_MDET_RX_FUNCTION_MASK (0xFF << I40E_GL_MDET_RX_FUNCTION_SHIFT)
+#define I40E_GL_MDET_RX_EVENT_SHIFT 8
+#define I40E_GL_MDET_RX_EVENT_MASK (0x1FF << I40E_GL_MDET_RX_EVENT_SHIFT)
+#define I40E_GL_MDET_RX_QUEUE_SHIFT 17
+#define I40E_GL_MDET_RX_QUEUE_MASK (0x3FFF << I40E_GL_MDET_RX_QUEUE_SHIFT)
+#define I40E_GL_MDET_RX_VALID_SHIFT 31
+#define I40E_GL_MDET_RX_VALID_MASK (0x1 << I40E_GL_MDET_RX_VALID_SHIFT)
+#define I40E_GL_MDET_TX 0x000E6480
+#define I40E_GL_MDET_TX_FUNCTION_SHIFT 0
+#define I40E_GL_MDET_TX_FUNCTION_MASK (0xFF << I40E_GL_MDET_TX_FUNCTION_SHIFT)
+#define I40E_GL_MDET_TX_EVENT_SHIFT 8
+#define I40E_GL_MDET_TX_EVENT_MASK (0x1FF << I40E_GL_MDET_TX_EVENT_SHIFT)
+#define I40E_GL_MDET_TX_QUEUE_SHIFT 17
+#define I40E_GL_MDET_TX_QUEUE_MASK (0x3FFF << I40E_GL_MDET_TX_QUEUE_SHIFT)
+#define I40E_GL_MDET_TX_VALID_SHIFT 31
+#define I40E_GL_MDET_TX_VALID_MASK (0x1 << I40E_GL_MDET_TX_VALID_SHIFT)
+#define I40E_PF_MDET_RX 0x0012A400
+#define I40E_PF_MDET_RX_VALID_SHIFT 0
+#define I40E_PF_MDET_RX_VALID_MASK (0x1 << I40E_PF_MDET_RX_VALID_SHIFT)
+#define I40E_PF_MDET_TX 0x000E6400
+#define I40E_PF_MDET_TX_VALID_SHIFT 0
+#define I40E_PF_MDET_TX_VALID_MASK (0x1 << I40E_PF_MDET_TX_VALID_SHIFT)
+#define I40E_PF_VT_PFALLOC 0x001C0500
+#define I40E_PF_VT_PFALLOC_FIRSTVF_SHIFT 0
+#define I40E_PF_VT_PFALLOC_FIRSTVF_MASK (0xFF << I40E_PF_VT_PFALLOC_FIRSTVF_SHIFT)
+#define I40E_PF_VT_PFALLOC_LASTVF_SHIFT 8
+#define I40E_PF_VT_PFALLOC_LASTVF_MASK (0xFF << I40E_PF_VT_PFALLOC_LASTVF_SHIFT)
+#define I40E_PF_VT_PFALLOC_VALID_SHIFT 31
+#define I40E_PF_VT_PFALLOC_VALID_MASK (0x1 << I40E_PF_VT_PFALLOC_VALID_SHIFT)
+#define I40E_VP_MDET_RX(_VF) (0x0012A000 + ((_VF) * 4)) /* _i=0...127 */
+#define I40E_VP_MDET_RX_MAX_INDEX 127
+#define I40E_VP_MDET_RX_VALID_SHIFT 0
+#define I40E_VP_MDET_RX_VALID_MASK (0x1 << I40E_VP_MDET_RX_VALID_SHIFT)
+#define I40E_VP_MDET_TX(_VF) (0x000E6000 + ((_VF) * 4)) /* _i=0...127 */
+#define I40E_VP_MDET_TX_MAX_INDEX 127
+#define I40E_VP_MDET_TX_VALID_SHIFT 0
+#define I40E_VP_MDET_TX_VALID_MASK (0x1 << I40E_VP_MDET_TX_VALID_SHIFT)
+#define I40E_GLPM_WUMC 0x0006C800
+#define I40E_GLPM_WUMC_NOTCO_SHIFT 0
+#define I40E_GLPM_WUMC_NOTCO_MASK (0x1 << I40E_GLPM_WUMC_NOTCO_SHIFT)
+#define I40E_GLPM_WUMC_SRST_PIN_VAL_SHIFT 1
+#define I40E_GLPM_WUMC_SRST_PIN_VAL_MASK (0x1 << I40E_GLPM_WUMC_SRST_PIN_VAL_SHIFT)
+#define I40E_GLPM_WUMC_ROL_MODE_SHIFT 2
+#define I40E_GLPM_WUMC_ROL_MODE_MASK (0x1 << I40E_GLPM_WUMC_ROL_MODE_SHIFT)
+#define I40E_GLPM_WUMC_RESERVED_4_SHIFT 3
+#define I40E_GLPM_WUMC_RESERVED_4_MASK (0x1FFF << I40E_GLPM_WUMC_RESERVED_4_SHIFT)
+#define I40E_GLPM_WUMC_MNG_WU_PF_SHIFT 16
+#define I40E_GLPM_WUMC_MNG_WU_PF_MASK (0xFFFF << I40E_GLPM_WUMC_MNG_WU_PF_SHIFT)
+#define I40E_PFPM_APM 0x000B8080
+#define I40E_PFPM_APM_APME_SHIFT 0
+#define I40E_PFPM_APM_APME_MASK (0x1 << I40E_PFPM_APM_APME_SHIFT)
+#define I40E_PFPM_FHFT_DATA(_i, _j) (0x00060000 + ((_i) * 4096 + (_j) * 128))
+#define I40E_PFPM_FHFT_DATA_MAX_INDEX 7
+#define I40E_PFPM_FHFT_DATA_DWORD_SHIFT 0
+#define I40E_PFPM_FHFT_DATA_DWORD_MASK (0xFFFFFFFF << I40E_PFPM_FHFT_DATA_DWORD_SHIFT)
+#define I40E_PFPM_FHFT_LENGTH(_i) (0x0006A000 + ((_i) * 128)) /* _i=0...7 */
+#define I40E_PFPM_FHFT_LENGTH_MAX_INDEX 7
+#define I40E_PFPM_FHFT_LENGTH_LENGTH_SHIFT 0
+#define I40E_PFPM_FHFT_LENGTH_LENGTH_MASK (0xFF << I40E_PFPM_FHFT_LENGTH_LENGTH_SHIFT)
+#define I40E_PFPM_FHFT_MASK(_i, _j) (0x00068000 + ((_i) * 1024 + (_j) * 128))
+#define I40E_PFPM_FHFT_MASK_MAX_INDEX 7
+#define I40E_PFPM_FHFT_MASK_MASK_SHIFT 0
+#define I40E_PFPM_FHFT_MASK_MASK_MASK (0xFFFF << I40E_PFPM_FHFT_MASK_MASK_SHIFT)
+#define I40E_PFPM_PROXYFC 0x00245A80
+#define I40E_PFPM_PROXYFC_PPROXYE_SHIFT 0
+#define I40E_PFPM_PROXYFC_PPROXYE_MASK (0x1 << I40E_PFPM_PROXYFC_PPROXYE_SHIFT)
+#define I40E_PFPM_PROXYFC_EX_SHIFT 1
+#define I40E_PFPM_PROXYFC_EX_MASK (0x1 << I40E_PFPM_PROXYFC_EX_SHIFT)
+#define I40E_PFPM_PROXYFC_ARP_SHIFT 4
+#define I40E_PFPM_PROXYFC_ARP_MASK (0x1 << I40E_PFPM_PROXYFC_ARP_SHIFT)
+#define I40E_PFPM_PROXYFC_ARP_DIRECTED_SHIFT 5
+#define I40E_PFPM_PROXYFC_ARP_DIRECTED_MASK (0x1 << I40E_PFPM_PROXYFC_ARP_DIRECTED_SHIFT)
+#define I40E_PFPM_PROXYFC_NS_SHIFT 9
+#define I40E_PFPM_PROXYFC_NS_MASK (0x1 << I40E_PFPM_PROXYFC_NS_SHIFT)
+#define I40E_PFPM_PROXYFC_NS_DIRECTED_SHIFT 10
+#define I40E_PFPM_PROXYFC_NS_DIRECTED_MASK (0x1 << I40E_PFPM_PROXYFC_NS_DIRECTED_SHIFT)
+#define I40E_PFPM_PROXYFC_MLD_SHIFT 12
+#define I40E_PFPM_PROXYFC_MLD_MASK (0x1 << I40E_PFPM_PROXYFC_MLD_SHIFT)
+#define I40E_PFPM_PROXYS 0x00245B80
+#define I40E_PFPM_PROXYS_EX_SHIFT 1
+#define I40E_PFPM_PROXYS_EX_MASK (0x1 << I40E_PFPM_PROXYS_EX_SHIFT)
+#define I40E_PFPM_PROXYS_ARP_SHIFT 4
+#define I40E_PFPM_PROXYS_ARP_MASK (0x1 << I40E_PFPM_PROXYS_ARP_SHIFT)
+#define I40E_PFPM_PROXYS_ARP_DIRECTED_SHIFT 5
+#define I40E_PFPM_PROXYS_ARP_DIRECTED_MASK (0x1 << I40E_PFPM_PROXYS_ARP_DIRECTED_SHIFT)
+#define I40E_PFPM_PROXYS_NS_SHIFT 9
+#define I40E_PFPM_PROXYS_NS_MASK (0x1 << I40E_PFPM_PROXYS_NS_SHIFT)
+#define I40E_PFPM_PROXYS_NS_DIRECTED_SHIFT 10
+#define I40E_PFPM_PROXYS_NS_DIRECTED_MASK (0x1 << I40E_PFPM_PROXYS_NS_DIRECTED_SHIFT)
+#define I40E_PFPM_PROXYS_MLD_SHIFT 12
+#define I40E_PFPM_PROXYS_MLD_MASK (0x1 << I40E_PFPM_PROXYS_MLD_SHIFT)
+#define I40E_PFPM_WUC 0x0006B200
+#define I40E_PFPM_WUC_EN_APM_D0_SHIFT 5
+#define I40E_PFPM_WUC_EN_APM_D0_MASK (0x1 << I40E_PFPM_WUC_EN_APM_D0_SHIFT)
+#define I40E_PFPM_WUFC 0x0006B400
+#define I40E_PFPM_WUFC_LNKC_SHIFT 0
+#define I40E_PFPM_WUFC_LNKC_MASK (0x1 << I40E_PFPM_WUFC_LNKC_SHIFT)
+#define I40E_PFPM_WUFC_MAG_SHIFT 1
+#define I40E_PFPM_WUFC_MAG_MASK (0x1 << I40E_PFPM_WUFC_MAG_SHIFT)
+#define I40E_PFPM_WUFC_MNG_SHIFT 3
+#define I40E_PFPM_WUFC_MNG_MASK (0x1 << I40E_PFPM_WUFC_MNG_SHIFT)
+#define I40E_PFPM_WUFC_FLX0_ACT_SHIFT 4
+#define I40E_PFPM_WUFC_FLX0_ACT_MASK (0x1 << I40E_PFPM_WUFC_FLX0_ACT_SHIFT)
+#define I40E_PFPM_WUFC_FLX1_ACT_SHIFT 5
+#define I40E_PFPM_WUFC_FLX1_ACT_MASK (0x1 << I40E_PFPM_WUFC_FLX1_ACT_SHIFT)
+#define I40E_PFPM_WUFC_FLX2_ACT_SHIFT 6
+#define I40E_PFPM_WUFC_FLX2_ACT_MASK (0x1 << I40E_PFPM_WUFC_FLX2_ACT_SHIFT)
+#define I40E_PFPM_WUFC_FLX3_ACT_SHIFT 7
+#define I40E_PFPM_WUFC_FLX3_ACT_MASK (0x1 << I40E_PFPM_WUFC_FLX3_ACT_SHIFT)
+#define I40E_PFPM_WUFC_FLX4_ACT_SHIFT 8
+#define I40E_PFPM_WUFC_FLX4_ACT_MASK (0x1 << I40E_PFPM_WUFC_FLX4_ACT_SHIFT)
+#define I40E_PFPM_WUFC_FLX5_ACT_SHIFT 9
+#define I40E_PFPM_WUFC_FLX5_ACT_MASK (0x1 << I40E_PFPM_WUFC_FLX5_ACT_SHIFT)
+#define I40E_PFPM_WUFC_FLX6_ACT_SHIFT 10
+#define I40E_PFPM_WUFC_FLX6_ACT_MASK (0x1 << I40E_PFPM_WUFC_FLX6_ACT_SHIFT)
+#define I40E_PFPM_WUFC_FLX7_ACT_SHIFT 11
+#define I40E_PFPM_WUFC_FLX7_ACT_MASK (0x1 << I40E_PFPM_WUFC_FLX7_ACT_SHIFT)
+#define I40E_PFPM_WUFC_FLX0_SHIFT 16
+#define I40E_PFPM_WUFC_FLX0_MASK (0x1 << I40E_PFPM_WUFC_FLX0_SHIFT)
+#define I40E_PFPM_WUFC_FLX1_SHIFT 17
+#define I40E_PFPM_WUFC_FLX1_MASK (0x1 << I40E_PFPM_WUFC_FLX1_SHIFT)
+#define I40E_PFPM_WUFC_FLX2_SHIFT 18
+#define I40E_PFPM_WUFC_FLX2_MASK (0x1 << I40E_PFPM_WUFC_FLX2_SHIFT)
+#define I40E_PFPM_WUFC_FLX3_SHIFT 19
+#define I40E_PFPM_WUFC_FLX3_MASK (0x1 << I40E_PFPM_WUFC_FLX3_SHIFT)
+#define I40E_PFPM_WUFC_FLX4_SHIFT 20
+#define I40E_PFPM_WUFC_FLX4_MASK (0x1 << I40E_PFPM_WUFC_FLX4_SHIFT)
+#define I40E_PFPM_WUFC_FLX5_SHIFT 21
+#define I40E_PFPM_WUFC_FLX5_MASK (0x1 << I40E_PFPM_WUFC_FLX5_SHIFT)
+#define I40E_PFPM_WUFC_FLX6_SHIFT 22
+#define I40E_PFPM_WUFC_FLX6_MASK (0x1 << I40E_PFPM_WUFC_FLX6_SHIFT)
+#define I40E_PFPM_WUFC_FLX7_SHIFT 23
+#define I40E_PFPM_WUFC_FLX7_MASK (0x1 << I40E_PFPM_WUFC_FLX7_SHIFT)
+#define I40E_PFPM_WUFC_FW_RST_WK_SHIFT 31
+#define I40E_PFPM_WUFC_FW_RST_WK_MASK (0x1 << I40E_PFPM_WUFC_FW_RST_WK_SHIFT)
+#define I40E_PFPM_WUS 0x0006B600
+#define I40E_PFPM_WUS_LNKC_SHIFT 0
+#define I40E_PFPM_WUS_LNKC_MASK (0x1 << I40E_PFPM_WUS_LNKC_SHIFT)
+#define I40E_PFPM_WUS_MAG_SHIFT 1
+#define I40E_PFPM_WUS_MAG_MASK (0x1 << I40E_PFPM_WUS_MAG_SHIFT)
+#define I40E_PFPM_WUS_PME_STATUS_SHIFT 2
+#define I40E_PFPM_WUS_PME_STATUS_MASK (0x1 << I40E_PFPM_WUS_PME_STATUS_SHIFT)
+#define I40E_PFPM_WUS_MNG_SHIFT 3
+#define I40E_PFPM_WUS_MNG_MASK (0x1 << I40E_PFPM_WUS_MNG_SHIFT)
+#define I40E_PFPM_WUS_FLX0_SHIFT 16
+#define I40E_PFPM_WUS_FLX0_MASK (0x1 << I40E_PFPM_WUS_FLX0_SHIFT)
+#define I40E_PFPM_WUS_FLX1_SHIFT 17
+#define I40E_PFPM_WUS_FLX1_MASK (0x1 << I40E_PFPM_WUS_FLX1_SHIFT)
+#define I40E_PFPM_WUS_FLX2_SHIFT 18
+#define I40E_PFPM_WUS_FLX2_MASK (0x1 << I40E_PFPM_WUS_FLX2_SHIFT)
+#define I40E_PFPM_WUS_FLX3_SHIFT 19
+#define I40E_PFPM_WUS_FLX3_MASK (0x1 << I40E_PFPM_WUS_FLX3_SHIFT)
+#define I40E_PFPM_WUS_FLX4_SHIFT 20
+#define I40E_PFPM_WUS_FLX4_MASK (0x1 << I40E_PFPM_WUS_FLX4_SHIFT)
+#define I40E_PFPM_WUS_FLX5_SHIFT 21
+#define I40E_PFPM_WUS_FLX5_MASK (0x1 << I40E_PFPM_WUS_FLX5_SHIFT)
+#define I40E_PFPM_WUS_FLX6_SHIFT 22
+#define I40E_PFPM_WUS_FLX6_MASK (0x1 << I40E_PFPM_WUS_FLX6_SHIFT)
+#define I40E_PFPM_WUS_FLX7_SHIFT 23
+#define I40E_PFPM_WUS_FLX7_MASK (0x1 << I40E_PFPM_WUS_FLX7_SHIFT)
+#define I40E_PFPM_WUS_FW_RST_WK_SHIFT 31
+#define I40E_PFPM_WUS_FW_RST_WK_MASK (0x1 << I40E_PFPM_WUS_FW_RST_WK_SHIFT)
+#define I40E_PRTPM_FHFHR 0x0006C000
+#define I40E_PRTPM_FHFHR_UNICAST_SHIFT 0
+#define I40E_PRTPM_FHFHR_UNICAST_MASK (0x1 << I40E_PRTPM_FHFHR_UNICAST_SHIFT)
+#define I40E_PRTPM_FHFHR_MULTICAST_SHIFT 1
+#define I40E_PRTPM_FHFHR_MULTICAST_MASK (0x1 << I40E_PRTPM_FHFHR_MULTICAST_SHIFT)
+#define I40E_PRTPM_SAH(_i) (0x001E44C0 + ((_i) * 32)) /* _i=0...3 */
+#define I40E_PRTPM_SAH_MAX_INDEX 3
+#define I40E_PRTPM_SAH_PFPM_SAH_SHIFT 0
+#define I40E_PRTPM_SAH_PFPM_SAH_MASK (0xFFFF << I40E_PRTPM_SAH_PFPM_SAH_SHIFT)
+#define I40E_PRTPM_SAH_PF_NUM_SHIFT 26
+#define I40E_PRTPM_SAH_PF_NUM_MASK (0xF << I40E_PRTPM_SAH_PF_NUM_SHIFT)
+#define I40E_PRTPM_SAH_MC_MAG_EN_SHIFT 30
+#define I40E_PRTPM_SAH_MC_MAG_EN_MASK (0x1 << I40E_PRTPM_SAH_MC_MAG_EN_SHIFT)
+#define I40E_PRTPM_SAH_AV_SHIFT 31
+#define I40E_PRTPM_SAH_AV_MASK (0x1 << I40E_PRTPM_SAH_AV_SHIFT)
+#define I40E_PRTPM_SAL(_i) (0x001E4440 + ((_i) * 32)) /* _i=0...3 */
+#define I40E_PRTPM_SAL_MAX_INDEX 3
+#define I40E_PRTPM_SAL_PFPM_SAL_SHIFT 0
+#define I40E_PRTPM_SAL_PFPM_SAL_MASK (0xFFFFFFFF << I40E_PRTPM_SAL_PFPM_SAL_SHIFT)
+#define I40E_VF_ARQBAH1 0x00006000
+#define I40E_VF_ARQBAH1_ARQBAH_SHIFT 0
+#define I40E_VF_ARQBAH1_ARQBAH_MASK (0xFFFFFFFF << I40E_VF_ARQBAH1_ARQBAH_SHIFT)
+#define I40E_VF_ARQBAL1 0x00006C00
+#define I40E_VF_ARQBAL1_ARQBAL_SHIFT 0
+#define I40E_VF_ARQBAL1_ARQBAL_MASK (0xFFFFFFFF << I40E_VF_ARQBAL1_ARQBAL_SHIFT)
+#define I40E_VF_ARQH1 0x00007400
+#define I40E_VF_ARQH1_ARQH_SHIFT 0
+#define I40E_VF_ARQH1_ARQH_MASK (0x3FF << I40E_VF_ARQH1_ARQH_SHIFT)
+#define I40E_VF_ARQLEN1 0x00008000
+#define I40E_VF_ARQLEN1_ARQLEN_SHIFT 0
+#define I40E_VF_ARQLEN1_ARQLEN_MASK (0x3FF << I40E_VF_ARQLEN1_ARQLEN_SHIFT)
+#define I40E_VF_ARQLEN1_ARQVFE_SHIFT 28
+#define I40E_VF_ARQLEN1_ARQVFE_MASK (0x1 << I40E_VF_ARQLEN1_ARQVFE_SHIFT)
+#define I40E_VF_ARQLEN1_ARQOVFL_SHIFT 29
+#define I40E_VF_ARQLEN1_ARQOVFL_MASK (0x1 << I40E_VF_ARQLEN1_ARQOVFL_SHIFT)
+#define I40E_VF_ARQLEN1_ARQCRIT_SHIFT 30
+#define I40E_VF_ARQLEN1_ARQCRIT_MASK (0x1 << I40E_VF_ARQLEN1_ARQCRIT_SHIFT)
+#define I40E_VF_ARQLEN1_ARQENABLE_SHIFT 31
+#define I40E_VF_ARQLEN1_ARQENABLE_MASK (0x1 << I40E_VF_ARQLEN1_ARQENABLE_SHIFT)
+#define I40E_VF_ARQT1 0x00007000
+#define I40E_VF_ARQT1_ARQT_SHIFT 0
+#define I40E_VF_ARQT1_ARQT_MASK (0x3FF << I40E_VF_ARQT1_ARQT_SHIFT)
+#define I40E_VF_ATQBAH1 0x00007800
+#define I40E_VF_ATQBAH1_ATQBAH_SHIFT 0
+#define I40E_VF_ATQBAH1_ATQBAH_MASK (0xFFFFFFFF << I40E_VF_ATQBAH1_ATQBAH_SHIFT)
+#define I40E_VF_ATQBAL1 0x00007C00
+#define I40E_VF_ATQBAL1_ATQBAL_SHIFT 0
+#define I40E_VF_ATQBAL1_ATQBAL_MASK (0xFFFFFFFF << I40E_VF_ATQBAL1_ATQBAL_SHIFT)
+#define I40E_VF_ATQH1 0x00006400
+#define I40E_VF_ATQH1_ATQH_SHIFT 0
+#define I40E_VF_ATQH1_ATQH_MASK (0x3FF << I40E_VF_ATQH1_ATQH_SHIFT)
+#define I40E_VF_ATQLEN1 0x00006800
+#define I40E_VF_ATQLEN1_ATQLEN_SHIFT 0
+#define I40E_VF_ATQLEN1_ATQLEN_MASK (0x3FF << I40E_VF_ATQLEN1_ATQLEN_SHIFT)
+#define I40E_VF_ATQLEN1_ATQVFE_SHIFT 28
+#define I40E_VF_ATQLEN1_ATQVFE_MASK (0x1 << I40E_VF_ATQLEN1_ATQVFE_SHIFT)
+#define I40E_VF_ATQLEN1_ATQOVFL_SHIFT 29
+#define I40E_VF_ATQLEN1_ATQOVFL_MASK (0x1 << I40E_VF_ATQLEN1_ATQOVFL_SHIFT)
+#define I40E_VF_ATQLEN1_ATQCRIT_SHIFT 30
+#define I40E_VF_ATQLEN1_ATQCRIT_MASK (0x1 << I40E_VF_ATQLEN1_ATQCRIT_SHIFT)
+#define I40E_VF_ATQLEN1_ATQENABLE_SHIFT 31
+#define I40E_VF_ATQLEN1_ATQENABLE_MASK (0x1 << I40E_VF_ATQLEN1_ATQENABLE_SHIFT)
+#define I40E_VF_ATQT1 0x00008400
+#define I40E_VF_ATQT1_ATQT_SHIFT 0
+#define I40E_VF_ATQT1_ATQT_MASK (0x3FF << I40E_VF_ATQT1_ATQT_SHIFT)
+#define I40E_VFGEN_RSTAT 0x00008800
+#define I40E_VFGEN_RSTAT_VFR_STATE_SHIFT 0
+#define I40E_VFGEN_RSTAT_VFR_STATE_MASK (0x3 << I40E_VFGEN_RSTAT_VFR_STATE_SHIFT)
+#define I40E_VFINT_DYN_CTL01 0x00005C00
+#define I40E_VFINT_DYN_CTL01_INTENA_SHIFT 0
+#define I40E_VFINT_DYN_CTL01_INTENA_MASK (0x1 << I40E_VFINT_DYN_CTL01_INTENA_SHIFT)
+#define I40E_VFINT_DYN_CTL01_CLEARPBA_SHIFT 1
+#define I40E_VFINT_DYN_CTL01_CLEARPBA_MASK (0x1 << I40E_VFINT_DYN_CTL01_CLEARPBA_SHIFT)
+#define I40E_VFINT_DYN_CTL01_SWINT_TRIG_SHIFT 2
+#define I40E_VFINT_DYN_CTL01_SWINT_TRIG_MASK (0x1 << I40E_VFINT_DYN_CTL01_SWINT_TRIG_SHIFT)
+#define I40E_VFINT_DYN_CTL01_ITR_INDX_SHIFT 3
+#define I40E_VFINT_DYN_CTL01_ITR_INDX_MASK (0x3 << I40E_VFINT_DYN_CTL01_ITR_INDX_SHIFT)
+#define I40E_VFINT_DYN_CTL01_INTERVAL_SHIFT 5
+#define I40E_VFINT_DYN_CTL01_INTERVAL_MASK (0xFFF << I40E_VFINT_DYN_CTL01_INTERVAL_SHIFT)
+#define I40E_VFINT_DYN_CTL01_SW_ITR_INDX_ENA_SHIFT 24
+#define I40E_VFINT_DYN_CTL01_SW_ITR_INDX_ENA_MASK (0x1 << I40E_VFINT_DYN_CTL01_SW_ITR_INDX_ENA_SHIFT)
+#define I40E_VFINT_DYN_CTL01_SW_ITR_INDX_SHIFT 25
+#define I40E_VFINT_DYN_CTL01_SW_ITR_INDX_MASK (0x3 << I40E_VFINT_DYN_CTL01_SW_ITR_INDX_SHIFT)
+#define I40E_VFINT_DYN_CTL01_INTENA_MSK_SHIFT 31
+#define I40E_VFINT_DYN_CTL01_INTENA_MSK_MASK (0x1 << I40E_VFINT_DYN_CTL01_INTENA_MSK_SHIFT)
+#define I40E_VFINT_DYN_CTLN1(_INTVF) (0x00003800 + ((_INTVF) * 4))
+#define I40E_VFINT_DYN_CTLN1_MAX_INDEX 15
+#define I40E_VFINT_DYN_CTLN1_INTENA_SHIFT 0
+#define I40E_VFINT_DYN_CTLN1_INTENA_MASK (0x1 << I40E_VFINT_DYN_CTLN1_INTENA_SHIFT)
+#define I40E_VFINT_DYN_CTLN1_CLEARPBA_SHIFT 1
+#define I40E_VFINT_DYN_CTLN1_CLEARPBA_MASK (0x1 << I40E_VFINT_DYN_CTLN1_CLEARPBA_SHIFT)
+#define I40E_VFINT_DYN_CTLN1_SWINT_TRIG_SHIFT 2
+#define I40E_VFINT_DYN_CTLN1_SWINT_TRIG_MASK (0x1 << I40E_VFINT_DYN_CTLN1_SWINT_TRIG_SHIFT)
+#define I40E_VFINT_DYN_CTLN1_ITR_INDX_SHIFT 3
+#define I40E_VFINT_DYN_CTLN1_ITR_INDX_MASK (0x3 << I40E_VFINT_DYN_CTLN1_ITR_INDX_SHIFT)
+#define I40E_VFINT_DYN_CTLN1_INTERVAL_SHIFT 5
+#define I40E_VFINT_DYN_CTLN1_INTERVAL_MASK (0xFFF << I40E_VFINT_DYN_CTLN1_INTERVAL_SHIFT)
+#define I40E_VFINT_DYN_CTLN1_SW_ITR_INDX_ENA_SHIFT 24
+#define I40E_VFINT_DYN_CTLN1_SW_ITR_INDX_ENA_MASK (0x1 << I40E_VFINT_DYN_CTLN1_SW_ITR_INDX_ENA_SHIFT)
+#define I40E_VFINT_DYN_CTLN1_SW_ITR_INDX_SHIFT 25
+#define I40E_VFINT_DYN_CTLN1_SW_ITR_INDX_MASK (0x3 << I40E_VFINT_DYN_CTLN1_SW_ITR_INDX_SHIFT)
+#define I40E_VFINT_DYN_CTLN1_INTENA_MSK_SHIFT 31
+#define I40E_VFINT_DYN_CTLN1_INTENA_MSK_MASK (0x1 << I40E_VFINT_DYN_CTLN1_INTENA_MSK_SHIFT)
+#define I40E_VFINT_ICR0_ENA1 0x00005000
+#define I40E_VFINT_ICR0_ENA1_LINK_STAT_CHANGE_SHIFT 25
+#define I40E_VFINT_ICR0_ENA1_LINK_STAT_CHANGE_MASK (0x1 << I40E_VFINT_ICR0_ENA1_LINK_STAT_CHANGE_SHIFT)
+#define I40E_VFINT_ICR0_ENA1_ADMINQ_SHIFT 30
+#define I40E_VFINT_ICR0_ENA1_ADMINQ_MASK (0x1 << I40E_VFINT_ICR0_ENA1_ADMINQ_SHIFT)
+#define I40E_VFINT_ICR0_ENA1_RSVD_SHIFT 31
+#define I40E_VFINT_ICR0_ENA1_RSVD_MASK (0x1 << I40E_VFINT_ICR0_ENA1_RSVD_SHIFT)
+#define I40E_VFINT_ICR01 0x00004800
+#define I40E_VFINT_ICR01_INTEVENT_SHIFT 0
+#define I40E_VFINT_ICR01_INTEVENT_MASK (0x1 << I40E_VFINT_ICR01_INTEVENT_SHIFT)
+#define I40E_VFINT_ICR01_QUEUE_0_SHIFT 1
+#define I40E_VFINT_ICR01_QUEUE_0_MASK (0x1 << I40E_VFINT_ICR01_QUEUE_0_SHIFT)
+#define I40E_VFINT_ICR01_QUEUE_1_SHIFT 2
+#define I40E_VFINT_ICR01_QUEUE_1_MASK (0x1 << I40E_VFINT_ICR01_QUEUE_1_SHIFT)
+#define I40E_VFINT_ICR01_QUEUE_2_SHIFT 3
+#define I40E_VFINT_ICR01_QUEUE_2_MASK (0x1 << I40E_VFINT_ICR01_QUEUE_2_SHIFT)
+#define I40E_VFINT_ICR01_QUEUE_3_SHIFT 4
+#define I40E_VFINT_ICR01_QUEUE_3_MASK (0x1 << I40E_VFINT_ICR01_QUEUE_3_SHIFT)
+#define I40E_VFINT_ICR01_LINK_STAT_CHANGE_SHIFT 25
+#define I40E_VFINT_ICR01_LINK_STAT_CHANGE_MASK (0x1 << I40E_VFINT_ICR01_LINK_STAT_CHANGE_SHIFT)
+#define I40E_VFINT_ICR01_ADMINQ_SHIFT 30
+#define I40E_VFINT_ICR01_ADMINQ_MASK (0x1 << I40E_VFINT_ICR01_ADMINQ_SHIFT)
+#define I40E_VFINT_ICR01_SWINT_SHIFT 31
+#define I40E_VFINT_ICR01_SWINT_MASK (0x1 << I40E_VFINT_ICR01_SWINT_SHIFT)
+#define I40E_VFINT_ITR01(_i) (0x00004C00 + ((_i) * 4)) /* _i=0...2 */
+#define I40E_VFINT_ITR01_MAX_INDEX 2
+#define I40E_VFINT_ITR01_INTERVAL_SHIFT 0
+#define I40E_VFINT_ITR01_INTERVAL_MASK (0xFFF << I40E_VFINT_ITR01_INTERVAL_SHIFT)
+#define I40E_VFINT_ITRN1(_i, _INTVF) (0x00002800 + ((_i) * 64 + (_INTVF) * 4))
+#define I40E_VFINT_ITRN1_MAX_INDEX 2
+#define I40E_VFINT_ITRN1_INTERVAL_SHIFT 0
+#define I40E_VFINT_ITRN1_INTERVAL_MASK (0xFFF << I40E_VFINT_ITRN1_INTERVAL_SHIFT)
+#define I40E_VFINT_STAT_CTL01 0x00005400
+#define I40E_VFINT_STAT_CTL01_OTHER_ITR_INDX_SHIFT 2
+#define I40E_VFINT_STAT_CTL01_OTHER_ITR_INDX_MASK (0x3 << I40E_VFINT_STAT_CTL01_OTHER_ITR_INDX_SHIFT)
+#define I40E_QRX_TAIL1(_Q) (0x00002000 + ((_Q) * 4)) /* _i=0...15 */
+#define I40E_QRX_TAIL1_MAX_INDEX 15
+#define I40E_QRX_TAIL1_TAIL_SHIFT 0
+#define I40E_QRX_TAIL1_TAIL_MASK (0x1FFF << I40E_QRX_TAIL1_TAIL_SHIFT)
+#define I40E_QTX_TAIL1(_Q) (0x00000000 + ((_Q) * 4)) /* _i=0...15 */
+#define I40E_QTX_TAIL1_MAX_INDEX 15
+#define I40E_QTX_TAIL1_TAIL_SHIFT 0
+#define I40E_QTX_TAIL1_TAIL_MASK (0x1FFF << I40E_QTX_TAIL1_TAIL_SHIFT)
+#define I40E_VFMSIX_PBA 0x00002000
+#define I40E_VFMSIX_PBA_PENBIT_SHIFT 0
+#define I40E_VFMSIX_PBA_PENBIT_MASK (0xFFFFFFFF << I40E_VFMSIX_PBA_PENBIT_SHIFT)
+#define I40E_VFMSIX_TADD(_i) (0x00000008 + ((_i) * 16)) /* _i=0...16 */
+#define I40E_VFMSIX_TADD_MAX_INDEX 16
+#define I40E_VFMSIX_TADD_MSIXTADD10_SHIFT 0
+#define I40E_VFMSIX_TADD_MSIXTADD10_MASK (0x3 << I40E_VFMSIX_TADD_MSIXTADD10_SHIFT)
+#define I40E_VFMSIX_TADD_MSIXTADD_SHIFT 2
+#define I40E_VFMSIX_TADD_MSIXTADD_MASK (0x3FFFFFFF << I40E_VFMSIX_TADD_MSIXTADD_SHIFT)
+#define I40E_VFMSIX_TMSG(_i) (0x0000000C + ((_i) * 16)) /* _i=0...16 */
+#define I40E_VFMSIX_TMSG_MAX_INDEX 16
+#define I40E_VFMSIX_TMSG_MSIXTMSG_SHIFT 0
+#define I40E_VFMSIX_TMSG_MSIXTMSG_MASK (0xFFFFFFFF << I40E_VFMSIX_TMSG_MSIXTMSG_SHIFT)
+#define I40E_VFMSIX_TUADD(_i) (0x00000000 + ((_i) * 16)) /* _i=0...16 */
+#define I40E_VFMSIX_TUADD_MAX_INDEX 16
+#define I40E_VFMSIX_TUADD_MSIXTUADD_SHIFT 0
+#define I40E_VFMSIX_TUADD_MSIXTUADD_MASK (0xFFFFFFFF << I40E_VFMSIX_TUADD_MSIXTUADD_SHIFT)
+#define I40E_VFMSIX_TVCTRL(_i) (0x00000004 + ((_i) * 16)) /* _i=0...16 */
+#define I40E_VFMSIX_TVCTRL_MAX_INDEX 16
+#define I40E_VFMSIX_TVCTRL_MASK_SHIFT 0
+#define I40E_VFMSIX_TVCTRL_MASK_MASK (0x1 << I40E_VFMSIX_TVCTRL_MASK_SHIFT)
+#define I40E_VFCM_PE_ERRDATA 0x0000DC00
+#define I40E_VFCM_PE_ERRDATA_ERROR_CODE_SHIFT 0
+#define I40E_VFCM_PE_ERRDATA_ERROR_CODE_MASK (0xF << I40E_VFCM_PE_ERRDATA_ERROR_CODE_SHIFT)
+#define I40E_VFCM_PE_ERRDATA_Q_TYPE_SHIFT 4
+#define I40E_VFCM_PE_ERRDATA_Q_TYPE_MASK (0x7 << I40E_VFCM_PE_ERRDATA_Q_TYPE_SHIFT)
+#define I40E_VFCM_PE_ERRDATA_Q_NUM_SHIFT 8
+#define I40E_VFCM_PE_ERRDATA_Q_NUM_MASK (0x3FFFF << I40E_VFCM_PE_ERRDATA_Q_NUM_SHIFT)
+#define I40E_VFCM_PE_ERRINFO 0x0000D800
+#define I40E_VFCM_PE_ERRINFO_ERROR_VALID_SHIFT 0
+#define I40E_VFCM_PE_ERRINFO_ERROR_VALID_MASK (0x1 << I40E_VFCM_PE_ERRINFO_ERROR_VALID_SHIFT)
+#define I40E_VFCM_PE_ERRINFO_ERROR_INST_SHIFT 4
+#define I40E_VFCM_PE_ERRINFO_ERROR_INST_MASK (0x7 << I40E_VFCM_PE_ERRINFO_ERROR_INST_SHIFT)
+#define I40E_VFCM_PE_ERRINFO_DBL_ERROR_CNT_SHIFT 8
+#define I40E_VFCM_PE_ERRINFO_DBL_ERROR_CNT_MASK (0xFF << I40E_VFCM_PE_ERRINFO_DBL_ERROR_CNT_SHIFT)
+#define I40E_VFCM_PE_ERRINFO_RLU_ERROR_CNT_SHIFT 16
+#define I40E_VFCM_PE_ERRINFO_RLU_ERROR_CNT_MASK (0xFF << I40E_VFCM_PE_ERRINFO_RLU_ERROR_CNT_SHIFT)
+#define I40E_VFCM_PE_ERRINFO_RLS_ERROR_CNT_SHIFT 24
+#define I40E_VFCM_PE_ERRINFO_RLS_ERROR_CNT_MASK (0xFF << I40E_VFCM_PE_ERRINFO_RLS_ERROR_CNT_SHIFT)
+#define I40E_VFPE_AEQALLOC1 0x0000A400
+#define I40E_VFPE_AEQALLOC1_AECOUNT_SHIFT 0
+#define I40E_VFPE_AEQALLOC1_AECOUNT_MASK (0xFFFFFFFF << I40E_VFPE_AEQALLOC1_AECOUNT_SHIFT)
+#define I40E_VFPE_CCQPHIGH1 0x00009800
+#define I40E_VFPE_CCQPHIGH1_PECCQPHIGH_SHIFT 0
+#define I40E_VFPE_CCQPHIGH1_PECCQPHIGH_MASK (0xFFFFFFFF << I40E_VFPE_CCQPHIGH1_PECCQPHIGH_SHIFT)
+#define I40E_VFPE_CCQPLOW1 0x0000AC00
+#define I40E_VFPE_CCQPLOW1_PECCQPLOW_SHIFT 0
+#define I40E_VFPE_CCQPLOW1_PECCQPLOW_MASK (0xFFFFFFFF << I40E_VFPE_CCQPLOW1_PECCQPLOW_SHIFT)
+#define I40E_VFPE_CCQPSTATUS1 0x0000B800
+#define I40E_VFPE_CCQPSTATUS1_CCQP_DONE_SHIFT 0
+#define I40E_VFPE_CCQPSTATUS1_CCQP_DONE_MASK (0x1 << I40E_VFPE_CCQPSTATUS1_CCQP_DONE_SHIFT)
+#define I40E_VFPE_CCQPSTATUS1_CCQP_ERR_SHIFT 31
+#define I40E_VFPE_CCQPSTATUS1_CCQP_ERR_MASK (0x1 << I40E_VFPE_CCQPSTATUS1_CCQP_ERR_SHIFT)
+#define I40E_VFPE_CQACK1 0x0000B000
+#define I40E_VFPE_CQACK1_PECQID_SHIFT 0
+#define I40E_VFPE_CQACK1_PECQID_MASK (0x1FFFF << I40E_VFPE_CQACK1_PECQID_SHIFT)
+#define I40E_VFPE_CQARM1 0x0000B400
+#define I40E_VFPE_CQARM1_PECQID_SHIFT 0
+#define I40E_VFPE_CQARM1_PECQID_MASK (0x1FFFF << I40E_VFPE_CQARM1_PECQID_SHIFT)
+#define I40E_VFPE_CQPDB1 0x0000BC00
+#define I40E_VFPE_CQPDB1_WQHEAD_SHIFT 0
+#define I40E_VFPE_CQPDB1_WQHEAD_MASK (0x7FF << I40E_VFPE_CQPDB1_WQHEAD_SHIFT)
+#define I40E_VFPE_CQPERRCODES1 0x00009C00
+#define I40E_VFPE_CQPERRCODES1_CQP_MINOR_CODE_SHIFT 0
+#define I40E_VFPE_CQPERRCODES1_CQP_MINOR_CODE_MASK (0xFFFF << I40E_VFPE_CQPERRCODES1_CQP_MINOR_CODE_SHIFT)
+#define I40E_VFPE_CQPERRCODES1_CQP_MAJOR_CODE_SHIFT 16
+#define I40E_VFPE_CQPERRCODES1_CQP_MAJOR_CODE_MASK (0xFFFF << I40E_VFPE_CQPERRCODES1_CQP_MAJOR_CODE_SHIFT)
+#define I40E_VFPE_CQPTAIL1 0x0000A000
+#define I40E_VFPE_CQPTAIL1_WQTAIL_SHIFT 0
+#define I40E_VFPE_CQPTAIL1_WQTAIL_MASK (0x7FF << I40E_VFPE_CQPTAIL1_WQTAIL_SHIFT)
+#define I40E_VFPE_CQPTAIL1_CQP_OP_ERR_SHIFT 31
+#define I40E_VFPE_CQPTAIL1_CQP_OP_ERR_MASK (0x1 << I40E_VFPE_CQPTAIL1_CQP_OP_ERR_SHIFT)
+#define I40E_VFPE_IPCONFIG01 0x00008C00
+#define I40E_VFPE_IPCONFIG01_PEIPID_SHIFT 0
+#define I40E_VFPE_IPCONFIG01_PEIPID_MASK (0xFFFF << I40E_VFPE_IPCONFIG01_PEIPID_SHIFT)
+#define I40E_VFPE_IPCONFIG01_USEENTIREIDRANGE_SHIFT 16
+#define I40E_VFPE_IPCONFIG01_USEENTIREIDRANGE_MASK (0x1 << I40E_VFPE_IPCONFIG01_USEENTIREIDRANGE_SHIFT)
+#define I40E_VFPE_IPCONFIG01_USEUPPERIDRANGE_SHIFT 17
+#define I40E_VFPE_IPCONFIG01_USEUPPERIDRANGE_MASK (0x1 << I40E_VFPE_IPCONFIG01_USEUPPERIDRANGE_SHIFT)
+#define I40E_VFPE_MRTEIDXMASK1 0x00009000
+#define I40E_VFPE_MRTEIDXMASK1_MRTEIDXMASKBITS_SHIFT 0
+#define I40E_VFPE_MRTEIDXMASK1_MRTEIDXMASKBITS_MASK (0x1F << I40E_VFPE_MRTEIDXMASK1_MRTEIDXMASKBITS_SHIFT)
+#define I40E_VFPE_RCVUNEXPECTEDERROR1 0x00009400
+#define I40E_VFPE_RCVUNEXPECTEDERROR1_TCP_RX_UNEXP_ERR_SHIFT 0
+#define I40E_VFPE_RCVUNEXPECTEDERROR1_TCP_RX_UNEXP_ERR_MASK (0xFFFFFF << I40E_VFPE_RCVUNEXPECTEDERROR1_TCP_RX_UNEXP_ERR_SHIFT)
+#define I40E_VFPE_TCPNOWTIMER1 0x0000A800
+#define I40E_VFPE_TCPNOWTIMER1_TCP_NOW_SHIFT 0
+#define I40E_VFPE_TCPNOWTIMER1_TCP_NOW_MASK (0xFFFFFFFF << I40E_VFPE_TCPNOWTIMER1_TCP_NOW_SHIFT)
+#define I40E_VFPE_WQEALLOC1 0x0000C000
+#define I40E_VFPE_WQEALLOC1_PEQPID_SHIFT 0
+#define I40E_VFPE_WQEALLOC1_PEQPID_MASK (0x3FFFF << I40E_VFPE_WQEALLOC1_PEQPID_SHIFT)
+#define I40E_VFPE_WQEALLOC1_WQE_DESC_INDEX_SHIFT 20
+#define I40E_VFPE_WQEALLOC1_WQE_DESC_INDEX_MASK (0xFFF << I40E_VFPE_WQEALLOC1_WQE_DESC_INDEX_SHIFT)
+#define I40E_VFQF_HENA(_i) (0x0000C400 + ((_i) * 4)) /* _i=0...1 */
+#define I40E_VFQF_HENA_MAX_INDEX 1
+#define I40E_VFQF_HENA_PTYPE_ENA_SHIFT 0
+#define I40E_VFQF_HENA_PTYPE_ENA_MASK (0xFFFFFFFF << I40E_VFQF_HENA_PTYPE_ENA_SHIFT)
+#define I40E_VFQF_HKEY(_i) (0x0000CC00 + ((_i) * 4)) /* _i=0...12 */
+#define I40E_VFQF_HKEY_MAX_INDEX 12
+#define I40E_VFQF_HKEY_KEY_0_SHIFT 0
+#define I40E_VFQF_HKEY_KEY_0_MASK (0xFF << I40E_VFQF_HKEY_KEY_0_SHIFT)
+#define I40E_VFQF_HKEY_KEY_1_SHIFT 8
+#define I40E_VFQF_HKEY_KEY_1_MASK (0xFF << I40E_VFQF_HKEY_KEY_1_SHIFT)
+#define I40E_VFQF_HKEY_KEY_2_SHIFT 16
+#define I40E_VFQF_HKEY_KEY_2_MASK (0xFF << I40E_VFQF_HKEY_KEY_2_SHIFT)
+#define I40E_VFQF_HKEY_KEY_3_SHIFT 24
+#define I40E_VFQF_HKEY_KEY_3_MASK (0xFF << I40E_VFQF_HKEY_KEY_3_SHIFT)
+#define I40E_VFQF_HLUT(_i) (0x0000D000 + ((_i) * 4)) /* _i=0...15 */
+#define I40E_VFQF_HLUT_MAX_INDEX 15
+#define I40E_VFQF_HLUT_LUT0_SHIFT 0
+#define I40E_VFQF_HLUT_LUT0_MASK (0xF << I40E_VFQF_HLUT_LUT0_SHIFT)
+#define I40E_VFQF_HLUT_LUT1_SHIFT 8
+#define I40E_VFQF_HLUT_LUT1_MASK (0xF << I40E_VFQF_HLUT_LUT1_SHIFT)
+#define I40E_VFQF_HLUT_LUT2_SHIFT 16
+#define I40E_VFQF_HLUT_LUT2_MASK (0xF << I40E_VFQF_HLUT_LUT2_SHIFT)
+#define I40E_VFQF_HLUT_LUT3_SHIFT 24
+#define I40E_VFQF_HLUT_LUT3_MASK (0xF << I40E_VFQF_HLUT_LUT3_SHIFT)
+#define I40E_VFQF_HREGION(_i) (0x0000D400 + ((_i) * 4)) /* _i=0...7 */
+#define I40E_VFQF_HREGION_MAX_INDEX 7
+#define I40E_VFQF_HREGION_OVERRIDE_ENA_0_SHIFT 0
+#define I40E_VFQF_HREGION_OVERRIDE_ENA_0_MASK (0x1 << I40E_VFQF_HREGION_OVERRIDE_ENA_0_SHIFT)
+#define I40E_VFQF_HREGION_REGION_0_SHIFT 1
+#define I40E_VFQF_HREGION_REGION_0_MASK (0x7 << I40E_VFQF_HREGION_REGION_0_SHIFT)
+#define I40E_VFQF_HREGION_OVERRIDE_ENA_1_SHIFT 4
+#define I40E_VFQF_HREGION_OVERRIDE_ENA_1_MASK (0x1 << I40E_VFQF_HREGION_OVERRIDE_ENA_1_SHIFT)
+#define I40E_VFQF_HREGION_REGION_1_SHIFT 5
+#define I40E_VFQF_HREGION_REGION_1_MASK (0x7 << I40E_VFQF_HREGION_REGION_1_SHIFT)
+#define I40E_VFQF_HREGION_OVERRIDE_ENA_2_SHIFT 8
+#define I40E_VFQF_HREGION_OVERRIDE_ENA_2_MASK (0x1 << I40E_VFQF_HREGION_OVERRIDE_ENA_2_SHIFT)
+#define I40E_VFQF_HREGION_REGION_2_SHIFT 9
+#define I40E_VFQF_HREGION_REGION_2_MASK (0x7 << I40E_VFQF_HREGION_REGION_2_SHIFT)
+#define I40E_VFQF_HREGION_OVERRIDE_ENA_3_SHIFT 12
+#define I40E_VFQF_HREGION_OVERRIDE_ENA_3_MASK (0x1 << I40E_VFQF_HREGION_OVERRIDE_ENA_3_SHIFT)
+#define I40E_VFQF_HREGION_REGION_3_SHIFT 13
+#define I40E_VFQF_HREGION_REGION_3_MASK (0x7 << I40E_VFQF_HREGION_REGION_3_SHIFT)
+#define I40E_VFQF_HREGION_OVERRIDE_ENA_4_SHIFT 16
+#define I40E_VFQF_HREGION_OVERRIDE_ENA_4_MASK (0x1 << I40E_VFQF_HREGION_OVERRIDE_ENA_4_SHIFT)
+#define I40E_VFQF_HREGION_REGION_4_SHIFT 17
+#define I40E_VFQF_HREGION_REGION_4_MASK (0x7 << I40E_VFQF_HREGION_REGION_4_SHIFT)
+#define I40E_VFQF_HREGION_OVERRIDE_ENA_5_SHIFT 20
+#define I40E_VFQF_HREGION_OVERRIDE_ENA_5_MASK (0x1 << I40E_VFQF_HREGION_OVERRIDE_ENA_5_SHIFT)
+#define I40E_VFQF_HREGION_REGION_5_SHIFT 21
+#define I40E_VFQF_HREGION_REGION_5_MASK (0x7 << I40E_VFQF_HREGION_REGION_5_SHIFT)
+#define I40E_VFQF_HREGION_OVERRIDE_ENA_6_SHIFT 24
+#define I40E_VFQF_HREGION_OVERRIDE_ENA_6_MASK (0x1 << I40E_VFQF_HREGION_OVERRIDE_ENA_6_SHIFT)
+#define I40E_VFQF_HREGION_REGION_6_SHIFT 25
+#define I40E_VFQF_HREGION_REGION_6_MASK (0x7 << I40E_VFQF_HREGION_REGION_6_SHIFT)
+#define I40E_VFQF_HREGION_OVERRIDE_ENA_7_SHIFT 28
+#define I40E_VFQF_HREGION_OVERRIDE_ENA_7_MASK (0x1 << I40E_VFQF_HREGION_OVERRIDE_ENA_7_SHIFT)
+#define I40E_VFQF_HREGION_REGION_7_SHIFT 29
+#define I40E_VFQF_HREGION_REGION_7_MASK (0x7 << I40E_VFQF_HREGION_REGION_7_SHIFT)
+
+#endif
diff --git a/drivers/net/ethernet/intel/i40e/i40e_status.h b/drivers/net/ethernet/intel/i40e/i40e_status.h
new file mode 100644 (file)
index 0000000..5e5bcdd
--- /dev/null
@@ -0,0 +1,101 @@
+/*******************************************************************************
+ *
+ * Intel Ethernet Controller XL710 Family Linux Driver
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Contact Information:
+ * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ *
+ ******************************************************************************/
+
+#ifndef _I40E_STATUS_H_
+#define _I40E_STATUS_H_
+
+/* Error Codes */
+enum i40e_status_code {
+       I40E_SUCCESS                            = 0,
+       I40E_ERR_NVM                            = -1,
+       I40E_ERR_NVM_CHECKSUM                   = -2,
+       I40E_ERR_PHY                            = -3,
+       I40E_ERR_CONFIG                         = -4,
+       I40E_ERR_PARAM                          = -5,
+       I40E_ERR_MAC_TYPE                       = -6,
+       I40E_ERR_UNKNOWN_PHY                    = -7,
+       I40E_ERR_LINK_SETUP                     = -8,
+       I40E_ERR_ADAPTER_STOPPED                = -9,
+       I40E_ERR_INVALID_MAC_ADDR               = -10,
+       I40E_ERR_DEVICE_NOT_SUPPORTED           = -11,
+       I40E_ERR_MASTER_REQUESTS_PENDING        = -12,
+       I40E_ERR_INVALID_LINK_SETTINGS          = -13,
+       I40E_ERR_AUTONEG_NOT_COMPLETE           = -14,
+       I40E_ERR_RESET_FAILED                   = -15,
+       I40E_ERR_SWFW_SYNC                      = -16,
+       I40E_ERR_NO_AVAILABLE_VSI               = -17,
+       I40E_ERR_NO_MEMORY                      = -18,
+       I40E_ERR_BAD_PTR                        = -19,
+       I40E_ERR_RING_FULL                      = -20,
+       I40E_ERR_INVALID_PD_ID                  = -21,
+       I40E_ERR_INVALID_QP_ID                  = -22,
+       I40E_ERR_INVALID_CQ_ID                  = -23,
+       I40E_ERR_INVALID_CEQ_ID                 = -24,
+       I40E_ERR_INVALID_AEQ_ID                 = -25,
+       I40E_ERR_INVALID_SIZE                   = -26,
+       I40E_ERR_INVALID_ARP_INDEX              = -27,
+       I40E_ERR_INVALID_FPM_FUNC_ID            = -28,
+       I40E_ERR_QP_INVALID_MSG_SIZE            = -29,
+       I40E_ERR_QP_TOOMANY_WRS_POSTED          = -30,
+       I40E_ERR_INVALID_FRAG_COUNT             = -31,
+       I40E_ERR_QUEUE_EMPTY                    = -32,
+       I40E_ERR_INVALID_ALIGNMENT              = -33,
+       I40E_ERR_FLUSHED_QUEUE                  = -34,
+       I40E_ERR_INVALID_PUSH_PAGE_INDEX        = -35,
+       I40E_ERR_INVALID_IMM_DATA_SIZE          = -36,
+       I40E_ERR_TIMEOUT                        = -37,
+       I40E_ERR_OPCODE_MISMATCH                = -38,
+       I40E_ERR_CQP_COMPL_ERROR                = -39,
+       I40E_ERR_INVALID_VF_ID                  = -40,
+       I40E_ERR_INVALID_HMCFN_ID               = -41,
+       I40E_ERR_BACKING_PAGE_ERROR             = -42,
+       I40E_ERR_NO_PBLCHUNKS_AVAILABLE         = -43,
+       I40E_ERR_INVALID_PBLE_INDEX             = -44,
+       I40E_ERR_INVALID_SD_INDEX               = -45,
+       I40E_ERR_INVALID_PAGE_DESC_INDEX        = -46,
+       I40E_ERR_INVALID_SD_TYPE                = -47,
+       I40E_ERR_MEMCPY_FAILED                  = -48,
+       I40E_ERR_INVALID_HMC_OBJ_INDEX          = -49,
+       I40E_ERR_INVALID_HMC_OBJ_COUNT          = -50,
+       I40E_ERR_INVALID_SRQ_ARM_LIMIT          = -51,
+       I40E_ERR_SRQ_ENABLED                    = -52,
+       I40E_ERR_ADMIN_QUEUE_ERROR              = -53,
+       I40E_ERR_ADMIN_QUEUE_TIMEOUT            = -54,
+       I40E_ERR_BUF_TOO_SHORT                  = -55,
+       I40E_ERR_ADMIN_QUEUE_FULL               = -56,
+       I40E_ERR_ADMIN_QUEUE_NO_WORK            = -57,
+       I40E_ERR_BAD_IWARP_CQE                  = -58,
+       I40E_ERR_NVM_BLANK_MODE                 = -59,
+       I40E_ERR_NOT_IMPLEMENTED                = -60,
+       I40E_ERR_PE_DOORBELL_NOT_ENABLED        = -61,
+       I40E_ERR_DIAG_TEST_FAILED               = -62,
+       I40E_ERR_NOT_READY                      = -63,
+       I40E_NOT_SUPPORTED                      = -64,
+       I40E_ERR_FIRMWARE_API_VERSION           = -65,
+};
+
+#endif /* _I40E_STATUS_H_ */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
new file mode 100644 (file)
index 0000000..49d2cfa
--- /dev/null
@@ -0,0 +1,1817 @@
+/*******************************************************************************
+ *
+ * Intel Ethernet Controller XL710 Family Linux Driver
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Contact Information:
+ * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ *
+ ******************************************************************************/
+
+#include "i40e.h"
+
+static inline __le64 build_ctob(u32 td_cmd, u32 td_offset, unsigned int size,
+                               u32 td_tag)
+{
+       return cpu_to_le64(I40E_TX_DESC_DTYPE_DATA |
+                          ((u64)td_cmd  << I40E_TXD_QW1_CMD_SHIFT) |
+                          ((u64)td_offset << I40E_TXD_QW1_OFFSET_SHIFT) |
+                          ((u64)size  << I40E_TXD_QW1_TX_BUF_SZ_SHIFT) |
+                          ((u64)td_tag  << I40E_TXD_QW1_L2TAG1_SHIFT));
+}
+
+/**
+ * i40e_program_fdir_filter - Program a Flow Director filter
+ * @fdir_input: Packet data that will be filter parameters
+ * @pf: The pf pointer
+ * @add: True for add/update, False for remove
+ **/
+int i40e_program_fdir_filter(struct i40e_fdir_data *fdir_data,
+                            struct i40e_pf *pf, bool add)
+{
+       struct i40e_filter_program_desc *fdir_desc;
+       struct i40e_tx_buffer *tx_buf;
+       struct i40e_tx_desc *tx_desc;
+       struct i40e_ring *tx_ring;
+       struct i40e_vsi *vsi;
+       struct device *dev;
+       dma_addr_t dma;
+       u32 td_cmd = 0;
+       u16 i;
+
+       /* find existing FDIR VSI */
+       vsi = NULL;
+       for (i = 0; i < pf->hw.func_caps.num_vsis; i++)
+               if (pf->vsi[i] && pf->vsi[i]->type == I40E_VSI_FDIR)
+                       vsi = pf->vsi[i];
+       if (!vsi)
+               return -ENOENT;
+
+       tx_ring = &vsi->tx_rings[0];
+       dev = tx_ring->dev;
+
+       dma = dma_map_single(dev, fdir_data->raw_packet,
+                               I40E_FDIR_MAX_RAW_PACKET_LOOKUP, DMA_TO_DEVICE);
+       if (dma_mapping_error(dev, dma))
+               goto dma_fail;
+
+       /* grab the next descriptor */
+       fdir_desc = I40E_TX_FDIRDESC(tx_ring, tx_ring->next_to_use);
+       tx_buf = &tx_ring->tx_bi[tx_ring->next_to_use];
+       tx_ring->next_to_use++;
+       if (tx_ring->next_to_use == tx_ring->count)
+               tx_ring->next_to_use = 0;
+
+       fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32((fdir_data->q_index
+                                            << I40E_TXD_FLTR_QW0_QINDEX_SHIFT)
+                                            & I40E_TXD_FLTR_QW0_QINDEX_MASK);
+
+       fdir_desc->qindex_flex_ptype_vsi |= cpu_to_le32((fdir_data->flex_off
+                                           << I40E_TXD_FLTR_QW0_FLEXOFF_SHIFT)
+                                           & I40E_TXD_FLTR_QW0_FLEXOFF_MASK);
+
+       fdir_desc->qindex_flex_ptype_vsi |= cpu_to_le32((fdir_data->pctype
+                                            << I40E_TXD_FLTR_QW0_PCTYPE_SHIFT)
+                                            & I40E_TXD_FLTR_QW0_PCTYPE_MASK);
+
+       /* Use LAN VSI Id if not programmed by user */
+       if (fdir_data->dest_vsi == 0)
+               fdir_desc->qindex_flex_ptype_vsi |=
+                                         cpu_to_le32((pf->vsi[pf->lan_vsi]->id)
+                                          << I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT);
+       else
+               fdir_desc->qindex_flex_ptype_vsi |=
+                                           cpu_to_le32((fdir_data->dest_vsi
+                                           << I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT)
+                                           & I40E_TXD_FLTR_QW0_DEST_VSI_MASK);
+
+       fdir_desc->dtype_cmd_cntindex =
+                                   cpu_to_le32(I40E_TX_DESC_DTYPE_FILTER_PROG);
+
+       if (add)
+               fdir_desc->dtype_cmd_cntindex |= cpu_to_le32(
+                                      I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE
+                                       << I40E_TXD_FLTR_QW1_PCMD_SHIFT);
+       else
+               fdir_desc->dtype_cmd_cntindex |= cpu_to_le32(
+                                          I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE
+                                          << I40E_TXD_FLTR_QW1_PCMD_SHIFT);
+
+       fdir_desc->dtype_cmd_cntindex |= cpu_to_le32((fdir_data->dest_ctl
+                                         << I40E_TXD_FLTR_QW1_DEST_SHIFT)
+                                         & I40E_TXD_FLTR_QW1_DEST_MASK);
+
+       fdir_desc->dtype_cmd_cntindex |= cpu_to_le32(
+                    (fdir_data->fd_status << I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT)
+                     & I40E_TXD_FLTR_QW1_FD_STATUS_MASK);
+
+       if (fdir_data->cnt_index != 0) {
+               fdir_desc->dtype_cmd_cntindex |=
+                                   cpu_to_le32(I40E_TXD_FLTR_QW1_CNT_ENA_MASK);
+               fdir_desc->dtype_cmd_cntindex |=
+                                           cpu_to_le32((fdir_data->cnt_index
+                                           << I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT)
+                                           & I40E_TXD_FLTR_QW1_CNTINDEX_MASK);
+       }
+
+       fdir_desc->fd_id = cpu_to_le32(fdir_data->fd_id);
+
+       /* Now program a dummy descriptor */
+       tx_desc = I40E_TX_DESC(tx_ring, tx_ring->next_to_use);
+       tx_buf = &tx_ring->tx_bi[tx_ring->next_to_use];
+       tx_ring->next_to_use++;
+       if (tx_ring->next_to_use == tx_ring->count)
+               tx_ring->next_to_use = 0;
+
+       tx_desc->buffer_addr = cpu_to_le64(dma);
+       td_cmd = I40E_TX_DESC_CMD_EOP |
+                I40E_TX_DESC_CMD_RS  |
+                I40E_TX_DESC_CMD_DUMMY;
+
+       tx_desc->cmd_type_offset_bsz =
+               build_ctob(td_cmd, 0, I40E_FDIR_MAX_RAW_PACKET_LOOKUP, 0);
+
+       /* Mark the data descriptor to be watched */
+       tx_buf->next_to_watch = tx_desc;
+
+       /* Force memory writes to complete before letting h/w
+        * know there are new descriptors to fetch.  (Only
+        * applicable for weak-ordered memory model archs,
+        * such as IA-64).
+        */
+       wmb();
+
+       writel(tx_ring->next_to_use, tx_ring->tail);
+       return 0;
+
+dma_fail:
+       return -1;
+}
+
+/**
+ * i40e_fd_handle_status - check the Programming Status for FD
+ * @rx_ring: the Rx ring for this descriptor
+ * @qw: the descriptor data
+ * @prog_id: the id originally used for programming
+ *
+ * This is used to verify if the FD programming or invalidation
+ * requested by SW to the HW is successful or not and take actions accordingly.
+ **/
+static void i40e_fd_handle_status(struct i40e_ring *rx_ring, u32 qw, u8 prog_id)
+{
+       struct pci_dev *pdev = rx_ring->vsi->back->pdev;
+       u32 error;
+
+       error = (qw & I40E_RX_PROG_STATUS_DESC_QW1_ERROR_MASK) >>
+               I40E_RX_PROG_STATUS_DESC_QW1_ERROR_SHIFT;
+
+       /* for now just print the Status */
+       dev_info(&pdev->dev, "FD programming id %02x, Status %08x\n",
+                prog_id, error);
+}
+
+/**
+ * i40e_unmap_tx_resource - Release a Tx buffer
+ * @ring:      the ring that owns the buffer
+ * @tx_buffer: the buffer to free
+ **/
+static inline void i40e_unmap_tx_resource(struct i40e_ring *ring,
+                                         struct i40e_tx_buffer *tx_buffer)
+{
+       if (tx_buffer->dma) {
+               if (tx_buffer->tx_flags & I40E_TX_FLAGS_MAPPED_AS_PAGE)
+                       dma_unmap_page(ring->dev,
+                                      tx_buffer->dma,
+                                      tx_buffer->length,
+                                      DMA_TO_DEVICE);
+               else
+                       dma_unmap_single(ring->dev,
+                                        tx_buffer->dma,
+                                        tx_buffer->length,
+                                        DMA_TO_DEVICE);
+       }
+       tx_buffer->dma = 0;
+       tx_buffer->time_stamp = 0;
+}
+
+/**
+ * i40e_clean_tx_ring - Free any empty Tx buffers
+ * @tx_ring: ring to be cleaned
+ **/
+void i40e_clean_tx_ring(struct i40e_ring *tx_ring)
+{
+       struct i40e_tx_buffer *tx_buffer;
+       unsigned long bi_size;
+       u16 i;
+
+       /* ring already cleared, nothing to do */
+       if (!tx_ring->tx_bi)
+               return;
+
+       /* Free all the Tx ring sk_buffs */
+       for (i = 0; i < tx_ring->count; i++) {
+               tx_buffer = &tx_ring->tx_bi[i];
+               i40e_unmap_tx_resource(tx_ring, tx_buffer);
+               if (tx_buffer->skb)
+                       dev_kfree_skb_any(tx_buffer->skb);
+               tx_buffer->skb = NULL;
+       }
+
+       bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count;
+       memset(tx_ring->tx_bi, 0, bi_size);
+
+       /* Zero out the descriptor ring */
+       memset(tx_ring->desc, 0, tx_ring->size);
+
+       tx_ring->next_to_use = 0;
+       tx_ring->next_to_clean = 0;
+}
+
+/**
+ * i40e_free_tx_resources - Free Tx resources per queue
+ * @tx_ring: Tx descriptor ring for a specific queue
+ *
+ * Free all transmit software resources
+ **/
+void i40e_free_tx_resources(struct i40e_ring *tx_ring)
+{
+       i40e_clean_tx_ring(tx_ring);
+       kfree(tx_ring->tx_bi);
+       tx_ring->tx_bi = NULL;
+
+       if (tx_ring->desc) {
+               dma_free_coherent(tx_ring->dev, tx_ring->size,
+                                 tx_ring->desc, tx_ring->dma);
+               tx_ring->desc = NULL;
+       }
+}
+
+/**
+ * i40e_get_tx_pending - how many tx descriptors not processed
+ * @tx_ring: the ring of descriptors
+ *
+ * Since there is no access to the ring head register
+ * in XL710, we need to use our local copies
+ **/
+static u32 i40e_get_tx_pending(struct i40e_ring *ring)
+{
+       u32 ntu = ((ring->next_to_clean <= ring->next_to_use)
+                       ? ring->next_to_use
+                       : ring->next_to_use + ring->count);
+       return ntu - ring->next_to_clean;
+}
+
+/**
+ * i40e_check_tx_hang - Is there a hang in the Tx queue
+ * @tx_ring: the ring of descriptors
+ **/
+static bool i40e_check_tx_hang(struct i40e_ring *tx_ring)
+{
+       u32 tx_pending = i40e_get_tx_pending(tx_ring);
+       bool ret = false;
+
+       clear_check_for_tx_hang(tx_ring);
+
+       /* Check for a hung queue, but be thorough. This verifies
+        * that a transmit has been completed since the previous
+        * check AND there is at least one packet pending. The
+        * ARMED bit is set to indicate a potential hang. The
+        * bit is cleared if a pause frame is received to remove
+        * false hang detection due to PFC or 802.3x frames. By
+        * requiring this to fail twice we avoid races with
+        * PFC clearing the ARMED bit and conditions where we
+        * run the check_tx_hang logic with a transmit completion
+        * pending but without time to complete it yet.
+        */
+       if ((tx_ring->tx_stats.tx_done_old == tx_ring->tx_stats.packets) &&
+           tx_pending) {
+               /* make sure it is true for two checks in a row */
+               ret = test_and_set_bit(__I40E_HANG_CHECK_ARMED,
+                                      &tx_ring->state);
+       } else {
+               /* update completed stats and disarm the hang check */
+               tx_ring->tx_stats.tx_done_old = tx_ring->tx_stats.packets;
+               clear_bit(__I40E_HANG_CHECK_ARMED, &tx_ring->state);
+       }
+
+       return ret;
+}
+
+/**
+ * i40e_clean_tx_irq - Reclaim resources after transmit completes
+ * @tx_ring:  tx ring to clean
+ * @budget:   how many cleans we're allowed
+ *
+ * Returns true if there's any budget left (e.g. the clean is finished)
+ **/
+static bool i40e_clean_tx_irq(struct i40e_ring *tx_ring, int budget)
+{
+       u16 i = tx_ring->next_to_clean;
+       struct i40e_tx_buffer *tx_buf;
+       struct i40e_tx_desc *tx_desc;
+       unsigned int total_packets = 0;
+       unsigned int total_bytes = 0;
+
+       tx_buf = &tx_ring->tx_bi[i];
+       tx_desc = I40E_TX_DESC(tx_ring, i);
+
+       for (; budget; budget--) {
+               struct i40e_tx_desc *eop_desc;
+
+               eop_desc = tx_buf->next_to_watch;
+
+               /* if next_to_watch is not set then there is no work pending */
+               if (!eop_desc)
+                       break;
+
+               /* if the descriptor isn't done, no work yet to do */
+               if (!(eop_desc->cmd_type_offset_bsz &
+                     cpu_to_le64(I40E_TX_DESC_DTYPE_DESC_DONE)))
+                       break;
+
+               /* count the packet as being completed */
+               tx_ring->tx_stats.completed++;
+               tx_buf->next_to_watch = NULL;
+               tx_buf->time_stamp = 0;
+
+               /* set memory barrier before eop_desc is verified */
+               rmb();
+
+               do {
+                       i40e_unmap_tx_resource(tx_ring, tx_buf);
+
+                       /* clear dtype status */
+                       tx_desc->cmd_type_offset_bsz &=
+                               ~cpu_to_le64(I40E_TXD_QW1_DTYPE_MASK);
+
+                       if (likely(tx_desc == eop_desc)) {
+                               eop_desc = NULL;
+
+                               dev_kfree_skb_any(tx_buf->skb);
+                               tx_buf->skb = NULL;
+
+                               total_bytes += tx_buf->bytecount;
+                               total_packets += tx_buf->gso_segs;
+                       }
+
+                       tx_buf++;
+                       tx_desc++;
+                       i++;
+                       if (unlikely(i == tx_ring->count)) {
+                               i = 0;
+                               tx_buf = tx_ring->tx_bi;
+                               tx_desc = I40E_TX_DESC(tx_ring, 0);
+                       }
+               } while (eop_desc);
+       }
+
+       tx_ring->next_to_clean = i;
+       tx_ring->tx_stats.bytes += total_bytes;
+       tx_ring->tx_stats.packets += total_packets;
+       tx_ring->q_vector->tx.total_bytes += total_bytes;
+       tx_ring->q_vector->tx.total_packets += total_packets;
+       if (check_for_tx_hang(tx_ring) && i40e_check_tx_hang(tx_ring)) {
+               /* schedule immediate reset if we believe we hung */
+               dev_info(tx_ring->dev, "Detected Tx Unit Hang\n"
+                        "  VSI                  <%d>\n"
+                        "  Tx Queue             <%d>\n"
+                        "  next_to_use          <%x>\n"
+                        "  next_to_clean        <%x>\n",
+                        tx_ring->vsi->seid,
+                        tx_ring->queue_index,
+                        tx_ring->next_to_use, i);
+               dev_info(tx_ring->dev, "tx_bi[next_to_clean]\n"
+                        "  time_stamp           <%lx>\n"
+                        "  jiffies              <%lx>\n",
+                        tx_ring->tx_bi[i].time_stamp, jiffies);
+
+               netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);
+
+               dev_info(tx_ring->dev,
+                        "tx hang detected on queue %d, resetting adapter\n",
+                        tx_ring->queue_index);
+
+               tx_ring->netdev->netdev_ops->ndo_tx_timeout(tx_ring->netdev);
+
+               /* the adapter is about to reset, no point in enabling stuff */
+               return true;
+       }
+
+#define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
+       if (unlikely(total_packets && netif_carrier_ok(tx_ring->netdev) &&
+                    (I40E_DESC_UNUSED(tx_ring) >= TX_WAKE_THRESHOLD))) {
+               /* Make sure that anybody stopping the queue after this
+                * sees the new next_to_clean.
+                */
+               smp_mb();
+               if (__netif_subqueue_stopped(tx_ring->netdev,
+                                            tx_ring->queue_index) &&
+                  !test_bit(__I40E_DOWN, &tx_ring->vsi->state)) {
+                       netif_wake_subqueue(tx_ring->netdev,
+                                           tx_ring->queue_index);
+                       ++tx_ring->tx_stats.restart_queue;
+               }
+       }
+
+       return budget > 0;
+}
+
+/**
+ * i40e_set_new_dynamic_itr - Find new ITR level
+ * @rc: structure containing ring performance data
+ *
+ * Stores a new ITR value based on packets and byte counts during
+ * the last interrupt.  The advantage of per interrupt computation
+ * is faster updates and more accurate ITR for the current traffic
+ * pattern.  Constants in this function were computed based on
+ * theoretical maximum wire speed and thresholds were set based on
+ * testing data as well as attempting to minimize response time
+ * while increasing bulk throughput.
+ **/
+static void i40e_set_new_dynamic_itr(struct i40e_ring_container *rc)
+{
+       enum i40e_latency_range new_latency_range = rc->latency_range;
+       u32 new_itr = rc->itr;
+       int bytes_per_int;
+
+       if (rc->total_packets == 0 || !rc->itr)
+               return;
+
+       /* simple throttlerate management
+        *   0-10MB/s   lowest (100000 ints/s)
+        *  10-20MB/s   low    (20000 ints/s)
+        *  20-1249MB/s bulk   (8000 ints/s)
+        */
+       bytes_per_int = rc->total_bytes / rc->itr;
+       switch (rc->itr) {
+       case I40E_LOWEST_LATENCY:
+               if (bytes_per_int > 10)
+                       new_latency_range = I40E_LOW_LATENCY;
+               break;
+       case I40E_LOW_LATENCY:
+               if (bytes_per_int > 20)
+                       new_latency_range = I40E_BULK_LATENCY;
+               else if (bytes_per_int <= 10)
+                       new_latency_range = I40E_LOWEST_LATENCY;
+               break;
+       case I40E_BULK_LATENCY:
+               if (bytes_per_int <= 20)
+                       rc->latency_range = I40E_LOW_LATENCY;
+               break;
+       }
+
+       switch (new_latency_range) {
+       case I40E_LOWEST_LATENCY:
+               new_itr = I40E_ITR_100K;
+               break;
+       case I40E_LOW_LATENCY:
+               new_itr = I40E_ITR_20K;
+               break;
+       case I40E_BULK_LATENCY:
+               new_itr = I40E_ITR_8K;
+               break;
+       default:
+               break;
+       }
+
+       if (new_itr != rc->itr) {
+               /* do an exponential smoothing */
+               new_itr = (10 * new_itr * rc->itr) /
+                         ((9 * new_itr) + rc->itr);
+               rc->itr = new_itr & I40E_MAX_ITR;
+       }
+
+       rc->total_bytes = 0;
+       rc->total_packets = 0;
+}
+
+/**
+ * i40e_update_dynamic_itr - Adjust ITR based on bytes per int
+ * @q_vector: the vector to adjust
+ **/
+static void i40e_update_dynamic_itr(struct i40e_q_vector *q_vector)
+{
+       u16 vector = q_vector->vsi->base_vector + q_vector->v_idx;
+       struct i40e_hw *hw = &q_vector->vsi->back->hw;
+       u32 reg_addr;
+       u16 old_itr;
+
+       reg_addr = I40E_PFINT_ITRN(I40E_RX_ITR, vector - 1);
+       old_itr = q_vector->rx.itr;
+       i40e_set_new_dynamic_itr(&q_vector->rx);
+       if (old_itr != q_vector->rx.itr)
+               wr32(hw, reg_addr, q_vector->rx.itr);
+
+       reg_addr = I40E_PFINT_ITRN(I40E_TX_ITR, vector - 1);
+       old_itr = q_vector->tx.itr;
+       i40e_set_new_dynamic_itr(&q_vector->tx);
+       if (old_itr != q_vector->tx.itr)
+               wr32(hw, reg_addr, q_vector->tx.itr);
+
+       i40e_flush(hw);
+}
+
+/**
+ * i40e_clean_programming_status - clean the programming status descriptor
+ * @rx_ring: the rx ring that has this descriptor
+ * @rx_desc: the rx descriptor written back by HW
+ *
+ * Flow director should handle FD_FILTER_STATUS to check its filter programming
+ * status being successful or not and take actions accordingly. FCoE should
+ * handle its context/filter programming/invalidation status and take actions.
+ *
+ **/
+static void i40e_clean_programming_status(struct i40e_ring *rx_ring,
+                                         union i40e_rx_desc *rx_desc)
+{
+       u64 qw;
+       u8 id;
+
+       qw = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
+       id = (qw & I40E_RX_PROG_STATUS_DESC_QW1_PROGID_MASK) >>
+                 I40E_RX_PROG_STATUS_DESC_QW1_PROGID_SHIFT;
+
+       if (id == I40E_RX_PROG_STATUS_DESC_FD_FILTER_STATUS)
+               i40e_fd_handle_status(rx_ring, qw, id);
+}
+
+/**
+ * i40e_setup_tx_descriptors - Allocate the Tx descriptors
+ * @tx_ring: the tx ring to set up
+ *
+ * Return 0 on success, negative on error
+ **/
+int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring)
+{
+       struct device *dev = tx_ring->dev;
+       int bi_size;
+
+       if (!dev)
+               return -ENOMEM;
+
+       bi_size = sizeof(struct i40e_tx_buffer) * tx_ring->count;
+       tx_ring->tx_bi = kzalloc(bi_size, GFP_KERNEL);
+       if (!tx_ring->tx_bi)
+               goto err;
+
+       /* round up to nearest 4K */
+       tx_ring->size = tx_ring->count * sizeof(struct i40e_tx_desc);
+       tx_ring->size = ALIGN(tx_ring->size, 4096);
+       tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
+                                          &tx_ring->dma, GFP_KERNEL);
+       if (!tx_ring->desc) {
+               dev_info(dev, "Unable to allocate memory for the Tx descriptor ring, size=%d\n",
+                        tx_ring->size);
+               goto err;
+       }
+
+       tx_ring->next_to_use = 0;
+       tx_ring->next_to_clean = 0;
+       return 0;
+
+err:
+       kfree(tx_ring->tx_bi);
+       tx_ring->tx_bi = NULL;
+       return -ENOMEM;
+}
+
+/**
+ * i40e_clean_rx_ring - Free Rx buffers
+ * @rx_ring: ring to be cleaned
+ **/
+void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
+{
+       struct device *dev = rx_ring->dev;
+       struct i40e_rx_buffer *rx_bi;
+       unsigned long bi_size;
+       u16 i;
+
+       /* ring already cleared, nothing to do */
+       if (!rx_ring->rx_bi)
+               return;
+
+       /* Free all the Rx ring sk_buffs */
+       for (i = 0; i < rx_ring->count; i++) {
+               rx_bi = &rx_ring->rx_bi[i];
+               if (rx_bi->dma) {
+                       dma_unmap_single(dev,
+                                        rx_bi->dma,
+                                        rx_ring->rx_buf_len,
+                                        DMA_FROM_DEVICE);
+                       rx_bi->dma = 0;
+               }
+               if (rx_bi->skb) {
+                       dev_kfree_skb(rx_bi->skb);
+                       rx_bi->skb = NULL;
+               }
+               if (rx_bi->page) {
+                       if (rx_bi->page_dma) {
+                               dma_unmap_page(dev,
+                                              rx_bi->page_dma,
+                                              PAGE_SIZE / 2,
+                                              DMA_FROM_DEVICE);
+                               rx_bi->page_dma = 0;
+                       }
+                       __free_page(rx_bi->page);
+                       rx_bi->page = NULL;
+                       rx_bi->page_offset = 0;
+               }
+       }
+
+       bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
+       memset(rx_ring->rx_bi, 0, bi_size);
+
+       /* Zero out the descriptor ring */
+       memset(rx_ring->desc, 0, rx_ring->size);
+
+       rx_ring->next_to_clean = 0;
+       rx_ring->next_to_use = 0;
+}
+
+/**
+ * i40e_free_rx_resources - Free Rx resources
+ * @rx_ring: ring to clean the resources from
+ *
+ * Free all receive software resources
+ **/
+void i40e_free_rx_resources(struct i40e_ring *rx_ring)
+{
+       i40e_clean_rx_ring(rx_ring);
+       kfree(rx_ring->rx_bi);
+       rx_ring->rx_bi = NULL;
+
+       if (rx_ring->desc) {
+               dma_free_coherent(rx_ring->dev, rx_ring->size,
+                                 rx_ring->desc, rx_ring->dma);
+               rx_ring->desc = NULL;
+       }
+}
+
+/**
+ * i40e_setup_rx_descriptors - Allocate Rx descriptors
+ * @rx_ring: Rx descriptor ring (for a specific queue) to setup
+ *
+ * Returns 0 on success, negative on failure
+ **/
+int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
+{
+       struct device *dev = rx_ring->dev;
+       int bi_size;
+
+       bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
+       rx_ring->rx_bi = kzalloc(bi_size, GFP_KERNEL);
+       if (!rx_ring->rx_bi)
+               goto err;
+
+       /* Round up to nearest 4K */
+       rx_ring->size = ring_is_16byte_desc_enabled(rx_ring)
+               ? rx_ring->count * sizeof(union i40e_16byte_rx_desc)
+               : rx_ring->count * sizeof(union i40e_32byte_rx_desc);
+       rx_ring->size = ALIGN(rx_ring->size, 4096);
+       rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size,
+                                          &rx_ring->dma, GFP_KERNEL);
+
+       if (!rx_ring->desc) {
+               dev_info(dev, "Unable to allocate memory for the Rx descriptor ring, size=%d\n",
+                        rx_ring->size);
+               goto err;
+       }
+
+       rx_ring->next_to_clean = 0;
+       rx_ring->next_to_use = 0;
+
+       return 0;
+err:
+       kfree(rx_ring->rx_bi);
+       rx_ring->rx_bi = NULL;
+       return -ENOMEM;
+}
+
+/**
+ * i40e_release_rx_desc - Store the new tail and head values
+ * @rx_ring: ring to bump
+ * @val: new head index
+ **/
+static inline void i40e_release_rx_desc(struct i40e_ring *rx_ring, u32 val)
+{
+       rx_ring->next_to_use = val;
+       /* Force memory writes to complete before letting h/w
+        * know there are new descriptors to fetch.  (Only
+        * applicable for weak-ordered memory model archs,
+        * such as IA-64).
+        */
+       wmb();
+       writel(val, rx_ring->tail);
+}
+
+/**
+ * i40e_alloc_rx_buffers - Replace used receive buffers; packet split
+ * @rx_ring: ring to place buffers on
+ * @cleaned_count: number of buffers to replace
+ **/
+void i40e_alloc_rx_buffers(struct i40e_ring *rx_ring, u16 cleaned_count)
+{
+       u16 i = rx_ring->next_to_use;
+       union i40e_rx_desc *rx_desc;
+       struct i40e_rx_buffer *bi;
+       struct sk_buff *skb;
+
+       /* do nothing if no valid netdev defined */
+       if (!rx_ring->netdev || !cleaned_count)
+               return;
+
+       while (cleaned_count--) {
+               rx_desc = I40E_RX_DESC(rx_ring, i);
+               bi = &rx_ring->rx_bi[i];
+               skb = bi->skb;
+
+               if (!skb) {
+                       skb = netdev_alloc_skb_ip_align(rx_ring->netdev,
+                                                       rx_ring->rx_buf_len);
+                       if (!skb) {
+                               rx_ring->rx_stats.alloc_rx_buff_failed++;
+                               goto no_buffers;
+                       }
+                       /* initialize queue mapping */
+                       skb_record_rx_queue(skb, rx_ring->queue_index);
+                       bi->skb = skb;
+               }
+
+               if (!bi->dma) {
+                       bi->dma = dma_map_single(rx_ring->dev,
+                                                skb->data,
+                                                rx_ring->rx_buf_len,
+                                                DMA_FROM_DEVICE);
+                       if (dma_mapping_error(rx_ring->dev, bi->dma)) {
+                               rx_ring->rx_stats.alloc_rx_buff_failed++;
+                               bi->dma = 0;
+                               goto no_buffers;
+                       }
+               }
+
+               if (ring_is_ps_enabled(rx_ring)) {
+                       if (!bi->page) {
+                               bi->page = alloc_page(GFP_ATOMIC);
+                               if (!bi->page) {
+                                       rx_ring->rx_stats.alloc_rx_page_failed++;
+                                       goto no_buffers;
+                               }
+                       }
+
+                       if (!bi->page_dma) {
+                               /* use a half page if we're re-using */
+                               bi->page_offset ^= PAGE_SIZE / 2;
+                               bi->page_dma = dma_map_page(rx_ring->dev,
+                                                           bi->page,
+                                                           bi->page_offset,
+                                                           PAGE_SIZE / 2,
+                                                           DMA_FROM_DEVICE);
+                               if (dma_mapping_error(rx_ring->dev,
+                                                     bi->page_dma)) {
+                                       rx_ring->rx_stats.alloc_rx_page_failed++;
+                                       bi->page_dma = 0;
+                                       goto no_buffers;
+                               }
+                       }
+
+                       /* Refresh the desc even if buffer_addrs didn't change
+                        * because each write-back erases this info.
+                        */
+                       rx_desc->read.pkt_addr = cpu_to_le64(bi->page_dma);
+                       rx_desc->read.hdr_addr = cpu_to_le64(bi->dma);
+               } else {
+                       rx_desc->read.pkt_addr = cpu_to_le64(bi->dma);
+                       rx_desc->read.hdr_addr = 0;
+               }
+               i++;
+               if (i == rx_ring->count)
+                       i = 0;
+       }
+
+no_buffers:
+       if (rx_ring->next_to_use != i)
+               i40e_release_rx_desc(rx_ring, i);
+}
+
+/**
+ * i40e_receive_skb - Send a completed packet up the stack
+ * @rx_ring:  rx ring in play
+ * @skb: packet to send up
+ * @vlan_tag: vlan tag for packet
+ **/
+static void i40e_receive_skb(struct i40e_ring *rx_ring,
+                            struct sk_buff *skb, u16 vlan_tag)
+{
+       struct i40e_q_vector *q_vector = rx_ring->q_vector;
+       struct i40e_vsi *vsi = rx_ring->vsi;
+       u64 flags = vsi->back->flags;
+
+       if (vlan_tag & VLAN_VID_MASK)
+               __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tag);
+
+       if (flags & I40E_FLAG_IN_NETPOLL)
+               netif_rx(skb);
+       else
+               napi_gro_receive(&q_vector->napi, skb);
+}
+
+/**
+ * i40e_rx_checksum - Indicate in skb if hw indicated a good cksum
+ * @vsi: the VSI we care about
+ * @skb: skb currently being received and modified
+ * @rx_status: status value of last descriptor in packet
+ * @rx_error: error value of last descriptor in packet
+ *
+ * Marks the skb CHECKSUM_UNNECESSARY when the hardware both recognized
+ * the L3/L4 headers and reported no checksum errors; otherwise leaves
+ * it at CHECKSUM_NONE and counts hardware-detected checksum errors.
+ **/
+static inline void i40e_rx_checksum(struct i40e_vsi *vsi,
+                                   struct sk_buff *skb,
+                                   u32 rx_status,
+                                   u32 rx_error)
+{
+       u32 csum_err_bits = (1 << I40E_RX_DESC_ERROR_IPE_SHIFT) |
+                           (1 << I40E_RX_DESC_ERROR_L4E_SHIFT);
+
+       skb->ip_summed = CHECKSUM_NONE;
+
+       /* bail unless Rx checksum offload is enabled on the netdev ... */
+       if (!(vsi->netdev->features & NETIF_F_RXCSUM))
+               return;
+
+       /* ... and the hardware actually parsed the IP/L4 headers */
+       if (!(rx_status & (1 << I40E_RX_DESC_STATUS_L3L4P_SHIFT)))
+               return;
+
+       /* IP or L4 checksum error reported by the hardware */
+       if (rx_error & csum_err_bits) {
+               vsi->back->hw_csum_rx_error++;
+               return;
+       }
+
+       skb->ip_summed = CHECKSUM_UNNECESSARY;
+}
+
+/**
+ * i40e_rx_hash - returns the hash value from the Rx descriptor
+ * @ring: descriptor ring
+ * @rx_desc: specific descriptor
+ *
+ * Returns the RSS hash reported in the descriptor write-back, or 0 when
+ * the netdev has hashing disabled or no RSS result is present.
+ **/
+static inline u32 i40e_rx_hash(struct i40e_ring *ring,
+                              union i40e_rx_desc *rx_desc)
+{
+       u64 qword;
+
+       if (!(ring->netdev->features & NETIF_F_RXHASH))
+               return 0;
+
+       qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
+       if (!((qword >> I40E_RX_DESC_STATUS_FLTSTAT_SHIFT) &
+             I40E_RX_DESC_FLTSTAT_RSS_HASH))
+               return 0;
+
+       return le32_to_cpu(rx_desc->wb.qword0.hi_dword.rss);
+}
+
+/**
+ * i40e_clean_rx_irq - Reclaim resources after receive completes
+ * @rx_ring:  rx ring to clean
+ * @budget:   how many cleans we're allowed
+ *
+ * Walks completed (DD-set) descriptors, reassembles header-split and
+ * multi-descriptor packets, hands them to the stack, and periodically
+ * returns buffers to the hardware.
+ *
+ * Returns true if there's any budget left (e.g. the clean is finished)
+ **/
+static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
+{
+       unsigned int total_rx_bytes = 0, total_rx_packets = 0;
+       u16 rx_packet_len, rx_header_len, rx_sph, rx_hbo;
+       u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
+       const int current_node = numa_node_id();
+       struct i40e_vsi *vsi = rx_ring->vsi;
+       u16 i = rx_ring->next_to_clean;
+       union i40e_rx_desc *rx_desc;
+       u32 rx_error, rx_status;
+       u64 qword;
+
+       rx_desc = I40E_RX_DESC(rx_ring, i);
+       qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
+       rx_status = (qword & I40E_RXD_QW1_STATUS_MASK)
+                               >> I40E_RXD_QW1_STATUS_SHIFT;
+
+       /* consume descriptors until one without the Descriptor Done bit */
+       while (rx_status & (1 << I40E_RX_DESC_STATUS_DD_SHIFT)) {
+               union i40e_rx_desc *next_rxd;
+               struct i40e_rx_buffer *rx_bi;
+               struct sk_buff *skb;
+               u16 vlan_tag;
+               /* filter-programming completions carry no packet data */
+               if (i40e_rx_is_programming_status(qword)) {
+                       i40e_clean_programming_status(rx_ring, rx_desc);
+                       I40E_RX_NEXT_DESC_PREFETCH(rx_ring, i, next_rxd);
+                       goto next_desc;
+               }
+               rx_bi = &rx_ring->rx_bi[i];
+               skb = rx_bi->skb;
+               prefetch(skb->data);
+
+               /* length and error fields are all packed into qword 1 */
+               rx_packet_len = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK)
+                                             >> I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
+               rx_header_len = (qword & I40E_RXD_QW1_LENGTH_HBUF_MASK)
+                                             >> I40E_RXD_QW1_LENGTH_HBUF_SHIFT;
+               rx_sph = (qword & I40E_RXD_QW1_LENGTH_SPH_MASK)
+                                             >> I40E_RXD_QW1_LENGTH_SPH_SHIFT;
+
+               rx_error = (qword & I40E_RXD_QW1_ERROR_MASK)
+                                             >> I40E_RXD_QW1_ERROR_SHIFT;
+               /* header-buffer overflow is handled as a length hint,
+                * not an error, so strip it out of rx_error
+                */
+               rx_hbo = rx_error & (1 << I40E_RX_DESC_ERROR_HBO_SHIFT);
+               rx_error &= ~(1 << I40E_RX_DESC_ERROR_HBO_SHIFT);
+
+               rx_bi->skb = NULL;
+
+               /* This memory barrier is needed to keep us from reading
+                * any other fields out of the rx_desc until we know the
+                * STATUS_DD bit is set
+                */
+               rmb();
+               /* NOTE(review): the length/error fields above were already
+                * extracted from qword before this barrier; presumably they
+                * are valid once DD is observed - confirm against the HW
+                * write-back ordering rules
+                */
+
+               /* Get the header and possibly the whole packet
+                * If this is an skb from previous receive dma will be 0
+                */
+               if (rx_bi->dma) {
+                       u16 len;
+
+                       if (rx_hbo)
+                               len = I40E_RX_HDR_SIZE;
+                       else if (rx_sph)
+                               len = rx_header_len;
+                       else if (rx_packet_len)
+                               len = rx_packet_len;   /* 1buf/no split found */
+                       else
+                               len = rx_header_len;   /* split always mode */
+
+                       skb_put(skb, len);
+                       dma_unmap_single(rx_ring->dev,
+                                        rx_bi->dma,
+                                        rx_ring->rx_buf_len,
+                                        DMA_FROM_DEVICE);
+                       rx_bi->dma = 0;
+               }
+
+               /* Get the rest of the data if this was a header split */
+               if (ring_is_ps_enabled(rx_ring) && rx_packet_len) {
+
+                       skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
+                                          rx_bi->page,
+                                          rx_bi->page_offset,
+                                          rx_packet_len);
+
+                       skb->len += rx_packet_len;
+                       skb->data_len += rx_packet_len;
+                       skb->truesize += rx_packet_len;
+
+                       /* reuse the page for refill only if we are its sole
+                        * user and it is local to this NUMA node
+                        */
+                       if ((page_count(rx_bi->page) == 1) &&
+                           (page_to_nid(rx_bi->page) == current_node))
+                               get_page(rx_bi->page);
+                       else
+                               rx_bi->page = NULL;
+
+                       dma_unmap_page(rx_ring->dev,
+                                      rx_bi->page_dma,
+                                      PAGE_SIZE / 2,
+                                      DMA_FROM_DEVICE);
+                       rx_bi->page_dma = 0;
+               }
+               I40E_RX_NEXT_DESC_PREFETCH(rx_ring, i, next_rxd);
+
+               /* packet continues in the next descriptor: stash the
+                * partially-built skb on the next buffer info and move on
+                */
+               if (unlikely(
+                   !(rx_status & (1 << I40E_RX_DESC_STATUS_EOF_SHIFT)))) {
+                       struct i40e_rx_buffer *next_buffer;
+
+                       next_buffer = &rx_ring->rx_bi[i];
+
+                       if (ring_is_ps_enabled(rx_ring)) {
+                               rx_bi->skb = next_buffer->skb;
+                               rx_bi->dma = next_buffer->dma;
+                               next_buffer->skb = skb;
+                               next_buffer->dma = 0;
+                       }
+                       rx_ring->rx_stats.non_eop_descs++;
+                       goto next_desc;
+               }
+
+               /* ERR_MASK will only have valid bits if EOP set */
+               if (unlikely(rx_error & (1 << I40E_RX_DESC_ERROR_RXE_SHIFT))) {
+                       dev_kfree_skb_any(skb);
+                       goto next_desc;
+               }
+
+               skb->rxhash = i40e_rx_hash(rx_ring, rx_desc);
+               i40e_rx_checksum(vsi, skb, rx_status, rx_error);
+
+               /* probably a little skewed due to removing CRC */
+               total_rx_bytes += skb->len;
+               total_rx_packets++;
+
+               skb->protocol = eth_type_trans(skb, rx_ring->netdev);
+               vlan_tag = rx_status & (1 << I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)
+                        ? le16_to_cpu(rx_desc->wb.qword0.lo_dword.l2tag1)
+                        : 0;
+               i40e_receive_skb(rx_ring, skb, vlan_tag);
+
+               rx_ring->netdev->last_rx = jiffies;
+               /* only fully delivered packets count against the budget */
+               budget--;
+next_desc:
+               rx_desc->wb.qword1.status_error_len = 0;
+               if (!budget)
+                       break;
+
+               cleaned_count++;
+               /* return some buffers to hardware, one at a time is too slow */
+               if (cleaned_count >= I40E_RX_BUFFER_WRITE) {
+                       i40e_alloc_rx_buffers(rx_ring, cleaned_count);
+                       cleaned_count = 0;
+               }
+
+               /* use prefetched values */
+               rx_desc = next_rxd;
+               qword = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
+               rx_status = (qword & I40E_RXD_QW1_STATUS_MASK)
+                                               >> I40E_RXD_QW1_STATUS_SHIFT;
+       }
+
+       rx_ring->next_to_clean = i;
+       rx_ring->rx_stats.packets += total_rx_packets;
+       rx_ring->rx_stats.bytes += total_rx_bytes;
+       rx_ring->q_vector->rx.total_packets += total_rx_packets;
+       rx_ring->q_vector->rx.total_bytes += total_rx_bytes;
+
+       /* give any remaining cleaned buffers back to hardware */
+       if (cleaned_count)
+               i40e_alloc_rx_buffers(rx_ring, cleaned_count);
+
+       return budget > 0;
+}
+
+/**
+ * i40e_napi_poll - NAPI polling Rx/Tx cleanup routine
+ * @napi: napi struct with our devices info in it
+ * @budget: amount of work driver is allowed to do this pass, in packets
+ *
+ * This function will clean all queues associated with a q_vector.
+ *
+ * Returns the amount of work done
+ **/
+int i40e_napi_poll(struct napi_struct *napi, int budget)
+{
+       struct i40e_q_vector *q_vector =
+                              container_of(napi, struct i40e_q_vector, napi);
+       struct i40e_vsi *vsi = q_vector->vsi;
+       bool clean_complete = true;
+       int budget_per_ring;
+       int i;
+
+       /* VSI is being taken down: stop polling immediately */
+       if (test_bit(__I40E_DOWN, &vsi->state)) {
+               napi_complete(napi);
+               return 0;
+       }
+
+       /* We attempt to distribute budget to each Rx queue fairly, but don't
+        * allow the budget to go below 1 because that would exit polling early.
+        * Since the actual Tx work is minimal, we can give the Tx a larger
+        * budget and be more aggressive about cleaning up the Tx descriptors.
+        */
+       budget_per_ring = max(budget/q_vector->num_ringpairs, 1);
+       for (i = 0; i < q_vector->num_ringpairs; i++) {
+               /* one unfinished ring is enough to stay in polling mode */
+               clean_complete &= i40e_clean_tx_irq(q_vector->tx.ring[i],
+                                                   vsi->work_limit);
+               clean_complete &= i40e_clean_rx_irq(q_vector->rx.ring[i],
+                                                   budget_per_ring);
+       }
+
+       /* If work not completed, return budget and polling will return */
+       if (!clean_complete)
+               return budget;
+
+       /* Work is done so exit the polling mode and re-enable the interrupt */
+       napi_complete(napi);
+       /* re-tune adaptive interrupt moderation when it is enabled */
+       if (ITR_IS_DYNAMIC(vsi->rx_itr_setting) ||
+           ITR_IS_DYNAMIC(vsi->tx_itr_setting))
+               i40e_update_dynamic_itr(q_vector);
+
+       if (!test_bit(__I40E_DOWN, &vsi->state)) {
+               if (vsi->back->flags & I40E_FLAG_MSIX_ENABLED) {
+                       i40e_irq_dynamic_enable(vsi,
+                                       q_vector->v_idx + vsi->base_vector);
+               } else {
+                       struct i40e_hw *hw = &vsi->back->hw;
+                       /* We re-enable the queue 0 cause, but
+                        * don't worry about dynamic_enable
+                        * because we left it on for the other
+                        * possible interrupts during napi
+                        */
+                       u32 qval = rd32(hw, I40E_QINT_RQCTL(0));
+                       qval |= I40E_QINT_RQCTL_CAUSE_ENA_MASK;
+                       wr32(hw, I40E_QINT_RQCTL(0), qval);
+
+                       qval = rd32(hw, I40E_QINT_TQCTL(0));
+                       qval |= I40E_QINT_TQCTL_CAUSE_ENA_MASK;
+                       wr32(hw, I40E_QINT_TQCTL(0), qval);
+                       i40e_flush(hw);
+               }
+       }
+
+       return 0;
+}
+
+/**
+ * i40e_atr - Add a Flow Director ATR filter
+ * @tx_ring:  ring to add programming descriptor to
+ * @skb:      send buffer
+ * @flags:    send flags
+ * @protocol: wire protocol
+ *
+ * Samples outgoing TCP flows and writes a Flow Director programming
+ * descriptor on the Tx ring for the sampled flow. Note: @flags is
+ * currently unused in the body.
+ **/
+static void i40e_atr(struct i40e_ring *tx_ring, struct sk_buff *skb,
+                    u32 flags, __be16 protocol)
+{
+       struct i40e_filter_program_desc *fdir_desc;
+       struct i40e_pf *pf = tx_ring->vsi->back;
+       /* overlapping views of the packet's network header */
+       union {
+               unsigned char *network;
+               struct iphdr *ipv4;
+               struct ipv6hdr *ipv6;
+       } hdr;
+       struct tcphdr *th;
+       unsigned int hlen;
+       u32 flex_ptype, dtype_cmd;
+
+       /* make sure ATR is enabled */
+       if (!(pf->flags & I40E_FLAG_FDIR_ATR_ENABLED))
+               return;
+
+       /* if sampling is disabled do nothing */
+       if (!tx_ring->atr_sample_rate)
+               return;
+
+       tx_ring->atr_count++;
+
+       /* snag network header to get L4 type and address */
+       hdr.network = skb_network_header(skb);
+
+       /* Currently only IPv4/IPv6 with TCP is supported */
+       if (protocol == htons(ETH_P_IP)) {
+               if (hdr.ipv4->protocol != IPPROTO_TCP)
+                       return;
+
+               /* access ihl as a u8 to avoid unaligned access on ia64 */
+               hlen = (hdr.network[0] & 0x0F) << 2;
+       } else if (protocol == htons(ETH_P_IPV6)) {
+               if (hdr.ipv6->nexthdr != IPPROTO_TCP)
+                       return;
+
+               /* NOTE(review): assumes no IPv6 extension headers between
+                * the base header and TCP - confirm this is acceptable
+                */
+               hlen = sizeof(struct ipv6hdr);
+       } else {
+               return;
+       }
+
+       th = (struct tcphdr *)(hdr.network + hlen);
+
+       /* sample on all syn/fin packets or once every atr sample rate */
+       if (!th->fin && !th->syn && (tx_ring->atr_count < tx_ring->atr_sample_rate))
+               return;
+
+       tx_ring->atr_count = 0;
+
+       /* grab the next descriptor */
+       fdir_desc = I40E_TX_FDIRDESC(tx_ring, tx_ring->next_to_use);
+       tx_ring->next_to_use++;
+       if (tx_ring->next_to_use == tx_ring->count)
+               tx_ring->next_to_use = 0;
+
+       /* NOTE(review): the queue index + DIRECT_PACKET_QINDEX destination
+        * below appear to steer the matched flow to this queue index -
+        * confirm against the Flow Director programming-descriptor spec
+        */
+       flex_ptype = (tx_ring->queue_index << I40E_TXD_FLTR_QW0_QINDEX_SHIFT) &
+                     I40E_TXD_FLTR_QW0_QINDEX_MASK;
+       flex_ptype |= (protocol == htons(ETH_P_IP)) ?
+                     (I40E_FILTER_PCTYPE_NONF_IPV4_TCP <<
+                      I40E_TXD_FLTR_QW0_PCTYPE_SHIFT) :
+                     (I40E_FILTER_PCTYPE_NONF_IPV6_TCP <<
+                      I40E_TXD_FLTR_QW0_PCTYPE_SHIFT);
+
+       flex_ptype |= tx_ring->vsi->id << I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT;
+
+       dtype_cmd = I40E_TX_DESC_DTYPE_FILTER_PROG;
+
+       /* FIN removes the filter for a closing flow, otherwise add/update */
+       dtype_cmd |= th->fin ?
+                    (I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE <<
+                     I40E_TXD_FLTR_QW1_PCMD_SHIFT) :
+                    (I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE <<
+                     I40E_TXD_FLTR_QW1_PCMD_SHIFT);
+
+       dtype_cmd |= I40E_FILTER_PROGRAM_DESC_DEST_DIRECT_PACKET_QINDEX <<
+                    I40E_TXD_FLTR_QW1_DEST_SHIFT;
+
+       dtype_cmd |= I40E_FILTER_PROGRAM_DESC_FD_STATUS_FD_ID <<
+                    I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT;
+
+       fdir_desc->qindex_flex_ptype_vsi = cpu_to_le32(flex_ptype);
+       fdir_desc->dtype_cmd_cntindex = cpu_to_le32(dtype_cmd);
+}
+
+/* command bits applied to the last descriptor of every packet:
+ * end-of-packet plus RS (report descriptor status)
+ */
+#define I40E_TXD_CMD (I40E_TX_DESC_CMD_EOP | I40E_TX_DESC_CMD_RS)
+/**
+ * i40e_tx_prepare_vlan_flags - prepare generic TX VLAN tagging flags for HW
+ * @skb:     send buffer
+ * @tx_ring: ring to send buffer on
+ * @flags:   the tx flags to be set
+ *
+ * Checks the skb and set up correspondingly several generic transmit flags
+ * related to VLAN tagging for the HW, such as VLAN, DCB, etc.
+ *
+ * Returns error code indicate the frame should be dropped upon error and the
+ * otherwise  returns 0 to indicate the flags has been set properly.
+ **/
+static int i40e_tx_prepare_vlan_flags(struct sk_buff *skb,
+                                     struct i40e_ring *tx_ring,
+                                     u32 *flags)
+{
+       __be16 protocol = skb->protocol;
+       u32  tx_flags = 0;
+
+       /* if we have a HW VLAN tag being added, default to the HW one */
+       if (vlan_tx_tag_present(skb)) {
+               tx_flags |= vlan_tx_tag_get(skb) << I40E_TX_FLAGS_VLAN_SHIFT;
+               tx_flags |= I40E_TX_FLAGS_HW_VLAN;
+       /* else if it is a SW VLAN, check the next protocol and store the tag */
+       } else if (protocol == __constant_htons(ETH_P_8021Q)) {
+               struct vlan_hdr *vhdr, _vhdr;
+               vhdr = skb_header_pointer(skb, ETH_HLEN, sizeof(_vhdr), &_vhdr);
+               if (!vhdr)
+                       return -EINVAL;
+
+               /* look past the VLAN header for the encapsulated protocol */
+               protocol = vhdr->h_vlan_encapsulated_proto;
+               tx_flags |= ntohs(vhdr->h_vlan_TCI) << I40E_TX_FLAGS_VLAN_SHIFT;
+               tx_flags |= I40E_TX_FLAGS_SW_VLAN;
+       }
+
+       /* Insert 802.1p priority into VLAN header */
+       if ((tx_ring->vsi->back->flags & I40E_FLAG_DCB_ENABLED) &&
+           ((tx_flags & (I40E_TX_FLAGS_HW_VLAN | I40E_TX_FLAGS_SW_VLAN)) ||
+            (skb->priority != TC_PRIO_CONTROL))) {
+               tx_flags &= ~I40E_TX_FLAGS_VLAN_PRIO_MASK;
+               tx_flags |= (skb->priority & 0x7) <<
+                               I40E_TX_FLAGS_VLAN_PRIO_SHIFT;
+               if (tx_flags & I40E_TX_FLAGS_SW_VLAN) {
+                       struct vlan_ethhdr *vhdr;
+                       /* the in-packet tag is rewritten below, so the
+                        * header must be writable (un-clone if necessary)
+                        */
+                       if (skb_header_cloned(skb) &&
+                           pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
+                               return -ENOMEM;
+                       vhdr = (struct vlan_ethhdr *)skb->data;
+                       vhdr->h_vlan_TCI = htons(tx_flags >>
+                                                I40E_TX_FLAGS_VLAN_SHIFT);
+               } else {
+                       /* no in-packet tag: have the hardware insert one */
+                       tx_flags |= I40E_TX_FLAGS_HW_VLAN;
+               }
+       }
+       *flags = tx_flags;
+       return 0;
+}
+
+/**
+ * i40e_tx_csum - is checksum offload requested
+ * @tx_ring:  ptr to the ring to send
+ * @skb:      ptr to the skb we're sending
+ * @tx_flags: the collected send information
+ * @protocol: the send protocol
+ *
+ * Returns true if checksum offload is requested
+ **/
+static bool i40e_tx_csum(struct i40e_ring *tx_ring, struct sk_buff *skb,
+                        u32 tx_flags, __be16 protocol)
+{
+       bool csum_requested = (skb->ip_summed == CHECKSUM_PARTIAL);
+
+       /* no offload requested, no software switching and no HW VLAN
+        * insertion: nothing for the hardware to do
+        */
+       if (!csum_requested &&
+           !(tx_flags & I40E_TX_FLAGS_TXSW) &&
+           !(tx_flags & I40E_TX_FLAGS_HW_VLAN))
+               return false;
+
+       return csum_requested;
+}
+
+/**
+ * i40e_tso - set up the tso context descriptor
+ * @tx_ring:  ptr to the ring to send
+ * @skb:      ptr to the skb we're sending
+ * @tx_flags: the collected send information
+ * @protocol: the send protocol
+ * @hdr_len:  ptr to the size of the packet header
+ * @cd_type_cmd_tso_mss: ptr to context descriptor Quad Word 1 bits
+ * @cd_tunneling: ptr to context descriptor bits
+ *
+ * Returns 0 if no TSO can happen, 1 if tso is going, or error
+ **/
+static int i40e_tso(struct i40e_ring *tx_ring, struct sk_buff *skb,
+                   u32 tx_flags, __be16 protocol, u8 *hdr_len,
+                   u64 *cd_type_cmd_tso_mss, u32 *cd_tunneling)
+{
+       u32 cd_cmd, cd_tso_len, cd_mss;
+       struct tcphdr *tcph;
+       struct iphdr *iph;
+       u32 l4len;
+       int err;
+       struct ipv6hdr *ipv6h;
+
+       if (!skb_is_gso(skb))
+               return 0;
+
+       /* the headers are modified in place below, so the skb header
+        * area must be private to us
+        */
+       if (skb_header_cloned(skb)) {
+               err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
+               if (err)
+                       return err;
+       }
+
+       if (protocol == __constant_htons(ETH_P_IP)) {
+               /* for encapsulated packets segment on the inner headers */
+               iph = skb->encapsulation ? inner_ip_hdr(skb) : ip_hdr(skb);
+               tcph = skb->encapsulation ? inner_tcp_hdr(skb) : tcp_hdr(skb);
+               /* zero len/check and seed the TCP pseudo-header checksum
+                * with a zero length so HW can finalize each segment
+                */
+               iph->tot_len = 0;
+               iph->check = 0;
+               tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
+                                                0, IPPROTO_TCP, 0);
+       } else if (skb_is_gso_v6(skb)) {
+
+               ipv6h = skb->encapsulation ? inner_ipv6_hdr(skb)
+                                          : ipv6_hdr(skb);
+               tcph = skb->encapsulation ? inner_tcp_hdr(skb) : tcp_hdr(skb);
+               /* same pseudo-header seeding, IPv6 flavor */
+               ipv6h->payload_len = 0;
+               tcph->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr,
+                                              0, IPPROTO_TCP, 0);
+       }
+
+       /* header length = offset of the (inner) L4 header + TCP header len */
+       l4len = skb->encapsulation ? inner_tcp_hdrlen(skb) : tcp_hdrlen(skb);
+       *hdr_len = (skb->encapsulation
+                   ? (skb_inner_transport_header(skb) - skb->data)
+                   : skb_transport_offset(skb)) + l4len;
+
+       /* find the field values */
+       cd_cmd = I40E_TX_CTX_DESC_TSO;
+       cd_tso_len = skb->len - *hdr_len;
+       cd_mss = skb_shinfo(skb)->gso_size;
+       *cd_type_cmd_tso_mss |= ((u64)cd_cmd << I40E_TXD_CTX_QW1_CMD_SHIFT)
+                            | ((u64)cd_tso_len
+                               << I40E_TXD_CTX_QW1_TSO_LEN_SHIFT)
+                            | ((u64)cd_mss << I40E_TXD_CTX_QW1_MSS_SHIFT);
+       return 1;
+}
+
+/**
+ * i40e_tx_enable_csum - Enable Tx checksum offloads
+ * @skb: send buffer
+ * @tx_flags: Tx flags currently set
+ * @td_cmd: Tx descriptor command bits to set
+ * @td_offset: Tx descriptor header offsets to set
+ * @tx_ring: Tx descriptor ring
+ * @cd_tunneling: ptr to context desc bits
+ *
+ * Fills in the data-descriptor command and offset fields (and, for
+ * encapsulated frames, the context-descriptor tunneling field) so the
+ * hardware can insert the IP and L4 checksums.
+ **/
+static void i40e_tx_enable_csum(struct sk_buff *skb, u32 tx_flags,
+                               u32 *td_cmd, u32 *td_offset,
+                               struct i40e_ring *tx_ring,
+                               u32 *cd_tunneling)
+{
+       struct ipv6hdr *this_ipv6_hdr;
+       unsigned int this_tcp_hdrlen;
+       struct iphdr *this_ip_hdr;
+       u32 network_hdr_len;
+       u8 l4_hdr = 0;
+
+       if (skb->encapsulation) {
+               /* checksum offloads apply to the inner headers; the outer
+                * header is described via the tunneling context fields
+                */
+               network_hdr_len = skb_inner_network_header_len(skb);
+               this_ip_hdr = inner_ip_hdr(skb);
+               this_ipv6_hdr = inner_ipv6_hdr(skb);
+               this_tcp_hdrlen = inner_tcp_hdrlen(skb);
+
+               if (tx_flags & I40E_TX_FLAGS_IPV4) {
+
+                       if (tx_flags & I40E_TX_FLAGS_TSO) {
+                               *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV4;
+                               /* HW recomputes the outer IPv4 csum for TSO */
+                               ip_hdr(skb)->check = 0;
+                       } else {
+                               *cd_tunneling |=
+                                        I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM;
+                       }
+               } else if (tx_flags & I40E_TX_FLAGS_IPV6) {
+                       /* IPv6 has no header checksum: there is nothing to
+                        * clear for TSO, and the IPv4 no-csum encoding does
+                        * not apply.  (The previous code cleared
+                        * ip_hdr(skb)->check here - which lands inside the
+                        * IPv6 header - and reported the outer header as
+                        * IPv4 in the non-TSO case.)
+                        */
+                       *cd_tunneling |= I40E_TX_CTX_EXT_IP_IPV6;
+               }
+
+               /* Now set the ctx descriptor fields */
+               *cd_tunneling |= (skb_network_header_len(skb) >> 2) <<
+                                       I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT |
+                                  I40E_TXD_CTX_UDP_TUNNELING            |
+                                  ((skb_inner_network_offset(skb) -
+                                       skb_transport_offset(skb)) >> 1) <<
+                                  I40E_TXD_CTX_QW0_NATLEN_SHIFT;
+
+       } else {
+               network_hdr_len = skb_network_header_len(skb);
+               this_ip_hdr = ip_hdr(skb);
+               this_ipv6_hdr = ipv6_hdr(skb);
+               this_tcp_hdrlen = tcp_hdrlen(skb);
+       }
+
+       /* Enable IP checksum offloads */
+       if (tx_flags & I40E_TX_FLAGS_IPV4) {
+               l4_hdr = this_ip_hdr->protocol;
+               /* the stack computes the IP header already, the only time we
+                * need the hardware to recompute it is in the case of TSO.
+                */
+               if (tx_flags & I40E_TX_FLAGS_TSO) {
+                       *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4_CSUM;
+                       this_ip_hdr->check = 0;
+               } else {
+                       *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV4;
+               }
+               /* Now set the td_offset for IP header length */
+               *td_offset = (network_hdr_len >> 2) <<
+                             I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
+       } else if (tx_flags & I40E_TX_FLAGS_IPV6) {
+               l4_hdr = this_ipv6_hdr->nexthdr;
+               *td_cmd |= I40E_TX_DESC_CMD_IIPT_IPV6;
+               /* Now set the td_offset for IP header length */
+               *td_offset = (network_hdr_len >> 2) <<
+                             I40E_TX_DESC_LENGTH_IPLEN_SHIFT;
+       }
+       /* words in MACLEN + dwords in IPLEN + dwords in L4Len */
+       *td_offset |= (skb_network_offset(skb) >> 1) <<
+                      I40E_TX_DESC_LENGTH_MACLEN_SHIFT;
+
+       /* Enable L4 checksum offloads */
+       switch (l4_hdr) {
+       case IPPROTO_TCP:
+               /* enable checksum offloads */
+               *td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_TCP;
+               *td_offset |= (this_tcp_hdrlen >> 2) <<
+                              I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
+               break;
+       case IPPROTO_SCTP:
+               /* enable SCTP checksum offload */
+               *td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_SCTP;
+               *td_offset |= (sizeof(struct sctphdr) >> 2) <<
+                              I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
+               break;
+       case IPPROTO_UDP:
+               /* enable UDP checksum offload */
+               *td_cmd |= I40E_TX_DESC_CMD_L4T_EOFT_UDP;
+               *td_offset |= (sizeof(struct udphdr) >> 2) <<
+                              I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT;
+               break;
+       default:
+               break;
+       }
+}
+
+/**
+ * i40e_create_tx_ctx Build the Tx context descriptor
+ * @tx_ring:  ring to create the descriptor on
+ * @cd_type_cmd_tso_mss: Quad Word 1
+ * @cd_tunneling: Quad Word 0 - bits 0-31
+ * @cd_l2tag2: Quad Word 0 - bits 32-63
+ *
+ * Writes a context descriptor on the ring when any of the three context
+ * words is non-zero; otherwise no descriptor is consumed.
+ **/
+static void i40e_create_tx_ctx(struct i40e_ring *tx_ring,
+                              const u64 cd_type_cmd_tso_mss,
+                              const u32 cd_tunneling, const u32 cd_l2tag2)
+{
+       struct i40e_tx_context_desc *context_desc;
+       u16 i = tx_ring->next_to_use;
+
+       /* nothing to program: don't burn a descriptor */
+       if (!(cd_type_cmd_tso_mss | cd_tunneling | cd_l2tag2))
+               return;
+
+       /* grab the next descriptor and advance, wrapping at ring end */
+       context_desc = I40E_TX_CTXTDESC(tx_ring, i);
+       i++;
+       tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
+
+       /* cpu_to_le32 and assign to struct fields */
+       context_desc->tunneling_params = cpu_to_le32(cd_tunneling);
+       context_desc->l2tag2 = cpu_to_le16(cd_l2tag2);
+       context_desc->type_cmd_tso_mss = cpu_to_le64(cd_type_cmd_tso_mss);
+}
+
+/**
+ * i40e_tx_map - Build the Tx descriptor
+ * @tx_ring:  ring to send buffer on
+ * @skb:      send buffer
+ * @first:    first buffer info buffer to use
+ * @tx_flags: collected send information
+ * @hdr_len:  size of the packet header
+ * @td_cmd:   the command field in the descriptor
+ * @td_offset: offset for checksum or crc
+ *
+ * DMA-maps the skb head and frags, fills the data descriptors (splitting
+ * buffers larger than I40E_MAX_DATA_PER_TXD), then bumps the tail.
+ **/
+static void i40e_tx_map(struct i40e_ring *tx_ring, struct sk_buff *skb,
+                       struct i40e_tx_buffer *first, u32 tx_flags,
+                       const u8 hdr_len, u32 td_cmd, u32 td_offset)
+{
+       struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
+       unsigned int data_len = skb->data_len;
+       unsigned int size = skb_headlen(skb);
+       struct device *dev = tx_ring->dev;
+       u32 paylen = skb->len - hdr_len;
+       u16 i = tx_ring->next_to_use;
+       struct i40e_tx_buffer *tx_bi;
+       struct i40e_tx_desc *tx_desc;
+       u32 buf_offset = 0;
+       u32 td_tag = 0;
+       dma_addr_t dma;
+       u16 gso_segs;
+
+       /* map the linear (head) portion of the skb first */
+       dma = dma_map_single(dev, skb->data, size, DMA_TO_DEVICE);
+       if (dma_mapping_error(dev, dma))
+               goto dma_error;
+
+       if (tx_flags & I40E_TX_FLAGS_HW_VLAN) {
+               td_cmd |= I40E_TX_DESC_CMD_IL2TAG1;
+               td_tag = (tx_flags & I40E_TX_FLAGS_VLAN_MASK) >>
+                        I40E_TX_FLAGS_VLAN_SHIFT;
+       }
+
+       tx_desc = I40E_TX_DESC(tx_ring, i);
+       for (;;) {
+               /* a descriptor carries at most I40E_MAX_DATA_PER_TXD bytes;
+                * emit full-size chunks until the remainder fits in one
+                */
+               while (size > I40E_MAX_DATA_PER_TXD) {
+                       tx_desc->buffer_addr = cpu_to_le64(dma + buf_offset);
+                       tx_desc->cmd_type_offset_bsz =
+                               build_ctob(td_cmd, td_offset,
+                                          I40E_MAX_DATA_PER_TXD, td_tag);
+
+                       buf_offset += I40E_MAX_DATA_PER_TXD;
+                       size -= I40E_MAX_DATA_PER_TXD;
+
+                       tx_desc++;
+                       i++;
+                       if (i == tx_ring->count) {
+                               tx_desc = I40E_TX_DESC(tx_ring, 0);
+                               i = 0;
+                       }
+               }
+
+               /* record the mapping on the buffer info that goes with the
+                * final descriptor used for this DMA buffer
+                */
+               tx_bi = &tx_ring->tx_bi[i];
+               tx_bi->length = buf_offset + size;
+               tx_bi->tx_flags = tx_flags;
+               tx_bi->dma = dma;
+
+               tx_desc->buffer_addr = cpu_to_le64(dma + buf_offset);
+               tx_desc->cmd_type_offset_bsz = build_ctob(td_cmd, td_offset,
+                                                         size, td_tag);
+
+               /* done once all paged frag data has been consumed */
+               if (likely(!data_len))
+                       break;
+
+               size = skb_frag_size(frag);
+               data_len -= size;
+               buf_offset = 0;
+               tx_flags |= I40E_TX_FLAGS_MAPPED_AS_PAGE;
+
+               dma = skb_frag_dma_map(dev, frag, 0, size, DMA_TO_DEVICE);
+               if (dma_mapping_error(dev, dma))
+                       goto dma_error;
+
+               tx_desc++;
+               i++;
+               if (i == tx_ring->count) {
+                       tx_desc = I40E_TX_DESC(tx_ring, 0);
+                       i = 0;
+               }
+
+               frag++;
+       }
+
+       /* mark the last descriptor: end-of-packet + report status */
+       tx_desc->cmd_type_offset_bsz |=
+                      cpu_to_le64((u64)I40E_TXD_CMD << I40E_TXD_QW1_CMD_SHIFT);
+
+       i++;
+       if (i == tx_ring->count)
+               i = 0;
+
+       tx_ring->next_to_use = i;
+
+       if (tx_flags & (I40E_TX_FLAGS_TSO | I40E_TX_FLAGS_FSO))
+               gso_segs = skb_shinfo(skb)->gso_segs;
+       else
+               gso_segs = 1;
+
+       /* multiply data chunks by size of headers */
+       tx_bi->bytecount = paylen + (gso_segs * hdr_len);
+       tx_bi->gso_segs = gso_segs;
+       tx_bi->skb = skb;
+
+       /* set the timestamp and next to watch values */
+       first->time_stamp = jiffies;
+       first->next_to_watch = tx_desc;
+
+       /* Force memory writes to complete before letting h/w
+        * know there are new descriptors to fetch.  (Only
+        * applicable for weak-ordered memory model archs,
+        * such as IA-64).
+        */
+       wmb();
+
+       /* notify the hardware of the new tail position */
+       writel(i, tx_ring->tail);
+       return;
+
+dma_error:
+       dev_info(dev, "TX DMA map failed\n");
+
+       /* clear dma mappings for failed tx_bi map */
+       /* walk backwards from the failing position down to 'first' */
+       for (;;) {
+               tx_bi = &tx_ring->tx_bi[i];
+               i40e_unmap_tx_resource(tx_ring, tx_bi);
+               if (tx_bi == first)
+                       break;
+               if (i == 0)
+                       i = tx_ring->count;
+               i--;
+       }
+
+       dev_kfree_skb_any(skb);
+
+       tx_ring->next_to_use = i;
+}
+
+/**
+ * __i40e_maybe_stop_tx - 2nd level check for tx stop conditions
+ * @tx_ring: the ring to be checked
+ * @size:    the size buffer we want to assure is available
+ *
+ * Stops the subqueue, then re-checks the free-descriptor count in case
+ * another CPU made room between the caller's check and the stop.
+ *
+ * Returns -EBUSY if a stop is needed, else 0
+ **/
+static inline int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
+{
+       netif_stop_subqueue(tx_ring->netdev, tx_ring->queue_index);
+       /* order the stop above against the re-check below */
+       smp_mb();
+
+       if (I40E_DESC_UNUSED(tx_ring) >= size) {
+               /* A reprieve! - use start_queue because it doesn't call schedule */
+               netif_start_subqueue(tx_ring->netdev, tx_ring->queue_index);
+               ++tx_ring->tx_stats.restart_queue;
+               return 0;
+       }
+
+       return -EBUSY;
+}
+
+/**
+ * i40e_maybe_stop_tx - 1st level check for tx stop conditions
+ * @tx_ring: the ring to be checked
+ * @size:    the size buffer we want to assure is available
+ *
+ * Returns 0 if stop is not needed
+ **/
+static int i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size)
+{
+       /* take the slow path only when the ring looks too full */
+       if (I40E_DESC_UNUSED(tx_ring) < size)
+               return __i40e_maybe_stop_tx(tx_ring, size);
+
+       return 0;
+}
+
+/**
+ * i40e_xmit_descriptor_count - calculate number of tx descriptors needed
+ * @skb:     send buffer
+ * @tx_ring: ring to send buffer on
+ *
+ * Returns number of data descriptors needed for this skb. Returns 0 to indicate
+ * there is not enough descriptors available in this ring since we need at least
+ * one descriptor.
+ **/
+static int i40e_xmit_descriptor_count(struct sk_buff *skb,
+                                     struct i40e_ring *tx_ring)
+{
+#if PAGE_SIZE > I40E_MAX_DATA_PER_TXD
+       unsigned int f;
+#endif
+       int count = 0;
+
+       /* need: 1 descriptor per page * PAGE_SIZE/I40E_MAX_DATA_PER_TXD,
+        *       + 1 desc for skb_head_len/I40E_MAX_DATA_PER_TXD,
+        *       + 2 desc gap to keep tail from touching head,
+        *       + 1 desc for context descriptor,
+        * otherwise try next time
+        */
+#if PAGE_SIZE > I40E_MAX_DATA_PER_TXD
+       for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
+               count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size);
+#else
+       /* a page can never exceed the per-descriptor data limit here,
+        * so each frag costs exactly one descriptor
+        */
+       count += skb_shinfo(skb)->nr_frags;
+#endif
+       count += TXD_USE_COUNT(skb_headlen(skb));
+       /* "+ 3" = context descriptor + 2-descriptor gap (see note above) */
+       if (i40e_maybe_stop_tx(tx_ring, count + 3)) {
+               tx_ring->tx_stats.tx_busy++;
+               return 0;
+       }
+       return count;
+}
+
+/**
+ * i40e_xmit_frame_ring - Sends buffer on Tx ring
+ * @skb:     send buffer
+ * @tx_ring: ring to send buffer on
+ *
+ * Returns NETDEV_TX_OK if sent, else an error code
+ **/
+static netdev_tx_t i40e_xmit_frame_ring(struct sk_buff *skb,
+                                       struct i40e_ring *tx_ring)
+{
+       u64 cd_type_cmd_tso_mss = I40E_TX_DESC_DTYPE_CONTEXT;
+       u32 cd_tunneling = 0, cd_l2tag2 = 0;
+       struct i40e_tx_buffer *first;
+       u32 td_offset = 0;
+       u32 tx_flags = 0;
+       __be16 protocol;
+       u32 td_cmd = 0;
+       u8 hdr_len = 0;
+       int tso;
+       /* not enough descriptors: the helper already bumped tx_busy */
+       if (0 == i40e_xmit_descriptor_count(skb, tx_ring))
+               return NETDEV_TX_BUSY;
+
+       /* prepare the xmit flags */
+       if (i40e_tx_prepare_vlan_flags(skb, tx_ring, &tx_flags))
+               goto out_drop;
+
+       /* obtain protocol of skb */
+       protocol = skb->protocol;
+
+       /* record the location of the first descriptor for this packet */
+       first = &tx_ring->tx_bi[tx_ring->next_to_use];
+
+       /* setup IPv4/IPv6 offloads */
+       if (protocol == __constant_htons(ETH_P_IP))
+               tx_flags |= I40E_TX_FLAGS_IPV4;
+       else if (protocol == __constant_htons(ETH_P_IPV6))
+               tx_flags |= I40E_TX_FLAGS_IPV6;
+
+       tso = i40e_tso(tx_ring, skb, tx_flags, protocol, &hdr_len,
+                      &cd_type_cmd_tso_mss, &cd_tunneling);
+
+       /* negative = TSO setup failed, drop; positive = TSO in use */
+       if (tso < 0)
+               goto out_drop;
+       else if (tso)
+               tx_flags |= I40E_TX_FLAGS_TSO;
+
+       skb_tx_timestamp(skb);
+
+       /* Always offload the checksum, since it's in the data descriptor */
+       if (i40e_tx_csum(tx_ring, skb, tx_flags, protocol))
+               tx_flags |= I40E_TX_FLAGS_CSUM;
+
+       /* always enable offload insertion */
+       td_cmd |= I40E_TX_DESC_CMD_ICRC;
+
+       if (tx_flags & I40E_TX_FLAGS_CSUM)
+               i40e_tx_enable_csum(skb, tx_flags, &td_cmd, &td_offset,
+                                   tx_ring, &cd_tunneling);
+
+       i40e_create_tx_ctx(tx_ring, cd_type_cmd_tso_mss,
+                          cd_tunneling, cd_l2tag2);
+
+       /* Add Flow Director ATR if it's enabled.
+        *
+        * NOTE: this must always be directly before the data descriptor.
+        */
+       i40e_atr(tx_ring, skb, tx_flags, protocol);
+
+       i40e_tx_map(tx_ring, skb, first, tx_flags, hdr_len,
+                   td_cmd, td_offset);
+
+       /* proactively stop the queue if a worst-case skb no longer fits */
+       i40e_maybe_stop_tx(tx_ring, DESC_NEEDED);
+
+       return NETDEV_TX_OK;
+
+out_drop:
+       dev_kfree_skb_any(skb);
+       return NETDEV_TX_OK;
+}
+
+/**
+ * i40e_lan_xmit_frame - Selects the correct VSI and Tx queue to send buffer
+ * @skb:    send buffer
+ * @netdev: network interface device structure
+ *
+ * Returns NETDEV_TX_OK if sent, else an error code
+ **/
+netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
+{
+       struct i40e_netdev_priv *np = netdev_priv(netdev);
+       struct i40e_vsi *vsi = np->vsi;
+       struct i40e_ring *tx_ring = &vsi->tx_rings[skb->queue_mapping];
+
+       /* hardware can't handle really short frames, hardware padding works
+        * beyond this point
+        */
+       if (unlikely(skb->len < I40E_MIN_TX_LEN)) {
+               /* on pad failure the frame is silently dropped and OK is
+                * returned so the stack does not requeue it.
+                * NOTE(review): this relies on skb_pad() freeing the skb
+                * on error — confirm against skb_pad()'s kernel-doc.
+                */
+               if (skb_pad(skb, I40E_MIN_TX_LEN - skb->len))
+                       return NETDEV_TX_OK;
+               skb->len = I40E_MIN_TX_LEN;
+               skb_set_tail_pointer(skb, I40E_MIN_TX_LEN);
+       }
+
+       return i40e_xmit_frame_ring(skb, tx_ring);
+}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
new file mode 100644 (file)
index 0000000..b1d7722
--- /dev/null
@@ -0,0 +1,259 @@
+/*******************************************************************************
+ *
+ * Intel Ethernet Controller XL710 Family Linux Driver
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Contact Information:
+ * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ *
+ ******************************************************************************/
+
+/* Interrupt Throttling and Rate Limiting (storm control) Goodies */
+
+#define I40E_MAX_ITR               0x07FF
+#define I40E_MIN_ITR               0x0001
+#define I40E_ITR_USEC_RESOLUTION   2
+#define I40E_MAX_IRATE             0x03F
+#define I40E_MIN_IRATE             0x001
+#define I40E_IRATE_USEC_RESOLUTION 4
+#define I40E_ITR_100K              0x0005
+#define I40E_ITR_20K               0x0019
+#define I40E_ITR_8K                0x003E
+#define I40E_ITR_4K                0x007A
+#define I40E_ITR_RX_DEF            I40E_ITR_8K
+#define I40E_ITR_TX_DEF            I40E_ITR_4K
+#define I40E_ITR_DYNAMIC           0x8000  /* use top bit as a flag */
+#define I40E_MIN_INT_RATE          250     /* ~= 1000000 / (I40E_MAX_ITR * 2) */
+#define I40E_MAX_INT_RATE          500000  /* == 1000000 / (I40E_MIN_ITR * 2) */
+#define I40E_DEFAULT_IRQ_WORK      256
+#define ITR_TO_REG(setting) ((setting & ~I40E_ITR_DYNAMIC) >> 1)
+#define ITR_IS_DYNAMIC(setting) (!!(setting & I40E_ITR_DYNAMIC))
+#define ITR_REG_TO_USEC(itr_reg) (itr_reg << 1)
+
+#define I40E_QUEUE_END_OF_LIST 0x7FF
+
+#define I40E_ITR_NONE  3
+#define I40E_RX_ITR    0
+#define I40E_TX_ITR    1
+#define I40E_PE_ITR    2
+/* Supported Rx Buffer Sizes */
+#define I40E_RXBUFFER_512   512    /* Used for packet split */
+#define I40E_RXBUFFER_2048  2048
+#define I40E_RXBUFFER_3072  3072   /* For FCoE MTU of 2158 */
+#define I40E_RXBUFFER_4096  4096
+#define I40E_RXBUFFER_8192  8192
+#define I40E_MAX_RXBUFFER   9728  /* largest size for single descriptor */
+
+/* NOTE: netdev_alloc_skb reserves up to 64 bytes, NET_IP_ALIGN means we
+ * reserve 2 more, and skb_shared_info adds an additional 384 bytes more,
+ * this adds up to 512 bytes of extra data meaning the smallest allocation
+ * we could have is 1K.
+ * i.e. RXBUFFER_512 --> size-1024 slab
+ */
+#define I40E_RX_HDR_SIZE  I40E_RXBUFFER_512
+
+/* How many Rx Buffers do we bundle into one write to the hardware ? */
+#define I40E_RX_BUFFER_WRITE   16      /* Must be power of 2 */
+#define I40E_RX_NEXT_DESC(r, i, n)             \
+       do {                                    \
+               (i)++;                          \
+               if ((i) == (r)->count)          \
+                       i = 0;                  \
+               (n) = I40E_RX_DESC((r), (i));   \
+       } while (0)
+
+#define I40E_RX_NEXT_DESC_PREFETCH(r, i, n)            \
+       do {                                            \
+               I40E_RX_NEXT_DESC((r), (i), (n));       \
+               prefetch((n));                          \
+       } while (0)
+
+#define i40e_rx_desc i40e_32byte_rx_desc
+
+#define I40E_MIN_TX_LEN                17
+#define I40E_MAX_DATA_PER_TXD  16383   /* aka 16kB - 1 */
+
+/* Tx Descriptors needed, worst case */
+#define TXD_USE_COUNT(S) DIV_ROUND_UP((S), I40E_MAX_DATA_PER_TXD)
+#define DESC_NEEDED ((MAX_SKB_FRAGS * TXD_USE_COUNT(PAGE_SIZE)) + 4)
+
+#define I40E_TX_FLAGS_CSUM             (u32)(1)
+#define I40E_TX_FLAGS_HW_VLAN          (u32)(1 << 1)
+#define I40E_TX_FLAGS_SW_VLAN          (u32)(1 << 2)
+#define I40E_TX_FLAGS_TSO              (u32)(1 << 3)
+#define I40E_TX_FLAGS_IPV4             (u32)(1 << 4)
+#define I40E_TX_FLAGS_IPV6             (u32)(1 << 5)
+#define I40E_TX_FLAGS_FCCRC            (u32)(1 << 6)
+#define I40E_TX_FLAGS_FSO              (u32)(1 << 7)
+#define I40E_TX_FLAGS_TXSW             (u32)(1 << 8)
+#define I40E_TX_FLAGS_MAPPED_AS_PAGE   (u32)(1 << 9)
+#define I40E_TX_FLAGS_VLAN_MASK                0xffff0000
+#define I40E_TX_FLAGS_VLAN_PRIO_MASK   0xe0000000
+#define I40E_TX_FLAGS_VLAN_PRIO_SHIFT  29
+#define I40E_TX_FLAGS_VLAN_SHIFT       16
+
+/* Per-descriptor Tx bookkeeping; one entry per hardware descriptor */
+struct i40e_tx_buffer {
+       struct sk_buff *skb;
+       dma_addr_t dma;
+       unsigned long time_stamp;       /* jiffies when handed to hardware */
+       u16 length;
+       u32 tx_flags;
+       struct i40e_tx_desc *next_to_watch;     /* set on a packet's first entry */
+       unsigned int bytecount;
+       u16 gso_segs;
+       u8 mapped_as_page;      /* presumably frag-page vs. head mapping; affects unmap — confirm */
+};
+
+/* Per-descriptor Rx bookkeeping: skb plus optional packet-split page */
+struct i40e_rx_buffer {
+       struct sk_buff *skb;
+       dma_addr_t dma;
+       struct page *page;              /* data page when packet split is enabled */
+       dma_addr_t page_dma;
+       unsigned int page_offset;
+};
+
+/* Tx counters, kept per ring (i40e_ring.tx_stats) */
+struct i40e_tx_queue_stats {
+       u64 packets;
+       u64 bytes;
+       u64 restart_queue;      /* times a stopped queue was re-woken */
+       u64 tx_busy;            /* xmits rejected for lack of descriptors */
+       u64 completed;
+       u64 tx_done_old;        /* snapshot, presumably for hang detection — confirm */
+};
+
+/* Rx counters, kept per ring (i40e_ring.rx_stats) */
+struct i40e_rx_queue_stats {
+       u64 packets;
+       u64 bytes;
+       u64 non_eop_descs;      /* descriptors that did not end a packet */
+       u64 alloc_rx_page_failed;
+       u64 alloc_rx_buff_failed;
+};
+
+/* Bit numbers for the atomic i40e_ring.state word (see accessors below) */
+enum i40e_ring_state_t {
+       __I40E_TX_FDIR_INIT_DONE,
+       __I40E_TX_XPS_INIT_DONE,
+       __I40E_TX_DETECT_HANG,          /* request Tx hang checking */
+       __I40E_HANG_CHECK_ARMED,
+       __I40E_RX_PS_ENABLED,           /* Rx packet split enabled */
+       __I40E_RX_LRO_ENABLED,
+       __I40E_RX_16BYTE_DESC_ENABLED,  /* 16-byte instead of 32-byte Rx descriptors */
+};
+
+#define ring_is_ps_enabled(ring) \
+       test_bit(__I40E_RX_PS_ENABLED, &(ring)->state)
+#define set_ring_ps_enabled(ring) \
+       set_bit(__I40E_RX_PS_ENABLED, &(ring)->state)
+#define clear_ring_ps_enabled(ring) \
+       clear_bit(__I40E_RX_PS_ENABLED, &(ring)->state)
+#define check_for_tx_hang(ring) \
+       test_bit(__I40E_TX_DETECT_HANG, &(ring)->state)
+#define set_check_for_tx_hang(ring) \
+       set_bit(__I40E_TX_DETECT_HANG, &(ring)->state)
+#define clear_check_for_tx_hang(ring) \
+       clear_bit(__I40E_TX_DETECT_HANG, &(ring)->state)
+#define ring_is_lro_enabled(ring) \
+       test_bit(__I40E_RX_LRO_ENABLED, &(ring)->state)
+#define set_ring_lro_enabled(ring) \
+       set_bit(__I40E_RX_LRO_ENABLED, &(ring)->state)
+#define clear_ring_lro_enabled(ring) \
+       clear_bit(__I40E_RX_LRO_ENABLED, &(ring)->state)
+#define ring_is_16byte_desc_enabled(ring) \
+       test_bit(__I40E_RX_16BYTE_DESC_ENABLED, &(ring)->state)
+#define set_ring_16byte_desc_enabled(ring) \
+       set_bit(__I40E_RX_16BYTE_DESC_ENABLED, &(ring)->state)
+#define clear_ring_16byte_desc_enabled(ring) \
+       clear_bit(__I40E_RX_16BYTE_DESC_ENABLED, &(ring)->state)
+
+/* struct that defines a descriptor ring, associated with a VSI */
+struct i40e_ring {
+       void *desc;                     /* Descriptor ring memory */
+       struct device *dev;             /* Used for DMA mapping */
+       struct net_device *netdev;      /* netdev ring maps to */
+       union {
+               struct i40e_tx_buffer *tx_bi;
+               struct i40e_rx_buffer *rx_bi;
+       };
+       unsigned long state;            /* i40e_ring_state_t flag bits */
+       u16 queue_index;                /* Queue number of ring */
+       u8 dcb_tc;                      /* Traffic class of ring */
+       u8 __iomem *tail;               /* mapped tail register, written on xmit */
+
+       u16 count;                      /* Number of descriptors */
+       u16 reg_idx;                    /* HW register index of the ring */
+       u16 rx_hdr_len;
+       u16 rx_buf_len;
+       u8  dtype;                      /* Rx buffer layout, one of the below */
+#define I40E_RX_DTYPE_NO_SPLIT      0
+#define I40E_RX_DTYPE_SPLIT_ALWAYS  1
+#define I40E_RX_DTYPE_HEADER_SPLIT  2
+       u8  hsplit;                     /* mask of I40E_RX_SPLIT_* bits */
+#define I40E_RX_SPLIT_L2      0x1
+#define I40E_RX_SPLIT_IP      0x2
+#define I40E_RX_SPLIT_TCP_UDP 0x4
+#define I40E_RX_SPLIT_SCTP    0x8
+
+       /* used in interrupt processing */
+       u16 next_to_use;
+       u16 next_to_clean;
+
+       /* Flow Director ATR sampling — presumably packets per sample;
+        * confirm against i40e_atr()
+        */
+       u8 atr_sample_rate;
+       u8 atr_count;
+
+       bool ring_active;               /* is ring online or not */
+
+       /* stats structs */
+       union {
+               struct i40e_tx_queue_stats tx_stats;
+               struct i40e_rx_queue_stats rx_stats;
+       };
+
+       unsigned int size;              /* length of descriptor ring in bytes */
+       dma_addr_t dma;                 /* physical address of ring */
+
+       struct i40e_vsi *vsi;           /* Backreference to associated VSI */
+       struct i40e_q_vector *q_vector; /* Backreference to associated vector */
+} ____cacheline_internodealigned_in_smp;
+
+enum i40e_latency_range {
+       I40E_LOWEST_LATENCY = 0,
+       I40E_LOW_LATENCY = 1,
+       I40E_BULK_LATENCY = 2,
+};
+
+/* Rings serviced by one interrupt vector, plus the per-interrupt
+ * byte/packet totals used when adjusting the interrupt throttle rate
+ */
+struct i40e_ring_container {
+#define I40E_MAX_RINGPAIR_PER_VECTOR 8
+       /* array of pointers to rings */
+       struct i40e_ring *ring[I40E_MAX_RINGPAIR_PER_VECTOR];
+       unsigned int total_bytes;       /* total bytes processed this int */
+       unsigned int total_packets;     /* total packets processed this int */
+       u16 count;                      /* number of valid entries in ring[] */
+       enum i40e_latency_range latency_range;
+       u16 itr;                        /* current interrupt throttle setting */
+};
+
+void i40e_alloc_rx_buffers(struct i40e_ring *rxr, u16 cleaned_count);
+netdev_tx_t i40e_lan_xmit_frame(struct sk_buff *skb, struct net_device *netdev);
+void i40e_clean_tx_ring(struct i40e_ring *tx_ring);
+void i40e_clean_rx_ring(struct i40e_ring *rx_ring);
+int i40e_setup_tx_descriptors(struct i40e_ring *tx_ring);
+int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring);
+void i40e_free_tx_resources(struct i40e_ring *tx_ring);
+void i40e_free_rx_resources(struct i40e_ring *rx_ring);
+int i40e_napi_poll(struct napi_struct *napi, int budget);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h
new file mode 100644 (file)
index 0000000..f3f22b2
--- /dev/null
@@ -0,0 +1,1154 @@
+/*******************************************************************************
+ *
+ * Intel Ethernet Controller XL710 Family Linux Driver
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Contact Information:
+ * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ *
+ ******************************************************************************/
+
+#ifndef _I40E_TYPE_H_
+#define _I40E_TYPE_H_
+
+#include "i40e_status.h"
+#include "i40e_osdep.h"
+#include "i40e_register.h"
+#include "i40e_adminq.h"
+#include "i40e_hmc.h"
+#include "i40e_lan_hmc.h"
+
+/* Device IDs */
+#define I40E_SFP_XL710_DEVICE_ID       0x1572
+#define I40E_SFP_X710_DEVICE_ID                0x1573
+#define I40E_QEMU_DEVICE_ID            0x1574
+#define I40E_KX_A_DEVICE_ID            0x157F
+#define I40E_KX_B_DEVICE_ID            0x1580
+#define I40E_KX_C_DEVICE_ID            0x1581
+#define I40E_KX_D_DEVICE_ID            0x1582
+#define I40E_QSFP_A_DEVICE_ID          0x1583
+#define I40E_QSFP_B_DEVICE_ID          0x1584
+#define I40E_QSFP_C_DEVICE_ID          0x1585
+#define I40E_VF_DEVICE_ID              0x154C
+#define I40E_VF_HV_DEVICE_ID           0x1571
+
+#define I40E_FW_API_VERSION_MAJOR  0x0001
+#define I40E_FW_API_VERSION_MINOR  0x0000
+
+#define I40E_MAX_VSI_QP                        16
+#define I40E_MAX_VF_VSI                        3
+#define I40E_MAX_CHAINED_RX_BUFFERS    5
+
+/* Max default timeout in ms, */
+#define I40E_MAX_NVM_TIMEOUT           18000
+
+/* Check whether address is multicast.  This is little-endian specific check.*/
+#define I40E_IS_MULTICAST(address)     \
+       (bool)(((u8 *)(address))[0] & ((u8)0x01))
+
+/* Check whether an address is broadcast. */
+#define I40E_IS_BROADCAST(address)     \
+       ((((u8 *)(address))[0] == ((u8)0xff)) && \
+       (((u8 *)(address))[1] == ((u8)0xff)))
+
+/* Switch from mc to the 2usec global time (this is the GTIME resolution) */
+#define I40E_MS_TO_GTIME(time)         (((time) * 1000) / 2)
+
+/* forward declaration */
+struct i40e_hw;
+typedef void (*I40E_ADMINQ_CALLBACK)(struct i40e_hw *, struct i40e_aq_desc *);
+
+#define I40E_ETH_LENGTH_OF_ADDRESS     6
+
+/* Data type manipulation macros. */
+
+/* Free descriptors in ring R; one slot is always left unused (the "- 1")
+ * so that next_to_use == next_to_clean unambiguously means "empty"
+ */
+#define I40E_DESC_UNUSED(R)    \
+       ((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->count) + \
+       (R)->next_to_clean - (R)->next_to_use - 1)
+
+/* bitfields for Tx queue mapping in QTX_CTL */
+#define I40E_QTX_CTL_VF_QUEUE  0x0
+#define I40E_QTX_CTL_PF_QUEUE  0x2
+
+/* debug masks */
+enum i40e_debug_mask {
+       I40E_DEBUG_INIT                 = 0x00000001,
+       I40E_DEBUG_RELEASE              = 0x00000002,
+
+       I40E_DEBUG_LINK                 = 0x00000010,
+       I40E_DEBUG_PHY                  = 0x00000020,
+       I40E_DEBUG_HMC                  = 0x00000040,
+       I40E_DEBUG_NVM                  = 0x00000080,
+       I40E_DEBUG_LAN                  = 0x00000100,
+       I40E_DEBUG_FLOW                 = 0x00000200,
+       I40E_DEBUG_DCB                  = 0x00000400,
+       I40E_DEBUG_DIAG                 = 0x00000800,
+
+       I40E_DEBUG_AQ_MESSAGE           = 0x01000000, /* for i40e_debug() */
+       I40E_DEBUG_AQ_DESCRIPTOR        = 0x02000000,
+       I40E_DEBUG_AQ_DESC_BUFFER       = 0x04000000,
+       I40E_DEBUG_AQ_COMMAND           = 0x06000000, /* for i40e_debug_aq() */
+       I40E_DEBUG_AQ                   = 0x0F000000,
+
+       I40E_DEBUG_USER                 = 0xF0000000,
+
+       I40E_DEBUG_ALL                  = 0xFFFFFFFF
+};
+
+/* These are structs for managing the hardware information and the operations.
+ * The structures of function pointers are filled out at init time when we
+ * know for sure exactly which hardware we're working with.  This gives us the
+ * flexibility of using the same main driver code but adapting to slightly
+ * different hardware needs as new parts are developed.  For this architecture,
+ * the Firmware and AdminQ are intended to insulate the driver from most of the
+ * future changes, but these structures will also do part of the job.
+ */
+enum i40e_mac_type {
+       I40E_MAC_UNKNOWN = 0,
+       I40E_MAC_X710,
+       I40E_MAC_XL710,
+       I40E_MAC_VF,
+       I40E_MAC_GENERIC,
+};
+
+enum i40e_media_type {
+       I40E_MEDIA_TYPE_UNKNOWN = 0,
+       I40E_MEDIA_TYPE_FIBER,
+       I40E_MEDIA_TYPE_BASET,
+       I40E_MEDIA_TYPE_BACKPLANE,
+       I40E_MEDIA_TYPE_CX4,
+       I40E_MEDIA_TYPE_VIRTUAL
+};
+
+enum i40e_fc_mode {
+       I40E_FC_NONE = 0,
+       I40E_FC_RX_PAUSE,
+       I40E_FC_TX_PAUSE,
+       I40E_FC_FULL,
+       I40E_FC_PFC,
+       I40E_FC_DEFAULT
+};
+
+enum i40e_vsi_type {
+       I40E_VSI_MAIN = 0,
+       I40E_VSI_VMDQ1,
+       I40E_VSI_VMDQ2,
+       I40E_VSI_CTRL,
+       I40E_VSI_FCOE,
+       I40E_VSI_MIRROR,
+       I40E_VSI_SRIOV,
+       I40E_VSI_FDIR,
+       I40E_VSI_TYPE_UNKNOWN
+};
+
+enum i40e_queue_type {
+       I40E_QUEUE_TYPE_RX = 0,
+       I40E_QUEUE_TYPE_TX,
+       I40E_QUEUE_TYPE_PE_CEQ,
+       I40E_QUEUE_TYPE_UNKNOWN
+};
+
+struct i40e_link_status {
+       enum i40e_aq_phy_type phy_type;
+       enum i40e_aq_link_speed link_speed;
+       u8 link_info;
+       u8 an_info;
+       u8 ext_info;
+       /* is Link Status Event notification to SW enabled */
+       bool lse_enable;
+};
+
+struct i40e_phy_info {
+       struct i40e_link_status link_info;
+       struct i40e_link_status link_info_old;
+       u32 autoneg_advertised;
+       u32 phy_id;
+       u32 module_type;
+       bool get_link_info;
+       enum i40e_media_type media_type;
+};
+
+#define I40E_HW_CAP_MAX_GPIO                   30
+/* Capabilities of a PF or a VF or the whole device */
+struct i40e_hw_capabilities {
+       u32  switch_mode;
+#define I40E_NVM_IMAGE_TYPE_EVB                0x0
+#define I40E_NVM_IMAGE_TYPE_CLOUD      0x2
+#define I40E_NVM_IMAGE_TYPE_UDP_CLOUD  0x3
+
+       u32  management_mode;
+       u32  npar_enable;
+       u32  os2bmc;
+       u32  valid_functions;
+       bool sr_iov_1_1;
+       bool vmdq;
+       bool evb_802_1_qbg; /* Edge Virtual Bridging */
+       bool evb_802_1_qbh; /* Bridge Port Extension */
+       bool dcb;
+       bool fcoe;
+       bool mfp_mode_1;
+       bool mgmt_cem;
+       bool ieee_1588;
+       bool iwarp;
+       bool fd;
+       u32 fd_filters_guaranteed;
+       u32 fd_filters_best_effort;
+       bool rss;
+       u32 rss_table_size;
+       u32 rss_table_entry_width;
+       bool led[I40E_HW_CAP_MAX_GPIO];
+       bool sdp[I40E_HW_CAP_MAX_GPIO];
+       u32 nvm_image_type;
+       u32 num_flow_director_filters;
+       u32 num_vfs;
+       u32 vf_base_id;
+       u32 num_vsis;
+       u32 num_rx_qp;
+       u32 num_tx_qp;
+       u32 base_queue;
+       u32 num_msix_vectors;
+       u32 num_msix_vectors_vf;
+       u32 led_pin_num;
+       u32 sdp_pin_num;
+       u32 mdio_port_num;
+       u32 mdio_port_mode;
+       u8 rx_buf_chain_len;
+       u32 enabled_tcmap;
+       u32 maxtc;
+};
+
+struct i40e_mac_info {
+       enum i40e_mac_type type;
+       u8 addr[I40E_ETH_LENGTH_OF_ADDRESS];
+       u8 perm_addr[I40E_ETH_LENGTH_OF_ADDRESS];
+       u8 san_addr[I40E_ETH_LENGTH_OF_ADDRESS];
+       u16 max_fcoeq;
+};
+
+enum i40e_aq_resources_ids {
+       I40E_NVM_RESOURCE_ID = 1
+};
+
+enum i40e_aq_resource_access_type {
+       I40E_RESOURCE_READ = 1,
+       I40E_RESOURCE_WRITE
+};
+
+struct i40e_nvm_info {
+       u64 hw_semaphore_timeout; /* 2usec global time (GTIME resolution) */
+       u64 hw_semaphore_wait;    /* - || - */
+       u32 timeout;              /* [ms] */
+       u16 sr_size;              /* Shadow RAM size in words */
+       bool blank_nvm_mode;      /* is NVM empty (no FW present)*/
+       u16 version;              /* NVM package version */
+       u32 eetrack;              /* NVM data version */
+};
+
+/* PCI bus types */
+enum i40e_bus_type {
+       i40e_bus_type_unknown = 0,
+       i40e_bus_type_pci,
+       i40e_bus_type_pcix,
+       i40e_bus_type_pci_express,
+       i40e_bus_type_reserved
+};
+
+/* PCI bus speeds */
+enum i40e_bus_speed {
+       i40e_bus_speed_unknown  = 0,
+       i40e_bus_speed_33       = 33,
+       i40e_bus_speed_66       = 66,
+       i40e_bus_speed_100      = 100,
+       i40e_bus_speed_120      = 120,
+       i40e_bus_speed_133      = 133,
+       i40e_bus_speed_2500     = 2500,
+       i40e_bus_speed_5000     = 5000,
+       i40e_bus_speed_8000     = 8000,
+       i40e_bus_speed_reserved
+};
+
+/* PCI bus widths */
+enum i40e_bus_width {
+       i40e_bus_width_unknown  = 0,
+       i40e_bus_width_pcie_x1  = 1,
+       i40e_bus_width_pcie_x2  = 2,
+       i40e_bus_width_pcie_x4  = 4,
+       i40e_bus_width_pcie_x8  = 8,
+       i40e_bus_width_32       = 32,
+       i40e_bus_width_64       = 64,
+       i40e_bus_width_reserved
+};
+
+/* Bus parameters */
+struct i40e_bus_info {
+       enum i40e_bus_speed speed;
+       enum i40e_bus_width width;
+       enum i40e_bus_type type;
+
+       u16 func;
+       u16 device;
+       u16 lan_id;
+};
+
+/* Flow control (FC) parameters */
+struct i40e_fc_info {
+       enum i40e_fc_mode current_mode; /* FC mode in effect */
+       enum i40e_fc_mode requested_mode; /* FC mode requested by caller */
+};
+
+#define I40E_MAX_TRAFFIC_CLASS         8
+#define I40E_MAX_USER_PRIORITY         8
+#define I40E_DCBX_MAX_APPS             32
+#define I40E_LLDPDU_SIZE               1500
+
+/* IEEE 802.1Qaz ETS Configuration data */
+struct i40e_ieee_ets_config {
+       u8 willing;
+       u8 cbs;
+       u8 maxtcs;
+       u8 prioritytable[I40E_MAX_TRAFFIC_CLASS];
+       u8 tcbwtable[I40E_MAX_TRAFFIC_CLASS];
+       u8 tsatable[I40E_MAX_TRAFFIC_CLASS];
+};
+
+/* IEEE 802.1Qaz ETS Recommendation data */
+struct i40e_ieee_ets_recommend {
+       u8 prioritytable[I40E_MAX_TRAFFIC_CLASS];
+       u8 tcbwtable[I40E_MAX_TRAFFIC_CLASS];
+       u8 tsatable[I40E_MAX_TRAFFIC_CLASS];
+};
+
+/* IEEE 802.1Qaz PFC Configuration data */
+struct i40e_ieee_pfc_config {
+       u8 willing;
+       u8 mbc;
+       u8 pfccap;
+       u8 pfcenable;
+};
+
+/* IEEE 802.1Qaz Application Priority data */
+struct i40e_ieee_app_priority_table {
+       u8  priority;
+       u8  selector;
+       u16 protocolid;
+};
+
+struct i40e_dcbx_config {
+       u32 numapps;
+       struct i40e_ieee_ets_config etscfg;
+       struct i40e_ieee_ets_recommend etsrec;
+       struct i40e_ieee_pfc_config pfc;
+       struct i40e_ieee_app_priority_table app[I40E_DCBX_MAX_APPS];
+};
+
+/* Port hardware description */
+struct i40e_hw {
+       u8 __iomem *hw_addr;
+       void *back;
+
+       /* function pointer structs */
+       struct i40e_phy_info phy;
+       struct i40e_mac_info mac;
+       struct i40e_bus_info bus;
+       struct i40e_nvm_info nvm;
+       struct i40e_fc_info fc;
+
+       /* pci info */
+       u16 device_id;
+       u16 vendor_id;
+       u16 subsystem_device_id;
+       u16 subsystem_vendor_id;
+       u8 revision_id;
+       u8 port;
+       bool adapter_stopped;
+
+       /* capabilities for entire device and PCI func */
+       struct i40e_hw_capabilities dev_caps;
+       struct i40e_hw_capabilities func_caps;
+
+       /* Flow Director shared filter space */
+       u16 fdir_shared_filter_count;
+
+       /* device profile info */
+       u8  pf_id;
+       u16 main_vsi_seid;
+
+       /* Closest numa node to the device */
+       u16 numa_node;
+
+       /* Admin Queue info */
+       struct i40e_adminq_info aq;
+
+       /* HMC info */
+       struct i40e_hmc_info hmc; /* HMC info struct */
+
+       /* LLDP/DCBX Status */
+       u16 dcbx_status;
+
+       /* DCBX info */
+       struct i40e_dcbx_config local_dcbx_config;
+       struct i40e_dcbx_config remote_dcbx_config;
+
+       /* debug mask */
+       u32 debug_mask;
+};
+
+struct i40e_driver_version {
+       u8 major_version;
+       u8 minor_version;
+       u8 build_version;
+       u8 subbuild_version;
+};
+
+/* RX Descriptors */
+union i40e_16byte_rx_desc {
+       struct {
+               __le64 pkt_addr; /* Packet buffer address */
+               __le64 hdr_addr; /* Header buffer address */
+       } read;
+       struct {
+               struct {
+                       struct {
+                               union {
+                                       __le16 mirroring_status;
+                                       __le16 fcoe_ctx_id;
+                               } mirr_fcoe;
+                               __le16 l2tag1;
+                       } lo_dword;
+                       union {
+                               __le32 rss; /* RSS Hash */
+                               __le32 fd_id; /* Flow director filter id */
+                               __le32 fcoe_param; /* FCoE DDP Context id */
+                       } hi_dword;
+               } qword0;
+               struct {
+                       /* ext status/error/pktype/length */
+                       __le64 status_error_len;
+               } qword1;
+       } wb;  /* writeback */
+};
+
+union i40e_32byte_rx_desc {
+       struct {
+               __le64  pkt_addr; /* Packet buffer address */
+               __le64  hdr_addr; /* Header buffer address */
+                       /* bit 0 of hdr_buffer_addr is DD bit */
+               __le64  rsvd1;
+               __le64  rsvd2;
+       } read;
+       struct {
+               struct {
+                       struct {
+                               union {
+                                       __le16 mirroring_status;
+                                       __le16 fcoe_ctx_id;
+                               } mirr_fcoe;
+                               __le16 l2tag1;
+                       } lo_dword;
+                       union {
+                               __le32 rss; /* RSS Hash */
+                               __le32 fcoe_param; /* FCoE DDP Context id */
+                       } hi_dword;
+               } qword0;
+               struct {
+                       /* status/error/pktype/length */
+                       __le64 status_error_len;
+               } qword1;
+               struct {
+                       __le16 ext_status; /* extended status */
+                       __le16 rsvd;
+                       __le16 l2tag2_1;
+                       __le16 l2tag2_2;
+               } qword2;
+               struct {
+                       union {
+                               __le32 flex_bytes_lo;
+                               __le32 pe_status;
+                       } lo_dword;
+                       union {
+                               __le32 flex_bytes_hi;
+                               __le32 fd_id;
+                       } hi_dword;
+               } qword3;
+       } wb;  /* writeback */
+};
+
+#define I40E_RXD_QW1_STATUS_SHIFT      0
+#define I40E_RXD_QW1_STATUS_MASK       (0x7FFFUL << I40E_RXD_QW1_STATUS_SHIFT)
+
+enum i40e_rx_desc_status_bits {
+       /* Note: These are predefined bit offsets */
+       I40E_RX_DESC_STATUS_DD_SHIFT            = 0,
+       I40E_RX_DESC_STATUS_EOF_SHIFT           = 1,
+       I40E_RX_DESC_STATUS_L2TAG1P_SHIFT       = 2,
+       I40E_RX_DESC_STATUS_L3L4P_SHIFT         = 3,
+       I40E_RX_DESC_STATUS_CRCP_SHIFT          = 4,
+       I40E_RX_DESC_STATUS_TSYNINDX_SHIFT      = 5, /* 3 BITS */
+       I40E_RX_DESC_STATUS_PIF_SHIFT           = 8,
+       I40E_RX_DESC_STATUS_UMBCAST_SHIFT       = 9, /* 2 BITS */
+       I40E_RX_DESC_STATUS_FLM_SHIFT           = 11,
+       I40E_RX_DESC_STATUS_FLTSTAT_SHIFT       = 12, /* 2 BITS */
+       I40E_RX_DESC_STATUS_LPBK_SHIFT          = 14
+};
+
+#define I40E_RXD_QW1_STATUS_TSYNINDX_SHIFT   I40E_RX_DESC_STATUS_TSYNINDX_SHIFT
+#define I40E_RXD_QW1_STATUS_TSYNINDX_MASK      (0x7UL << \
+                                            I40E_RXD_QW1_STATUS_TSYNINDX_SHIFT)
+
+enum i40e_rx_desc_fltstat_values {
+       I40E_RX_DESC_FLTSTAT_NO_DATA    = 0,
+       I40E_RX_DESC_FLTSTAT_RSV_FD_ID  = 1, /* 16byte desc? FD_ID : RSV */
+       I40E_RX_DESC_FLTSTAT_RSV        = 2,
+       I40E_RX_DESC_FLTSTAT_RSS_HASH   = 3,
+};
+
+#define I40E_RXD_QW1_ERROR_SHIFT       19
+#define I40E_RXD_QW1_ERROR_MASK                (0xFFUL << I40E_RXD_QW1_ERROR_SHIFT)
+
+enum i40e_rx_desc_error_bits {
+       /* Note: These are predefined bit offsets */
+       I40E_RX_DESC_ERROR_RXE_SHIFT            = 0,
+       I40E_RX_DESC_ERROR_RECIPE_SHIFT         = 1,
+       I40E_RX_DESC_ERROR_HBO_SHIFT            = 2,
+       I40E_RX_DESC_ERROR_L3L4E_SHIFT          = 3, /* 3 BITS */
+       I40E_RX_DESC_ERROR_IPE_SHIFT            = 3,
+       I40E_RX_DESC_ERROR_L4E_SHIFT            = 4,
+       I40E_RX_DESC_ERROR_EIPE_SHIFT           = 5,
+       I40E_RX_DESC_ERROR_OVERSIZE_SHIFT       = 6
+};
+
+enum i40e_rx_desc_error_l3l4e_fcoe_masks {
+       I40E_RX_DESC_ERROR_L3L4E_NONE           = 0,
+       I40E_RX_DESC_ERROR_L3L4E_PROT           = 1,
+       I40E_RX_DESC_ERROR_L3L4E_FC             = 2,
+       I40E_RX_DESC_ERROR_L3L4E_DMAC_ERR       = 3,
+       I40E_RX_DESC_ERROR_L3L4E_DMAC_WARN      = 4
+};
+
+#define I40E_RXD_QW1_PTYPE_SHIFT       30
+#define I40E_RXD_QW1_PTYPE_MASK                (0xFFULL << I40E_RXD_QW1_PTYPE_SHIFT)
+
+/* Packet type non-ip values */
+enum i40e_rx_l2_ptype {
+       I40E_RX_PTYPE_L2_RESERVED               = 0,
+       I40E_RX_PTYPE_L2_MAC_PAY2               = 1,
+       I40E_RX_PTYPE_L2_TIMESYNC_PAY2          = 2,
+       I40E_RX_PTYPE_L2_FIP_PAY2               = 3,
+       I40E_RX_PTYPE_L2_OUI_PAY2               = 4,
+       I40E_RX_PTYPE_L2_MACCNTRL_PAY2          = 5,
+       I40E_RX_PTYPE_L2_LLDP_PAY2              = 6,
+       I40E_RX_PTYPE_L2_ECP_PAY2               = 7,
+       I40E_RX_PTYPE_L2_EVB_PAY2               = 8,
+       I40E_RX_PTYPE_L2_QCN_PAY2               = 9,
+       I40E_RX_PTYPE_L2_EAPOL_PAY2             = 10,
+       I40E_RX_PTYPE_L2_ARP                    = 11,
+       I40E_RX_PTYPE_L2_FCOE_PAY3              = 12,
+       I40E_RX_PTYPE_L2_FCOE_FCDATA_PAY3       = 13,
+       I40E_RX_PTYPE_L2_FCOE_FCRDY_PAY3        = 14,
+       I40E_RX_PTYPE_L2_FCOE_FCRSP_PAY3        = 15,
+       I40E_RX_PTYPE_L2_FCOE_FCOTHER_PA        = 16,
+       I40E_RX_PTYPE_L2_FCOE_VFT_PAY3          = 17,
+       I40E_RX_PTYPE_L2_FCOE_VFT_FCDATA        = 18,
+       I40E_RX_PTYPE_L2_FCOE_VFT_FCRDY         = 19,
+       I40E_RX_PTYPE_L2_FCOE_VFT_FCRSP         = 20,
+       I40E_RX_PTYPE_L2_FCOE_VFT_FCOTHER       = 21
+};
+
+struct i40e_rx_ptype_decoded {
+       u32 ptype:8;
+       u32 known:1;
+       u32 outer_ip:1;
+       u32 outer_ip_ver:1;
+       u32 outer_frag:1;
+       u32 tunnel_type:3;
+       u32 tunnel_end_prot:2;
+       u32 tunnel_end_frag:1;
+       u32 inner_prot:4;
+       u32 payload_layer:3;
+};
+
+enum i40e_rx_ptype_outer_ip {
+       I40E_RX_PTYPE_OUTER_L2  = 0,
+       I40E_RX_PTYPE_OUTER_IP  = 1
+};
+
+enum i40e_rx_ptype_outer_ip_ver {
+       I40E_RX_PTYPE_OUTER_NONE        = 0,
+       I40E_RX_PTYPE_OUTER_IPV4        = 0,
+       I40E_RX_PTYPE_OUTER_IPV6        = 1
+};
+
+enum i40e_rx_ptype_outer_fragmented {
+       I40E_RX_PTYPE_NOT_FRAG  = 0,
+       I40E_RX_PTYPE_FRAG      = 1
+};
+
+enum i40e_rx_ptype_tunnel_type {
+       I40E_RX_PTYPE_TUNNEL_NONE               = 0,
+       I40E_RX_PTYPE_TUNNEL_IP_IP              = 1,
+       I40E_RX_PTYPE_TUNNEL_IP_GRENAT          = 2,
+       I40E_RX_PTYPE_TUNNEL_IP_GRENAT_MAC      = 3,
+       I40E_RX_PTYPE_TUNNEL_IP_GRENAT_MAC_VLAN = 4,
+};
+
+enum i40e_rx_ptype_tunnel_end_prot {
+       I40E_RX_PTYPE_TUNNEL_END_NONE   = 0,
+       I40E_RX_PTYPE_TUNNEL_END_IPV4   = 1,
+       I40E_RX_PTYPE_TUNNEL_END_IPV6   = 2,
+};
+
+enum i40e_rx_ptype_inner_prot {
+       I40E_RX_PTYPE_INNER_PROT_NONE           = 0,
+       I40E_RX_PTYPE_INNER_PROT_UDP            = 1,
+       I40E_RX_PTYPE_INNER_PROT_TCP            = 2,
+       I40E_RX_PTYPE_INNER_PROT_SCTP           = 3,
+       I40E_RX_PTYPE_INNER_PROT_ICMP           = 4,
+       I40E_RX_PTYPE_INNER_PROT_TIMESYNC       = 5
+};
+
+enum i40e_rx_ptype_payload_layer {
+       I40E_RX_PTYPE_PAYLOAD_LAYER_NONE        = 0,
+       I40E_RX_PTYPE_PAYLOAD_LAYER_PAY2        = 1,
+       I40E_RX_PTYPE_PAYLOAD_LAYER_PAY3        = 2,
+       I40E_RX_PTYPE_PAYLOAD_LAYER_PAY4        = 3,
+};
+
+#define I40E_RXD_QW1_LENGTH_PBUF_SHIFT 38
+#define I40E_RXD_QW1_LENGTH_PBUF_MASK  (0x3FFFULL << \
+                                        I40E_RXD_QW1_LENGTH_PBUF_SHIFT)
+
+#define I40E_RXD_QW1_LENGTH_HBUF_SHIFT 52
+#define I40E_RXD_QW1_LENGTH_HBUF_MASK  (0x7FFULL << \
+                                        I40E_RXD_QW1_LENGTH_HBUF_SHIFT)
+
+#define I40E_RXD_QW1_LENGTH_SPH_SHIFT  63
+#define I40E_RXD_QW1_LENGTH_SPH_MASK   (0x1ULL << \
+                                        I40E_RXD_QW1_LENGTH_SPH_SHIFT)
+
+enum i40e_rx_desc_ext_status_bits {
+       /* Note: These are predefined bit offsets */
+       I40E_RX_DESC_EXT_STATUS_L2TAG2P_SHIFT   = 0,
+       I40E_RX_DESC_EXT_STATUS_L2TAG3P_SHIFT   = 1,
+       I40E_RX_DESC_EXT_STATUS_FLEXBL_SHIFT    = 2, /* 2 BITS */
+       I40E_RX_DESC_EXT_STATUS_FLEXBH_SHIFT    = 4, /* 2 BITS */
+       I40E_RX_DESC_EXT_STATUS_FTYPE_SHIFT     = 6, /* 3 BITS */
+       I40E_RX_DESC_EXT_STATUS_FDLONGB_SHIFT   = 9,
+       I40E_RX_DESC_EXT_STATUS_FCOELONGB_SHIFT = 10,
+       I40E_RX_DESC_EXT_STATUS_PELONGB_SHIFT   = 11,
+};
+
+enum i40e_rx_desc_pe_status_bits {
+       /* Note: These are predefined bit offsets */
+       I40E_RX_DESC_PE_STATUS_QPID_SHIFT       = 0, /* 18 BITS */
+       I40E_RX_DESC_PE_STATUS_L4PORT_SHIFT     = 0, /* 16 BITS */
+       I40E_RX_DESC_PE_STATUS_IPINDEX_SHIFT    = 16, /* 8 BITS */
+       I40E_RX_DESC_PE_STATUS_QPIDHIT_SHIFT    = 24,
+       I40E_RX_DESC_PE_STATUS_APBVTHIT_SHIFT   = 25,
+       I40E_RX_DESC_PE_STATUS_PORTV_SHIFT      = 26,
+       I40E_RX_DESC_PE_STATUS_URG_SHIFT        = 27,
+       I40E_RX_DESC_PE_STATUS_IPFRAG_SHIFT     = 28,
+       I40E_RX_DESC_PE_STATUS_IPOPT_SHIFT      = 29
+};
+
+#define I40E_RX_PROG_STATUS_DESC_LENGTH_SHIFT          38
+#define I40E_RX_PROG_STATUS_DESC_LENGTH                        0x2000000
+
+#define I40E_RX_PROG_STATUS_DESC_QW1_PROGID_SHIFT      2
+#define I40E_RX_PROG_STATUS_DESC_QW1_PROGID_MASK       (0x7UL << \
+                               I40E_RX_PROG_STATUS_DESC_QW1_PROGID_SHIFT)
+
+#define I40E_RX_PROG_STATUS_DESC_QW1_ERROR_SHIFT       19
+#define I40E_RX_PROG_STATUS_DESC_QW1_ERROR_MASK                (0x3FUL << \
+                               I40E_RX_PROG_STATUS_DESC_QW1_ERROR_SHIFT)
+
+enum i40e_rx_prog_status_desc_status_bits {
+       /* Note: These are predefined bit offsets */
+       I40E_RX_PROG_STATUS_DESC_DD_SHIFT       = 0,
+       I40E_RX_PROG_STATUS_DESC_PROG_ID_SHIFT  = 2 /* 3 BITS */
+};
+
+enum i40e_rx_prog_status_desc_prog_id_masks {
+       I40E_RX_PROG_STATUS_DESC_FD_FILTER_STATUS       = 1,
+       I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_PROG_STATUS  = 2,
+       I40E_RX_PROG_STATUS_DESC_FCOE_CTXT_INVL_STATUS  = 4,
+};
+
+enum i40e_rx_prog_status_desc_error_bits {
+       /* Note: These are predefined bit offsets */
+       I40E_RX_PROG_STATUS_DESC_FD_TBL_FULL_SHIFT      = 0,
+       I40E_RX_PROG_STATUS_DESC_NO_FD_QUOTA_SHIFT      = 1,
+       I40E_RX_PROG_STATUS_DESC_FCOE_TBL_FULL_SHIFT    = 2,
+       I40E_RX_PROG_STATUS_DESC_FCOE_CONFLICT_SHIFT    = 3
+};
+
+/* TX Descriptor */
+struct i40e_tx_desc {
+       __le64 buffer_addr; /* Address of descriptor's data buf */
+       __le64 cmd_type_offset_bsz;
+};
+
+#define I40E_TXD_QW1_DTYPE_SHIFT       0
+#define I40E_TXD_QW1_DTYPE_MASK                (0xFUL << I40E_TXD_QW1_DTYPE_SHIFT)
+
+enum i40e_tx_desc_dtype_value {
+       I40E_TX_DESC_DTYPE_DATA         = 0x0,
+       I40E_TX_DESC_DTYPE_NOP          = 0x1, /* same as Context desc */
+       I40E_TX_DESC_DTYPE_CONTEXT      = 0x1,
+       I40E_TX_DESC_DTYPE_FCOE_CTX     = 0x2,
+       I40E_TX_DESC_DTYPE_FILTER_PROG  = 0x8,
+       I40E_TX_DESC_DTYPE_DDP_CTX      = 0x9,
+       I40E_TX_DESC_DTYPE_FLEX_DATA    = 0xB,
+       I40E_TX_DESC_DTYPE_FLEX_CTX_1   = 0xC,
+       I40E_TX_DESC_DTYPE_FLEX_CTX_2   = 0xD,
+       I40E_TX_DESC_DTYPE_DESC_DONE    = 0xF
+};
+
+#define I40E_TXD_QW1_CMD_SHIFT 4
+#define I40E_TXD_QW1_CMD_MASK  (0x3FFUL << I40E_TXD_QW1_CMD_SHIFT)
+
+enum i40e_tx_desc_cmd_bits {
+       I40E_TX_DESC_CMD_EOP                    = 0x0001,
+       I40E_TX_DESC_CMD_RS                     = 0x0002,
+       I40E_TX_DESC_CMD_ICRC                   = 0x0004,
+       I40E_TX_DESC_CMD_IL2TAG1                = 0x0008,
+       I40E_TX_DESC_CMD_DUMMY                  = 0x0010,
+       I40E_TX_DESC_CMD_IIPT_NONIP             = 0x0000, /* 2 BITS */
+       I40E_TX_DESC_CMD_IIPT_IPV6              = 0x0020, /* 2 BITS */
+       I40E_TX_DESC_CMD_IIPT_IPV4              = 0x0040, /* 2 BITS */
+       I40E_TX_DESC_CMD_IIPT_IPV4_CSUM         = 0x0060, /* 2 BITS */
+       I40E_TX_DESC_CMD_FCOET                  = 0x0080,
+       I40E_TX_DESC_CMD_L4T_EOFT_UNK           = 0x0000, /* 2 BITS */
+       I40E_TX_DESC_CMD_L4T_EOFT_TCP           = 0x0100, /* 2 BITS */
+       I40E_TX_DESC_CMD_L4T_EOFT_SCTP          = 0x0200, /* 2 BITS */
+       I40E_TX_DESC_CMD_L4T_EOFT_UDP           = 0x0300, /* 2 BITS */
+       I40E_TX_DESC_CMD_L4T_EOFT_EOF_N         = 0x0000, /* 2 BITS */
+       I40E_TX_DESC_CMD_L4T_EOFT_EOF_T         = 0x0100, /* 2 BITS */
+       I40E_TX_DESC_CMD_L4T_EOFT_EOF_NI        = 0x0200, /* 2 BITS */
+       I40E_TX_DESC_CMD_L4T_EOFT_EOF_A         = 0x0300, /* 2 BITS */
+};
+
+#define I40E_TXD_QW1_OFFSET_SHIFT      16
+#define I40E_TXD_QW1_OFFSET_MASK       (0x3FFFFULL << \
+                                        I40E_TXD_QW1_OFFSET_SHIFT)
+
+enum i40e_tx_desc_length_fields {
+       /* Note: These are predefined bit offsets */
+       I40E_TX_DESC_LENGTH_MACLEN_SHIFT        = 0, /* 7 BITS */
+       I40E_TX_DESC_LENGTH_IPLEN_SHIFT         = 7, /* 7 BITS */
+       I40E_TX_DESC_LENGTH_L4_FC_LEN_SHIFT     = 14 /* 4 BITS */
+};
+
+#define I40E_TXD_QW1_TX_BUF_SZ_SHIFT   34
+#define I40E_TXD_QW1_TX_BUF_SZ_MASK    (0x3FFFULL << \
+                                        I40E_TXD_QW1_TX_BUF_SZ_SHIFT)
+
+#define I40E_TXD_QW1_L2TAG1_SHIFT      48
+#define I40E_TXD_QW1_L2TAG1_MASK       (0xFFFFULL << I40E_TXD_QW1_L2TAG1_SHIFT)
+
+/* Context descriptors */
+struct i40e_tx_context_desc {
+       __le32 tunneling_params;
+       __le16 l2tag2;
+       __le16 rsvd;
+       __le64 type_cmd_tso_mss;
+};
+
+#define I40E_TXD_CTX_QW1_DTYPE_SHIFT   0
+#define I40E_TXD_CTX_QW1_DTYPE_MASK    (0xFUL << I40E_TXD_CTX_QW1_DTYPE_SHIFT)
+
+#define I40E_TXD_CTX_QW1_CMD_SHIFT     4
+#define I40E_TXD_CTX_QW1_CMD_MASK      (0xFFFFUL << I40E_TXD_CTX_QW1_CMD_SHIFT)
+
+enum i40e_tx_ctx_desc_cmd_bits {
+       I40E_TX_CTX_DESC_TSO            = 0x01,
+       I40E_TX_CTX_DESC_TSYN           = 0x02,
+       I40E_TX_CTX_DESC_IL2TAG2        = 0x04,
+       I40E_TX_CTX_DESC_IL2TAG2_IL2H   = 0x08,
+       I40E_TX_CTX_DESC_SWTCH_NOTAG    = 0x00,
+       I40E_TX_CTX_DESC_SWTCH_UPLINK   = 0x10,
+       I40E_TX_CTX_DESC_SWTCH_LOCAL    = 0x20,
+       I40E_TX_CTX_DESC_SWTCH_VSI      = 0x30,
+       I40E_TX_CTX_DESC_SWPE           = 0x40
+};
+
+#define I40E_TXD_CTX_QW1_TSO_LEN_SHIFT 30
+#define I40E_TXD_CTX_QW1_TSO_LEN_MASK  (0x3FFFFULL << \
+                                        I40E_TXD_CTX_QW1_TSO_LEN_SHIFT)
+
+#define I40E_TXD_CTX_QW1_MSS_SHIFT     50
+#define I40E_TXD_CTX_QW1_MSS_MASK      (0x3FFFULL << \
+                                        I40E_TXD_CTX_QW1_MSS_SHIFT)
+
+#define I40E_TXD_CTX_QW1_VSI_SHIFT     50
+#define I40E_TXD_CTX_QW1_VSI_MASK      (0x1FFULL << I40E_TXD_CTX_QW1_VSI_SHIFT)
+
+#define I40E_TXD_CTX_QW0_EXT_IP_SHIFT  0
+#define I40E_TXD_CTX_QW0_EXT_IP_MASK   (0x3ULL << \
+                                        I40E_TXD_CTX_QW0_EXT_IP_SHIFT)
+
+enum i40e_tx_ctx_desc_eipt_offload {
+       I40E_TX_CTX_EXT_IP_NONE         = 0x0,
+       I40E_TX_CTX_EXT_IP_IPV6         = 0x1,
+       I40E_TX_CTX_EXT_IP_IPV4_NO_CSUM = 0x2,
+       I40E_TX_CTX_EXT_IP_IPV4         = 0x3
+};
+
+#define I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT       2
+#define I40E_TXD_CTX_QW0_EXT_IPLEN_MASK        (0x3FULL << \
+                                        I40E_TXD_CTX_QW0_EXT_IPLEN_SHIFT)
+
+#define I40E_TXD_CTX_QW0_NATT_SHIFT    9
+#define I40E_TXD_CTX_QW0_NATT_MASK     (0x3ULL << I40E_TXD_CTX_QW0_NATT_SHIFT)
+
+#define I40E_TXD_CTX_UDP_TUNNELING     (0x1ULL << I40E_TXD_CTX_QW0_NATT_SHIFT)
+#define I40E_TXD_CTX_GRE_TUNNELING     (0x2ULL << I40E_TXD_CTX_QW0_NATT_SHIFT)
+
+#define I40E_TXD_CTX_QW0_EIP_NOINC_SHIFT       11
+#define I40E_TXD_CTX_QW0_EIP_NOINC_MASK        (0x1ULL << \
+                                        I40E_TXD_CTX_QW0_EIP_NOINC_SHIFT)
+
+#define I40E_TXD_CTX_EIP_NOINC_IPID_CONST      I40E_TXD_CTX_QW0_EIP_NOINC_MASK
+
+#define I40E_TXD_CTX_QW0_NATLEN_SHIFT  12
+#define I40E_TXD_CTX_QW0_NATLEN_MASK   (0x7FULL << \
+                                        I40E_TXD_CTX_QW0_NATLEN_SHIFT)
+
+#define I40E_TXD_CTX_QW0_DECTTL_SHIFT  19
+#define I40E_TXD_CTX_QW0_DECTTL_MASK   (0xFULL << \
+                                        I40E_TXD_CTX_QW0_DECTTL_SHIFT)
+
+struct i40e_filter_program_desc {
+       __le32 qindex_flex_ptype_vsi;
+       __le32 rsvd;
+       __le32 dtype_cmd_cntindex;
+       __le32 fd_id;
+};
+#define I40E_TXD_FLTR_QW0_QINDEX_SHIFT 0
+#define I40E_TXD_FLTR_QW0_QINDEX_MASK  (0x7FFUL << \
+                                        I40E_TXD_FLTR_QW0_QINDEX_SHIFT)
+#define I40E_TXD_FLTR_QW0_FLEXOFF_SHIFT        11
+#define I40E_TXD_FLTR_QW0_FLEXOFF_MASK (0x7UL << \
+                                        I40E_TXD_FLTR_QW0_FLEXOFF_SHIFT)
+#define I40E_TXD_FLTR_QW0_PCTYPE_SHIFT 17
+#define I40E_TXD_FLTR_QW0_PCTYPE_MASK  (0x3FUL << \
+                                        I40E_TXD_FLTR_QW0_PCTYPE_SHIFT)
+
+/* Packet Classifier Types for filters */
+enum i40e_filter_pctype {
+       /* Note: Value 0-25 are reserved for future use */
+       I40E_FILTER_PCTYPE_IPV4_TEREDO_UDP              = 26,
+       I40E_FILTER_PCTYPE_IPV6_TEREDO_UDP              = 27,
+       I40E_FILTER_PCTYPE_NONF_IPV4_1588_UDP           = 28,
+       I40E_FILTER_PCTYPE_NONF_UNICAST_IPV4_UDP        = 29,
+       I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV4_UDP      = 30,
+       I40E_FILTER_PCTYPE_NONF_IPV4_UDP                = 31,
+       I40E_FILTER_PCTYPE_NONF_IPV4_TCP_SYN            = 32,
+       I40E_FILTER_PCTYPE_NONF_IPV4_TCP                = 33,
+       I40E_FILTER_PCTYPE_NONF_IPV4_SCTP               = 34,
+       I40E_FILTER_PCTYPE_NONF_IPV4_OTHER              = 35,
+       I40E_FILTER_PCTYPE_FRAG_IPV4                    = 36,
+       /* Note: Value 37 is reserved for future use */
+       I40E_FILTER_PCTYPE_NONF_IPV6_1588_UDP           = 38,
+       I40E_FILTER_PCTYPE_NONF_UNICAST_IPV6_UDP        = 39,
+       I40E_FILTER_PCTYPE_NONF_MULTICAST_IPV6_UDP      = 40,
+       I40E_FILTER_PCTYPE_NONF_IPV6_UDP                = 41,
+       I40E_FILTER_PCTYPE_NONF_IPV6_TCP_SYN            = 42,
+       I40E_FILTER_PCTYPE_NONF_IPV6_TCP                = 43,
+       I40E_FILTER_PCTYPE_NONF_IPV6_SCTP               = 44,
+       I40E_FILTER_PCTYPE_NONF_IPV6_OTHER              = 45,
+       I40E_FILTER_PCTYPE_FRAG_IPV6                    = 46,
+       /* Note: Value 47 is reserved for future use */
+       I40E_FILTER_PCTYPE_FCOE_OX                      = 48,
+       I40E_FILTER_PCTYPE_FCOE_RX                      = 49,
+       /* Note: Value 50-62 are reserved for future use */
+       I40E_FILTER_PCTYPE_L2_PAYLOAD                   = 63,
+};
+
+enum i40e_filter_program_desc_dest {
+       I40E_FILTER_PROGRAM_DESC_DEST_DROP_PACKET               = 0x0,
+       I40E_FILTER_PROGRAM_DESC_DEST_DIRECT_PACKET_QINDEX      = 0x1,
+       I40E_FILTER_PROGRAM_DESC_DEST_DIRECT_PACKET_OTHER       = 0x2,
+};
+
+enum i40e_filter_program_desc_fd_status {
+       I40E_FILTER_PROGRAM_DESC_FD_STATUS_NONE                 = 0x0,
+       I40E_FILTER_PROGRAM_DESC_FD_STATUS_FD_ID                = 0x1,
+       I40E_FILTER_PROGRAM_DESC_FD_STATUS_FD_ID_4FLEX_BYTES    = 0x2,
+       I40E_FILTER_PROGRAM_DESC_FD_STATUS_8FLEX_BYTES          = 0x3,
+};
+
+#define I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT       23
+#define I40E_TXD_FLTR_QW0_DEST_VSI_MASK        (0x1FFUL << \
+                                        I40E_TXD_FLTR_QW0_DEST_VSI_SHIFT)
+
+#define I40E_TXD_FLTR_QW1_CMD_SHIFT    4
+#define I40E_TXD_FLTR_QW1_CMD_MASK     (0xFFFFULL << \
+                                        I40E_TXD_FLTR_QW1_CMD_SHIFT)
+
+#define I40E_TXD_FLTR_QW1_PCMD_SHIFT   (0x0ULL + I40E_TXD_FLTR_QW1_CMD_SHIFT)
+#define I40E_TXD_FLTR_QW1_PCMD_MASK    (0x7ULL << I40E_TXD_FLTR_QW1_PCMD_SHIFT)
+
+enum i40e_filter_program_desc_pcmd {
+       I40E_FILTER_PROGRAM_DESC_PCMD_ADD_UPDATE        = 0x1,
+       I40E_FILTER_PROGRAM_DESC_PCMD_REMOVE            = 0x2,
+};
+
+#define I40E_TXD_FLTR_QW1_DEST_SHIFT   (0x3ULL + I40E_TXD_FLTR_QW1_CMD_SHIFT)
+#define I40E_TXD_FLTR_QW1_DEST_MASK    (0x3ULL << I40E_TXD_FLTR_QW1_DEST_SHIFT)
+
+#define I40E_TXD_FLTR_QW1_CNT_ENA_SHIFT        (0x7ULL + I40E_TXD_FLTR_QW1_CMD_SHIFT)
+#define I40E_TXD_FLTR_QW1_CNT_ENA_MASK (0x1ULL << \
+                                        I40E_TXD_FLTR_QW1_CNT_ENA_SHIFT)
+
+#define I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT      (0x9ULL + \
+                                                I40E_TXD_FLTR_QW1_CMD_SHIFT)
+#define I40E_TXD_FLTR_QW1_FD_STATUS_MASK (0x3ULL << \
+                                         I40E_TXD_FLTR_QW1_FD_STATUS_SHIFT)
+
+#define I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT 20
+#define I40E_TXD_FLTR_QW1_CNTINDEX_MASK        (0x1FFUL << \
+                                        I40E_TXD_FLTR_QW1_CNTINDEX_SHIFT)
+
+enum i40e_filter_type {
+       I40E_FLOW_DIRECTOR_FLTR = 0,
+       I40E_PE_QUAD_HASH_FLTR = 1,
+       I40E_ETHERTYPE_FLTR,
+       I40E_FCOE_CTX_FLTR,
+       I40E_MAC_VLAN_FLTR,
+       I40E_HASH_FLTR
+};
+
+struct i40e_vsi_context {
+       u16 seid;
+       u16 uplink_seid;
+       u16 vsi_number;
+       u16 vsis_allocated;
+       u16 vsis_unallocated;
+       u16 flags;
+       u8 pf_num;
+       u8 vf_num;
+       u8 connection_type;
+       struct i40e_aqc_vsi_properties_data info;
+};
+
+/* Statistics collected by each port, VSI, VEB, and S-channel */
+struct i40e_eth_stats {
+       u64 rx_bytes;                   /* gorc */
+       u64 rx_unicast;                 /* uprc */
+       u64 rx_multicast;               /* mprc */
+       u64 rx_broadcast;               /* bprc */
+       u64 rx_discards;                /* rdpc */
+       u64 rx_errors;                  /* repc */
+       u64 rx_missed;                  /* rmpc */
+       u64 rx_unknown_protocol;        /* rupp */
+       u64 tx_bytes;                   /* gotc */
+       u64 tx_unicast;                 /* uptc */
+       u64 tx_multicast;               /* mptc */
+       u64 tx_broadcast;               /* bptc */
+       u64 tx_discards;                /* tdpc */
+       u64 tx_errors;                  /* tepc */
+};
+
+/* Statistics collected by the MAC */
+struct i40e_hw_port_stats {
+       /* eth stats collected by the port */
+       struct i40e_eth_stats eth;
+
+       /* additional port specific stats */
+       u64 tx_dropped_link_down;       /* tdold */
+       u64 crc_errors;                 /* crcerrs */
+       u64 illegal_bytes;              /* illerrc */
+       u64 error_bytes;                /* errbc */
+       u64 mac_local_faults;           /* mlfc */
+       u64 mac_remote_faults;          /* mrfc */
+       u64 rx_length_errors;           /* rlec */
+       u64 link_xon_rx;                /* lxonrxc */
+       u64 link_xoff_rx;               /* lxoffrxc */
+       u64 priority_xon_rx[8];         /* pxonrxc[8] */
+       u64 priority_xoff_rx[8];        /* pxoffrxc[8] */
+       u64 link_xon_tx;                /* lxontxc */
+       u64 link_xoff_tx;               /* lxofftxc */
+       u64 priority_xon_tx[8];         /* pxontxc[8] */
+       u64 priority_xoff_tx[8];        /* pxofftxc[8] */
+       u64 priority_xon_2_xoff[8];     /* pxon2offc[8] */
+       u64 rx_size_64;                 /* prc64 */
+       u64 rx_size_127;                /* prc127 */
+       u64 rx_size_255;                /* prc255 */
+       u64 rx_size_511;                /* prc511 */
+       u64 rx_size_1023;               /* prc1023 */
+       u64 rx_size_1522;               /* prc1522 */
+       u64 rx_size_big;                /* prc9522 */
+       u64 rx_undersize;               /* ruc */
+       u64 rx_fragments;               /* rfc */
+       u64 rx_oversize;                /* roc */
+       u64 rx_jabber;                  /* rjc */
+       u64 tx_size_64;                 /* ptc64 */
+       u64 tx_size_127;                /* ptc127 */
+       u64 tx_size_255;                /* ptc255 */
+       u64 tx_size_511;                /* ptc511 */
+       u64 tx_size_1023;               /* ptc1023 */
+       u64 tx_size_1522;               /* ptc1522 */
+       u64 tx_size_big;                /* ptc9522 */
+       u64 mac_short_packet_dropped;   /* mspdc */
+       u64 checksum_error;             /* xec */
+};
+
+/* Checksum and Shadow RAM pointers */
+#define I40E_SR_NVM_CONTROL_WORD               0x00
+#define I40E_SR_EMP_MODULE_PTR                 0x0F
+#define I40E_SR_NVM_IMAGE_VERSION              0x18
+#define I40E_SR_ALTERNATE_SAN_MAC_ADDRESS_PTR  0x27
+#define I40E_SR_NVM_EETRACK_LO                 0x2D
+#define I40E_SR_NVM_EETRACK_HI                 0x2E
+#define I40E_SR_VPD_PTR                                0x2F
+#define I40E_SR_PCIE_ALT_AUTO_LOAD_PTR         0x3E
+#define I40E_SR_SW_CHECKSUM_WORD               0x3F
+
+/* Auxiliary field, mask and shift definition for Shadow RAM and NVM Flash */
+#define I40E_SR_VPD_MODULE_MAX_SIZE            1024
+#define I40E_SR_PCIE_ALT_MODULE_MAX_SIZE       1024
+#define I40E_SR_CONTROL_WORD_1_SHIFT           0x06
+#define I40E_SR_CONTROL_WORD_1_MASK    (0x03 << I40E_SR_CONTROL_WORD_1_SHIFT)
+
+/* Shadow RAM related */
+#define I40E_SR_SECTOR_SIZE_IN_WORDS   0x800
+#define I40E_SR_WORDS_IN_1KB           512
+/* Checksum should be calculated such that after adding all the words,
+ * including the checksum word itself, the sum should be 0xBABA.
+ */
+#define I40E_SR_SW_CHECKSUM_BASE       0xBABA
+
+#define I40E_SRRD_SRCTL_ATTEMPTS       100000
+
+enum i40e_switch_element_types {
+       I40E_SWITCH_ELEMENT_TYPE_MAC    = 1,
+       I40E_SWITCH_ELEMENT_TYPE_PF     = 2,
+       I40E_SWITCH_ELEMENT_TYPE_VF     = 3,
+       I40E_SWITCH_ELEMENT_TYPE_EMP    = 4,
+       I40E_SWITCH_ELEMENT_TYPE_BMC    = 6,
+       I40E_SWITCH_ELEMENT_TYPE_PE     = 16,
+       I40E_SWITCH_ELEMENT_TYPE_VEB    = 17,
+       I40E_SWITCH_ELEMENT_TYPE_PA     = 18,
+       I40E_SWITCH_ELEMENT_TYPE_VSI    = 19,
+};
+
+/* Supported EtherType filters */
+enum i40e_ether_type_index {
+       I40E_ETHER_TYPE_1588            = 0,
+       I40E_ETHER_TYPE_FIP             = 1,
+       I40E_ETHER_TYPE_OUI_EXTENDED    = 2,
+       I40E_ETHER_TYPE_MAC_CONTROL     = 3,
+       I40E_ETHER_TYPE_LLDP            = 4,
+       I40E_ETHER_TYPE_EVB_PROTOCOL1   = 5,
+       I40E_ETHER_TYPE_EVB_PROTOCOL2   = 6,
+       I40E_ETHER_TYPE_QCN_CNM         = 7,
+       I40E_ETHER_TYPE_8021X           = 8,
+       I40E_ETHER_TYPE_ARP             = 9,
+       I40E_ETHER_TYPE_RSV1            = 10,
+       I40E_ETHER_TYPE_RSV2            = 11,
+};
+
+/* Filter context base size is 1K */
+#define I40E_HASH_FILTER_BASE_SIZE     1024
+/* Supported Hash filter values */
+enum i40e_hash_filter_size {
+       I40E_HASH_FILTER_SIZE_1K        = 0,
+       I40E_HASH_FILTER_SIZE_2K        = 1,
+       I40E_HASH_FILTER_SIZE_4K        = 2,
+       I40E_HASH_FILTER_SIZE_8K        = 3,
+       I40E_HASH_FILTER_SIZE_16K       = 4,
+       I40E_HASH_FILTER_SIZE_32K       = 5,
+       I40E_HASH_FILTER_SIZE_64K       = 6,
+       I40E_HASH_FILTER_SIZE_128K      = 7,
+       I40E_HASH_FILTER_SIZE_256K      = 8,
+       I40E_HASH_FILTER_SIZE_512K      = 9,
+       I40E_HASH_FILTER_SIZE_1M        = 10,
+};
+
+/* DMA context base size is 0.5K */
+#define I40E_DMA_CNTX_BASE_SIZE                512
+/* Supported DMA context values */
+enum i40e_dma_cntx_size {
+       I40E_DMA_CNTX_SIZE_512          = 0,
+       I40E_DMA_CNTX_SIZE_1K           = 1,
+       I40E_DMA_CNTX_SIZE_2K           = 2,
+       I40E_DMA_CNTX_SIZE_4K           = 3,
+       I40E_DMA_CNTX_SIZE_8K           = 4,
+       I40E_DMA_CNTX_SIZE_16K          = 5,
+       I40E_DMA_CNTX_SIZE_32K          = 6,
+       I40E_DMA_CNTX_SIZE_64K          = 7,
+       I40E_DMA_CNTX_SIZE_128K         = 8,
+       I40E_DMA_CNTX_SIZE_256K         = 9,
+};
+
+/* Supported Hash look up table (LUT) sizes */
+enum i40e_hash_lut_size {
+       I40E_HASH_LUT_SIZE_128          = 0,
+       I40E_HASH_LUT_SIZE_512          = 1,
+};
+
+/* Structure to hold a per PF filter control settings */
+struct i40e_filter_control_settings {
+       /* number of PE Quad Hash filter buckets */
+       enum i40e_hash_filter_size pe_filt_num;
+       /* number of PE Quad Hash contexts */
+       enum i40e_dma_cntx_size pe_cntx_num;
+       /* number of FCoE filter buckets */
+       enum i40e_hash_filter_size fcoe_filt_num;
+       /* number of FCoE DDP contexts */
+       enum i40e_dma_cntx_size fcoe_cntx_num;
+       /* size of the Hash LUT */
+       enum i40e_hash_lut_size hash_lut_size;
+       /* enable FDIR filters for PF and its VFs */
+       bool enable_fdir;
+       /* enable Ethertype filters for PF and its VFs */
+       bool enable_ethtype;
+       /* enable MAC/VLAN filters for PF and its VFs */
+       bool enable_macvlan;
+};
+
+/* Structure to hold device level control filter counts */
+struct i40e_control_filter_stats {
+       u16 mac_etype_used;   /* Used perfect match MAC/EtherType filters */
+       u16 etype_used;       /* Used perfect EtherType filters */
+       u16 mac_etype_free;   /* Un-used perfect match MAC/EtherType filters */
+       u16 etype_free;       /* Un-used perfect EtherType filters */
+};
+
+enum i40e_reset_type {
+       I40E_RESET_POR          = 0,
+       I40E_RESET_CORER        = 1,
+       I40E_RESET_GLOBR        = 2,
+       I40E_RESET_EMPR         = 3,
+};
+
+/* IEEE 802.1AB LLDP Agent Variables from NVM */
+#define I40E_NVM_LLDP_CFG_PTR          0xF
+struct i40e_lldp_variables {
+       u16 length;
+       u16 adminstatus;
+       u16 msgfasttx;
+       u16 msgtxinterval;
+       u16 txparams;
+       u16 timers;
+       u16 crc8;
+};
+
+#endif /* _I40E_TYPE_H_ */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl.h
new file mode 100644 (file)
index 0000000..cc6654f
--- /dev/null
@@ -0,0 +1,368 @@
+/*******************************************************************************
+ *
+ * Intel Ethernet Controller XL710 Family Linux Driver
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Contact Information:
+ * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ *
+ ******************************************************************************/
+
+#ifndef _I40E_VIRTCHNL_H_
+#define _I40E_VIRTCHNL_H_
+
+#include "i40e_type.h"
+
+/* Description:
+ * This header file describes the VF-PF communication protocol used
+ * by the various i40e drivers.
+ *
+ * Admin queue buffer usage:
+ * desc->opcode is always i40e_aqc_opc_send_msg_to_pf
+ * flags, retval, datalen, and data addr are all used normally.
+ * Firmware copies the cookie fields when sending messages between the PF and
+ * VF, but uses all other fields internally. Due to this limitation, we
+ * must send all messages as "indirect", i.e. using an external buffer.
+ *
+ * All the vsi indexes are relative to the VF. Each VF can have maximum of
+ * three VSIs. All the queue indexes are relative to the VSI.  Each VF can
+ * have a maximum of sixteen queues for all of its VSIs.
+ *
+ * The PF is required to return a status code in v_retval for all messages
+ * except RESET_VF, which does not require any response. The return value is of
+ * i40e_status_code type, defined in the i40e_type.h.
+ *
+ * In general, VF driver initialization should roughly follow the order of these
+ * opcodes. The VF driver must first validate the API version of the PF driver,
+ * then request a reset, then get resources, then configure queues and
+ * interrupts. After these operations are complete, the VF driver may start
+ * its queues, optionally add MAC and VLAN filters, and process traffic.
+ */
+
+/* Opcodes for VF-PF communication. These are placed in the v_opcode field
+ * of the virtchnl_msg structure.
+ *
+ * NOTE(review): this enum is wire format shared between independently
+ * shipped PF and VF drivers — new opcodes should only be appended, never
+ * renumbered; confirm this policy before modifying.
+ */
+enum i40e_virtchnl_ops {
+/* VF sends req. to pf for the following
+ * ops.
+ */
+       I40E_VIRTCHNL_OP_UNKNOWN = 0,
+       I40E_VIRTCHNL_OP_VERSION = 1, /* must ALWAYS be 1 */
+       I40E_VIRTCHNL_OP_RESET_VF,
+       I40E_VIRTCHNL_OP_GET_VF_RESOURCES,
+       I40E_VIRTCHNL_OP_CONFIG_TX_QUEUE,
+       I40E_VIRTCHNL_OP_CONFIG_RX_QUEUE,
+       I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES,
+       I40E_VIRTCHNL_OP_CONFIG_IRQ_MAP,
+       I40E_VIRTCHNL_OP_ENABLE_QUEUES,
+       I40E_VIRTCHNL_OP_DISABLE_QUEUES,
+       I40E_VIRTCHNL_OP_ADD_ETHER_ADDRESS,
+       I40E_VIRTCHNL_OP_DEL_ETHER_ADDRESS,
+       I40E_VIRTCHNL_OP_ADD_VLAN,
+       I40E_VIRTCHNL_OP_DEL_VLAN,
+       I40E_VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE,
+       I40E_VIRTCHNL_OP_GET_STATS,
+       I40E_VIRTCHNL_OP_FCOE,
+/* PF sends status change events to vfs using
+ * the following op.
+ */
+       I40E_VIRTCHNL_OP_EVENT,
+};
+
+/* Virtual channel message descriptor. This overlays the admin queue
+ * descriptor. All other data is passed in external buffers.
+ */
+
+struct i40e_virtchnl_msg {
+       u8 pad[8];                       /* AQ flags/opcode/len/retval fields */
+       enum i40e_virtchnl_ops v_opcode; /* avoid confusion with desc->opcode */
+       i40e_status v_retval;  /* ditto for desc->retval */
+       u32 vfid;                        /* used by PF when sending to VF */
+};
+
+/* Message descriptions and data structures.*/
+
+/* I40E_VIRTCHNL_OP_VERSION
+ * VF posts its version number to the PF. PF responds with its version number
+ * in the same format, along with a return code.
+ * Reply from PF has its major/minor versions also in param0 and param1.
+ * If there is a major version mismatch, then the VF cannot operate.
+ * If there is a minor version mismatch, then the VF can operate but should
+ * add a warning to the system log.
+ *
+ * This enum element MUST always be specified as == 1, regardless of other
+ * changes in the API. The PF must always respond to this message without
+ * error regardless of version mismatch.
+ */
+#define I40E_VIRTCHNL_VERSION_MAJOR            1
+#define I40E_VIRTCHNL_VERSION_MINOR            0
+/* external-buffer payload for I40E_VIRTCHNL_OP_VERSION (both directions) */
+struct i40e_virtchnl_version_info {
+       u32 major;
+       u32 minor;
+};
+
+/* I40E_VIRTCHNL_OP_RESET_VF
+ * VF sends this request to PF with no parameters
+ * PF does NOT respond! VF driver must delay then poll VFGEN_RSTAT register
+ * until reset completion is indicated. The admin queue must be reinitialized
+ * after this operation.
+ *
+ * When reset is complete, PF must ensure that all queues in all VSIs associated
+ * with the VF are stopped, all queue configurations in the HMC are set to 0,
+ * and all MAC and VLAN filters (except the default MAC address) on all VSIs
+ * are cleared.
+ */
+
+/* I40E_VIRTCHNL_OP_GET_VF_RESOURCES
+ * VF sends this request to PF with no parameters
+ * PF responds with an indirect message containing
+ * i40e_virtchnl_vf_resource and one or more
+ * i40e_virtchnl_vsi_resource structures.
+ */
+
+/* one entry per VSI assigned to the VF */
+struct i40e_virtchnl_vsi_resource {
+       u16 vsi_id;
+       u16 num_queue_pairs;
+       enum i40e_vsi_type vsi_type;
+       u16 qset_handle;
+       u8 default_mac_addr[I40E_ETH_LENGTH_OF_ADDRESS];
+};
+/* VF offload flags */
+#define I40E_VIRTCHNL_VF_OFFLOAD_L2    0x00000001
+#define I40E_VIRTCHNL_VF_OFFLOAD_FCOE  0x00000004
+#define I40E_VIRTCHNL_VF_OFFLOAD_VLAN  0x00010000
+
+/* reply payload for I40E_VIRTCHNL_OP_GET_VF_RESOURCES */
+struct i40e_virtchnl_vf_resource {
+       u16 num_vsis;
+       u16 num_queue_pairs;
+       u16 max_vectors;
+       u16 max_mtu;
+
+       u32 vf_offload_flags;   /* bitmask of I40E_VIRTCHNL_VF_OFFLOAD_* */
+       u32 max_fcoe_contexts;
+       u32 max_fcoe_filters;
+
+       /* variable-length trailing array; actual element count is num_vsis
+        * ([1] is the pre-C99 flexible-array idiom used by this protocol)
+        */
+       struct i40e_virtchnl_vsi_resource vsi_res[1];
+};
+
+/* I40E_VIRTCHNL_OP_CONFIG_TX_QUEUE
+ * VF sends this message to set up parameters for one TX queue.
+ * External data buffer contains one instance of i40e_virtchnl_txq_info.
+ * PF configures requested queue and returns a status code.
+ */
+
+/* Tx queue config info */
+struct i40e_virtchnl_txq_info {
+       u16 vsi_id;
+       u16 queue_id;
+       u16 ring_len;           /* number of descriptors, multiple of 8 */
+       /* descriptor head write-back; NOTE(review): the PF handler in this
+        * patch (i40e_config_vsi_tx_queue) does not consume these two fields
+        */
+       u16 headwb_enabled;
+       u64 dma_ring_addr;
+       u64 dma_headwb_addr;
+};
+
+/* I40E_VIRTCHNL_OP_CONFIG_RX_QUEUE
+ * VF sends this message to set up parameters for one RX queue.
+ * External data buffer contains one instance of i40e_virtchnl_rxq_info.
+ * PF configures requested queue and returns a status code.
+ */
+
+/* Rx queue config info */
+struct i40e_virtchnl_rxq_info {
+       u16 vsi_id;
+       u16 queue_id;
+       u32 ring_len;           /* number of descriptors, multiple of 32 */
+       u16 hdr_size;           /* only used when splithdr_enabled */
+       u16 splithdr_enabled;
+       u32 databuffer_size;
+       u32 max_pkt_size;
+       u64 dma_ring_addr;
+       /* NOTE(review): ignored by the PF handler in this patch — it
+        * hard-codes the header split positions instead
+        */
+       enum i40e_hmc_obj_rx_hsplit_0 rx_split_pos;
+};
+
+/* I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES
+ * VF sends this message to set parameters for all active TX and RX queues
+ * associated with the specified VSI.
+ * PF configures queues and returns status.
+ * If the number of queues specified is greater than the number of queues
+ * associated with the VSI, an error is returned and no queues are configured.
+ */
+struct i40e_virtchnl_queue_pair_info {
+       /* NOTE: vsi_id and queue_id should be identical for both queues. */
+       struct i40e_virtchnl_txq_info txq;
+       struct i40e_virtchnl_rxq_info rxq;
+};
+
+/* payload for I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES */
+struct i40e_virtchnl_vsi_queue_config_info {
+       u16 vsi_id;
+       u16 num_queue_pairs;
+       /* variable-length trailing array; count is num_queue_pairs */
+       struct i40e_virtchnl_queue_pair_info qpair[1];
+};
+
+/* I40E_VIRTCHNL_OP_CONFIG_IRQ_MAP
+ * VF uses this message to map vectors to queues.
+ * The rxq_map and txq_map fields are bitmaps used to indicate which queues
+ * are to be associated with the specified vector.
+ * The "other" causes are always mapped to vector 0.
+ * PF configures interrupt mapping and returns status.
+ */
+struct i40e_virtchnl_vector_map {
+       u16 vsi_id;
+       u16 vector_id;
+       u16 rxq_map;            /* bitmap of VSI-relative RX queues */
+       u16 txq_map;            /* bitmap of VSI-relative TX queues */
+       /* ITR indices for rx/tx causes — presumably select one of the
+        * per-vector interrupt throttling registers; confirm against the
+        * register spec
+        */
+       u16 rxitr_idx;
+       u16 txitr_idx;
+};
+
+/* payload for I40E_VIRTCHNL_OP_CONFIG_IRQ_MAP */
+struct i40e_virtchnl_irq_map_info {
+       u16 num_vectors;
+       /* variable-length trailing array; count is num_vectors */
+       struct i40e_virtchnl_vector_map vecmap[1];
+};
+
+/* I40E_VIRTCHNL_OP_ENABLE_QUEUES
+ * I40E_VIRTCHNL_OP_DISABLE_QUEUES
+ * VF sends these message to enable or disable TX/RX queue pairs.
+ * The queues fields are bitmaps indicating which queues to act upon.
+ * (Currently, we only support 16 queues per VF, but we make the field
+ * u32 to allow for expansion.)
+ * PF performs requested action and returns status.
+ */
+/* payload for OP_ENABLE_QUEUES / OP_DISABLE_QUEUES / OP_GET_STATS */
+struct i40e_virtchnl_queue_select {
+       u16 vsi_id;
+       u16 pad;                /* alignment only */
+       u32 rx_queues;          /* bitmap of VSI-relative RX queues */
+       u32 tx_queues;          /* bitmap of VSI-relative TX queues */
+};
+
+/* I40E_VIRTCHNL_OP_ADD_ETHER_ADDRESS
+ * VF sends this message in order to add one or more unicast or multicast
+ * address filters for the specified VSI.
+ * PF adds the filters and returns status.
+ */
+
+/* I40E_VIRTCHNL_OP_DEL_ETHER_ADDRESS
+ * VF sends this message in order to remove one or more unicast or multicast
+ * filters for the specified VSI.
+ * PF removes the filters and returns status.
+ */
+
+struct i40e_virtchnl_ether_addr {
+       u8 addr[I40E_ETH_LENGTH_OF_ADDRESS];
+       u8 pad[2];              /* alignment only */
+};
+
+/* payload for OP_ADD_ETHER_ADDRESS / OP_DEL_ETHER_ADDRESS */
+struct i40e_virtchnl_ether_addr_list {
+       u16 vsi_id;
+       u16 num_elements;
+       /* variable-length trailing array; count is num_elements */
+       struct i40e_virtchnl_ether_addr list[1];
+};
+
+/* I40E_VIRTCHNL_OP_ADD_VLAN
+ * VF sends this message to add one or more VLAN tag filters for receives.
+ * PF adds the filters and returns status.
+ * If a port VLAN is configured by the PF, this operation will return an
+ * error to the VF.
+ */
+
+/* I40E_VIRTCHNL_OP_DEL_VLAN
+ * VF sends this message to remove one or more VLAN tag filters for receives.
+ * PF removes the filters and returns status.
+ * If a port VLAN is configured by the PF, this operation will return an
+ * error to the VF.
+ */
+
+/* payload for OP_ADD_VLAN / OP_DEL_VLAN */
+struct i40e_virtchnl_vlan_filter_list {
+       u16 vsi_id;
+       u16 num_elements;
+       /* variable-length trailing array; count is num_elements */
+       u16 vlan_id[1];
+};
+
+/* I40E_VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE
+ * VF sends VSI id and flags.
+ * PF returns status code in retval.
+ * Note: we assume that broadcast accept mode is always enabled.
+ */
+struct i40e_virtchnl_promisc_info {
+       u16 vsi_id;
+       u16 flags;              /* bitmask of I40E_FLAG_VF_*_PROMISC below */
+};
+
+#define I40E_FLAG_VF_UNICAST_PROMISC   0x00000001
+#define I40E_FLAG_VF_MULTICAST_PROMISC 0x00000002
+
+/* I40E_VIRTCHNL_OP_GET_STATS
+ * VF sends this message to request stats for the selected VSI. VF uses
+ * the i40e_virtchnl_queue_select struct to specify the VSI. The queue_id
+ * field is ignored by the PF.
+ *
+ * PF replies with struct i40e_eth_stats in an external buffer.
+ */
+
+/* I40E_VIRTCHNL_OP_EVENT
+ * PF sends this message to inform the VF driver of events that may affect it.
+ * No direct response is expected from the VF, though it may generate other
+ * messages in response to this one.
+ */
+enum i40e_virtchnl_event_codes {
+       I40E_VIRTCHNL_EVENT_UNKNOWN = 0,
+       I40E_VIRTCHNL_EVENT_LINK_CHANGE,
+       I40E_VIRTCHNL_EVENT_RESET_IMPENDING,
+       I40E_VIRTCHNL_EVENT_PF_DRIVER_CLOSE,
+};
+/* values carried in i40e_virtchnl_pf_event.severity */
+#define I40E_PF_EVENT_SEVERITY_INFO            0
+#define I40E_PF_EVENT_SEVERITY_CERTAIN_DOOM    255
+
+/* payload for I40E_VIRTCHNL_OP_EVENT (PF -> VF) */
+struct i40e_virtchnl_pf_event {
+       enum i40e_virtchnl_event_codes event;
+       /* only link_event is defined so far; other event codes carry no
+        * event_data in this version of the protocol
+        */
+       union {
+               struct {
+                       enum i40e_aq_link_speed link_speed;
+                       bool link_status;
+               } link_event;
+       } event_data;
+
+       int severity;           /* I40E_PF_EVENT_SEVERITY_* */
+};
+
+/* The following are TBD, not necessary for LAN functionality.
+ * I40E_VIRTCHNL_OP_FCOE
+ */
+
+/* VF reset states - these are written into the RSTAT register:
+ * I40E_VFGEN_RSTAT1 on the PF
+ * I40E_VFGEN_RSTAT on the VF
+ * When the PF initiates a reset, it writes 0
+ * When the reset is complete, it writes 1
+ * When the PF detects that the VF has recovered, it writes 2
+ * VF checks this register periodically to determine if a reset has occurred,
+ * then polls it to know when the reset is complete.
+ * If either the PF or VF reads the register while the hardware
+ * is in a reset state, it will return DEADBEEF, which, when masked
+ * will result in 3.
+ */
+enum i40e_vfr_states {
+       I40E_VFR_INPROGRESS = 0,        /* written by PF when reset starts */
+       I40E_VFR_COMPLETED,             /* written when reset is complete */
+       I40E_VFR_VFACTIVE,              /* written when PF sees VF recovered */
+       I40E_VFR_UNKNOWN,               /* e.g. 0xDEADBEEF masked down to 3 */
+};
+
+#endif /* _I40E_VIRTCHNL_H_ */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.c
new file mode 100644 (file)
index 0000000..8967e58
--- /dev/null
@@ -0,0 +1,2335 @@
+/*******************************************************************************
+ *
+ * Intel Ethernet Controller XL710 Family Linux Driver
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Contact Information:
+ * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ *
+ ******************************************************************************/
+
+#include "i40e.h"
+
+/***********************misc routines*****************************/
+
+/**
+ * i40e_vc_isvalid_vsi_id
+ * @vf: pointer to the vf info
+ * @vsi_id: vf relative vsi id
+ *
+ * check for the valid vsi id
+ *
+ * Returns true when the VSI slot @vsi_id in the PF's VSI table belongs to
+ * this VF.
+ *
+ * NOTE(review): @vsi_id originates from the VF over virtchnl, yet it is
+ * used to index pf->vsi[] with no range check and the slot is dereferenced
+ * with no NULL check — an out-of-range or stale index from a buggy or
+ * malicious VF would read out of bounds here.  Confirm every caller
+ * validates the index first, or add the bounds/NULL checks.
+ **/
+static inline bool i40e_vc_isvalid_vsi_id(struct i40e_vf *vf, u8 vsi_id)
+{
+       struct i40e_pf *pf = vf->pf;
+
+       return pf->vsi[vsi_id]->vf_id == vf->vf_id;
+}
+
+/**
+ * i40e_vc_isvalid_queue_id
+ * @vf: pointer to the vf info
+ * @vsi_id: vsi id
+ * @qid: vsi relative queue id
+ *
+ * check for the valid queue id
+ *
+ * Returns true when @qid is below the VSI's configured queue-pair count.
+ *
+ * NOTE(review): pf->vsi[vsi_id] is indexed and dereferenced without
+ * range/NULL validation of @vsi_id (same concern as
+ * i40e_vc_isvalid_vsi_id) — callers should validate the VSI id first.
+ **/
+static inline bool i40e_vc_isvalid_queue_id(struct i40e_vf *vf, u8 vsi_id,
+                                           u8 qid)
+{
+       struct i40e_pf *pf = vf->pf;
+
+       return qid < pf->vsi[vsi_id]->num_queue_pairs;
+}
+
+/**
+ * i40e_vc_isvalid_vector_id
+ * @vf: pointer to the vf info
+ * @vector_id: vf relative vector id
+ *
+ * check for the valid vector id
+ *
+ * Returns true when @vector_id is strictly below the number of MSI-X
+ * vectors the hardware exposes per VF (vector 0, the misc/mailbox vector,
+ * is therefore considered valid too).
+ **/
+static inline bool i40e_vc_isvalid_vector_id(struct i40e_vf *vf, u8 vector_id)
+{
+       struct i40e_pf *pf = vf->pf;
+
+       return vector_id < pf->hw.func_caps.num_msix_vectors_vf;
+}
+
+/***********************vf resource mgmt routines*****************/
+
+/**
+ * i40e_vc_get_pf_queue_id
+ * @vf: pointer to the vf info
+ * @vsi_idx: index of VSI in PF struct
+ * @vsi_queue_id: vsi relative queue id
+ *
+ * return pf relative queue id
+ *
+ * For a non-contiguous VSI queue mapping the per-queue table entry is
+ * used; otherwise the VSI's queues are contiguous starting at
+ * queue_mapping[0] and the id is computed by offset.
+ *
+ * NOTE(review): in the non-contiguous case @vsi_queue_id indexes
+ * info.queue_mapping[] unchecked — callers must have validated it
+ * (e.g. via i40e_vc_isvalid_queue_id) before calling.
+ **/
+static u16 i40e_vc_get_pf_queue_id(struct i40e_vf *vf, u8 vsi_idx,
+                                  u8 vsi_queue_id)
+{
+       struct i40e_pf *pf = vf->pf;
+       struct i40e_vsi *vsi = pf->vsi[vsi_idx];
+       /* initializer is always overwritten by one of the branches below */
+       u16 pf_queue_id = I40E_QUEUE_END_OF_LIST;
+
+       if (le16_to_cpu(vsi->info.mapping_flags) &
+           I40E_AQ_VSI_QUE_MAP_NONCONTIG)
+               pf_queue_id =
+                       le16_to_cpu(vsi->info.queue_mapping[vsi_queue_id]);
+       else
+               pf_queue_id = le16_to_cpu(vsi->info.queue_mapping[0]) +
+                             vsi_queue_id;
+
+       return pf_queue_id;
+}
+
+/**
+ * i40e_ctrl_vsi_tx_queue
+ * @vf: pointer to the vf info
+ * @vsi_idx: index of VSI in PF struct
+ * @vsi_queue_id: vsi relative queue index
+ * @ctrl: control flags
+ *
+ * enable/disable/enable check/disable check
+ *
+ * Returns 0 on success, -EPERM when a *CHECK request finds the queue not
+ * yet in the requested state, -EINVAL for an unknown @ctrl.  The QTX_ENA
+ * register is written back only for ENABLE/DISABLE/FASTDISABLE requests,
+ * and for FASTDISABLECHECK once the queue has stopped (to clear the
+ * fast-disable request bit).
+ **/
+static int i40e_ctrl_vsi_tx_queue(struct i40e_vf *vf, u16 vsi_idx,
+                                 u16 vsi_queue_id,
+                                 enum i40e_queue_ctrl ctrl)
+{
+       struct i40e_pf *pf = vf->pf;
+       struct i40e_hw *hw = &pf->hw;
+       bool writeback = false;
+       u16 pf_queue_id;
+       int ret = 0;
+       u32 reg;
+
+       pf_queue_id = i40e_vc_get_pf_queue_id(vf, vsi_idx, vsi_queue_id);
+       reg = rd32(hw, I40E_QTX_ENA(pf_queue_id));
+
+       switch (ctrl) {
+       case I40E_QUEUE_CTRL_ENABLE:
+               reg |= I40E_QTX_ENA_QENA_REQ_MASK;
+               writeback = true;
+               break;
+       case I40E_QUEUE_CTRL_ENABLECHECK:
+               ret = (reg & I40E_QTX_ENA_QENA_STAT_MASK) ? 0 : -EPERM;
+               break;
+       case I40E_QUEUE_CTRL_DISABLE:
+               reg &= ~I40E_QTX_ENA_QENA_REQ_MASK;
+               writeback = true;
+               break;
+       case I40E_QUEUE_CTRL_DISABLECHECK:
+               ret = (reg & I40E_QTX_ENA_QENA_STAT_MASK) ? -EPERM : 0;
+               break;
+       case I40E_QUEUE_CTRL_FASTDISABLE:
+               reg |= I40E_QTX_ENA_FAST_QDIS_MASK;
+               writeback = true;
+               break;
+       case I40E_QUEUE_CTRL_FASTDISABLECHECK:
+               ret = (reg & I40E_QTX_ENA_QENA_STAT_MASK) ? -EPERM : 0;
+               if (!ret) {
+                       /* queue stopped: clear the fast-disable request bit */
+                       reg &= ~I40E_QTX_ENA_FAST_QDIS_MASK;
+                       writeback = true;
+               }
+               break;
+       default:
+               ret = -EINVAL;
+               break;
+       }
+
+       if (writeback) {
+               wr32(hw, I40E_QTX_ENA(pf_queue_id), reg);
+               i40e_flush(hw);
+       }
+
+       return ret;
+}
+
+/**
+ * i40e_ctrl_vsi_rx_queue
+ * @vf: pointer to the vf info
+ * @vsi_idx: index of VSI in PF struct
+ * @vsi_queue_id: vsi relative queue index
+ * @ctrl: control flags
+ *
+ * enable/disable/enable check/disable check
+ *
+ * Mirror of i40e_ctrl_vsi_tx_queue operating on the QRX_ENA register.
+ * Returns 0 on success, -EPERM when a *CHECK request finds the queue not
+ * yet in the requested state, -EINVAL for an unknown @ctrl.
+ **/
+static int i40e_ctrl_vsi_rx_queue(struct i40e_vf *vf, u16 vsi_idx,
+                                 u16 vsi_queue_id,
+                                 enum i40e_queue_ctrl ctrl)
+{
+       struct i40e_pf *pf = vf->pf;
+       struct i40e_hw *hw = &pf->hw;
+       bool writeback = false;
+       u16 pf_queue_id;
+       int ret = 0;
+       u32 reg;
+
+       pf_queue_id = i40e_vc_get_pf_queue_id(vf, vsi_idx, vsi_queue_id);
+       reg = rd32(hw, I40E_QRX_ENA(pf_queue_id));
+
+       switch (ctrl) {
+       case I40E_QUEUE_CTRL_ENABLE:
+               reg |= I40E_QRX_ENA_QENA_REQ_MASK;
+               writeback = true;
+               break;
+       case I40E_QUEUE_CTRL_ENABLECHECK:
+               ret = (reg & I40E_QRX_ENA_QENA_STAT_MASK) ? 0 : -EPERM;
+               break;
+       case I40E_QUEUE_CTRL_DISABLE:
+               reg &= ~I40E_QRX_ENA_QENA_REQ_MASK;
+               writeback = true;
+               break;
+       case I40E_QUEUE_CTRL_DISABLECHECK:
+               ret = (reg & I40E_QRX_ENA_QENA_STAT_MASK) ? -EPERM : 0;
+               break;
+       case I40E_QUEUE_CTRL_FASTDISABLE:
+               reg |= I40E_QRX_ENA_FAST_QDIS_MASK;
+               writeback = true;
+               break;
+       case I40E_QUEUE_CTRL_FASTDISABLECHECK:
+               ret = (reg & I40E_QRX_ENA_QENA_STAT_MASK) ? -EPERM : 0;
+               if (!ret) {
+                       /* queue stopped: clear the fast-disable request bit */
+                       reg &= ~I40E_QRX_ENA_FAST_QDIS_MASK;
+                       writeback = true;
+               }
+               break;
+       default:
+               ret = -EINVAL;
+               break;
+       }
+
+       if (writeback) {
+               wr32(hw, I40E_QRX_ENA(pf_queue_id), reg);
+               i40e_flush(hw);
+       }
+
+       return ret;
+}
+
+/**
+ * i40e_config_irq_link_list
+ * @vf: pointer to the vf info
+ * @vsi_idx: index of VSI in PF struct
+ * @vecmap: irq map info
+ *
+ * configure irq link list from the map
+ *
+ * Builds the hardware interrupt linked list for one vector.  Bit
+ * 2*q of @linklistmap marks RX queue q and bit 2*q+1 marks TX queue q;
+ * the marked queues are then chained head-to-tail through the
+ * QINT_RQCTL/QINT_TQCTL registers (both share the same layout).
+ **/
+static void i40e_config_irq_link_list(struct i40e_vf *vf, u16 vsi_idx,
+                                     struct i40e_virtchnl_vector_map *vecmap)
+{
+       unsigned long linklistmap = 0, tempmap;
+       struct i40e_pf *pf = vf->pf;
+       struct i40e_hw *hw = &pf->hw;
+       u16 vsi_queue_id, pf_queue_id;
+       enum i40e_queue_type qtype;
+       u16 next_q, vector_id;
+       u32 reg, reg_idx;
+       u16 itr_idx = 0;
+
+       vector_id = vecmap->vector_id;
+       /* setup the head; vector 0 uses the dedicated LNKLST0 register */
+       if (0 == vector_id)
+               reg_idx = I40E_VPINT_LNKLST0(vf->vf_id);
+       else
+               reg_idx = I40E_VPINT_LNKLSTN(
+                           ((pf->hw.func_caps.num_msix_vectors_vf - 1)
+                                             * vf->vf_id) + (vector_id - 1));
+
+       if (vecmap->rxq_map == 0 && vecmap->txq_map == 0) {
+               /* Special case - No queues mapped on this vector */
+               wr32(hw, reg_idx, I40E_VPINT_LNKLST0_FIRSTQ_INDX_MASK);
+               goto irq_list_done;
+       }
+       tempmap = vecmap->rxq_map;
+       vsi_queue_id = find_first_bit(&tempmap, I40E_MAX_VSI_QP);
+       while (vsi_queue_id < I40E_MAX_VSI_QP) {
+               /* use 1UL: linklistmap is unsigned long and the shift count
+                * can reach the width of int, which would make a plain
+                * "1 << ..." overflow (undefined behavior)
+                */
+               linklistmap |= (1UL <<
+                               (I40E_VIRTCHNL_SUPPORTED_QTYPES *
+                                vsi_queue_id));
+               vsi_queue_id =
+                   find_next_bit(&tempmap, I40E_MAX_VSI_QP, vsi_queue_id + 1);
+       }
+
+       tempmap = vecmap->txq_map;
+       vsi_queue_id = find_first_bit(&tempmap, I40E_MAX_VSI_QP);
+       while (vsi_queue_id < I40E_MAX_VSI_QP) {
+               linklistmap |= (1UL <<
+                               (I40E_VIRTCHNL_SUPPORTED_QTYPES * vsi_queue_id
+                                + 1));
+               vsi_queue_id = find_next_bit(&tempmap, I40E_MAX_VSI_QP,
+                                            vsi_queue_id + 1);
+       }
+
+       next_q = find_first_bit(&linklistmap,
+                               (I40E_MAX_VSI_QP *
+                                I40E_VIRTCHNL_SUPPORTED_QTYPES));
+       vsi_queue_id = next_q/I40E_VIRTCHNL_SUPPORTED_QTYPES;
+       qtype = next_q%I40E_VIRTCHNL_SUPPORTED_QTYPES;
+       pf_queue_id = i40e_vc_get_pf_queue_id(vf, vsi_idx, vsi_queue_id);
+       reg = ((qtype << I40E_VPINT_LNKLSTN_FIRSTQ_TYPE_SHIFT) | pf_queue_id);
+
+       wr32(hw, reg_idx, reg);
+
+       while (next_q < (I40E_MAX_VSI_QP * I40E_VIRTCHNL_SUPPORTED_QTYPES)) {
+               switch (qtype) {
+               case I40E_QUEUE_TYPE_RX:
+                       reg_idx = I40E_QINT_RQCTL(pf_queue_id);
+                       itr_idx = vecmap->rxitr_idx;
+                       break;
+               case I40E_QUEUE_TYPE_TX:
+                       reg_idx = I40E_QINT_TQCTL(pf_queue_id);
+                       itr_idx = vecmap->txitr_idx;
+                       break;
+               default:
+                       break;
+               }
+
+               next_q = find_next_bit(&linklistmap,
+                                      (I40E_MAX_VSI_QP *
+                                       I40E_VIRTCHNL_SUPPORTED_QTYPES),
+                                      next_q + 1);
+               if (next_q < (I40E_MAX_VSI_QP * I40E_VIRTCHNL_SUPPORTED_QTYPES)) {
+                       vsi_queue_id = next_q / I40E_VIRTCHNL_SUPPORTED_QTYPES;
+                       qtype = next_q % I40E_VIRTCHNL_SUPPORTED_QTYPES;
+                       pf_queue_id = i40e_vc_get_pf_queue_id(vf, vsi_idx,
+                                                             vsi_queue_id);
+               } else {
+                       pf_queue_id = I40E_QUEUE_END_OF_LIST;
+                       qtype = 0;
+               }
+
+               /* format for the RQCTL & TQCTL regs is same */
+               reg = (vector_id) |
+                   (qtype << I40E_QINT_RQCTL_NEXTQ_TYPE_SHIFT) |
+                   (pf_queue_id << I40E_QINT_RQCTL_NEXTQ_INDX_SHIFT) |
+                   (1 << I40E_QINT_RQCTL_CAUSE_ENA_SHIFT) |
+                   (itr_idx << I40E_QINT_RQCTL_ITR_INDX_SHIFT);
+               wr32(hw, reg_idx, reg);
+       }
+
+irq_list_done:
+       i40e_flush(hw);
+}
+
+/**
+ * i40e_config_vsi_tx_queue
+ * @vf: pointer to the vf info
+ * @vsi_idx: index of VSI in PF struct
+ * @vsi_queue_id: vsi relative queue index
+ * @info: config. info
+ *
+ * configure tx queue
+ *
+ * Clears then programs the queue's HMC Tx context and associates the
+ * queue with this VF in QTX_CTL.  Returns 0 on success or -ENOENT when
+ * the HMC clear/set fails.
+ *
+ * NOTE(review): info->headwb_enabled and info->dma_headwb_addr are never
+ * consumed here — head write-back requested by the VF is silently
+ * ignored; confirm whether that is intentional.
+ **/
+static int i40e_config_vsi_tx_queue(struct i40e_vf *vf, u16 vsi_idx,
+                                   u16 vsi_queue_id,
+                                   struct i40e_virtchnl_txq_info *info)
+{
+       struct i40e_pf *pf = vf->pf;
+       struct i40e_hw *hw = &pf->hw;
+       struct i40e_hmc_obj_txq tx_ctx;
+       u16 pf_queue_id;
+       u32 qtx_ctl;
+       int ret = 0;
+
+       pf_queue_id = i40e_vc_get_pf_queue_id(vf, vsi_idx, vsi_queue_id);
+
+       /* clear the context structure first */
+       memset(&tx_ctx, 0, sizeof(struct i40e_hmc_obj_txq));
+
+       /* only set the required fields */
+       tx_ctx.base = info->dma_ring_addr / 128;
+       tx_ctx.qlen = info->ring_len;
+       tx_ctx.rdylist = le16_to_cpu(pf->vsi[vsi_idx]->info.qs_handle[0]);
+       tx_ctx.rdylist_act = 0;
+
+       /* clear the context in the HMC */
+       ret = i40e_clear_lan_tx_queue_context(hw, pf_queue_id);
+       if (ret) {
+               dev_err(&pf->pdev->dev,
+                       "Failed to clear VF LAN Tx queue context %d, error: %d\n",
+                       pf_queue_id, ret);
+               ret = -ENOENT;
+               goto error_context;
+       }
+
+       /* set the context in the HMC */
+       ret = i40e_set_lan_tx_queue_context(hw, pf_queue_id, &tx_ctx);
+       if (ret) {
+               dev_err(&pf->pdev->dev,
+                       "Failed to set VF LAN Tx queue context %d error: %d\n",
+                       pf_queue_id, ret);
+               ret = -ENOENT;
+               goto error_context;
+       }
+
+       /* associate this queue with the PCI VF function */
+       qtx_ctl = I40E_QTX_CTL_VF_QUEUE;
+       qtx_ctl |= ((hw->hmc.hmc_fn_id << I40E_QTX_CTL_PF_INDX_SHIFT)
+                   & I40E_QTX_CTL_PF_INDX_MASK);
+       qtx_ctl |= (((vf->vf_id + hw->func_caps.vf_base_id)
+                    << I40E_QTX_CTL_VFVM_INDX_SHIFT)
+                   & I40E_QTX_CTL_VFVM_INDX_MASK);
+       wr32(hw, I40E_QTX_CTL(pf_queue_id), qtx_ctl);
+       i40e_flush(hw);
+
+/* success path falls through to this label too */
+error_context:
+       return ret;
+}
+
+/**
+ * i40e_config_vsi_rx_queue
+ * @vf: pointer to the vf info
+ * @vsi_idx: index of VSI in PF struct
+ * @vsi_queue_id: vsi relative queue index
+ * @info: config. info
+ *
+ * configure rx queue
+ *
+ * Validates the VF-supplied buffer/packet sizes, then clears and programs
+ * the queue's HMC Rx context.  Returns 0 on success, -EINVAL for a size
+ * out of the hardware's range, -ENOENT when the HMC clear/set fails.
+ *
+ * NOTE(review): info->rx_split_pos is ignored — when header split is
+ * enabled the split positions are hard-coded below; confirm whether the
+ * VF's requested split position should be honored instead.
+ **/
+static int i40e_config_vsi_rx_queue(struct i40e_vf *vf, u16 vsi_idx,
+                                   u16 vsi_queue_id,
+                                   struct i40e_virtchnl_rxq_info *info)
+{
+       struct i40e_pf *pf = vf->pf;
+       struct i40e_hw *hw = &pf->hw;
+       struct i40e_hmc_obj_rxq rx_ctx;
+       u16 pf_queue_id;
+       int ret = 0;
+
+       pf_queue_id = i40e_vc_get_pf_queue_id(vf, vsi_idx, vsi_queue_id);
+
+       /* clear the context structure first */
+       memset(&rx_ctx, 0, sizeof(struct i40e_hmc_obj_rxq));
+
+       /* only set the required fields */
+       rx_ctx.base = info->dma_ring_addr / 128;
+       rx_ctx.qlen = info->ring_len;
+
+       if (info->splithdr_enabled) {
+               rx_ctx.hsplit_0 = I40E_RX_SPLIT_L2      |
+                                 I40E_RX_SPLIT_IP      |
+                                 I40E_RX_SPLIT_TCP_UDP |
+                                 I40E_RX_SPLIT_SCTP;
+               /* header length validation */
+               if (info->hdr_size > ((2 * 1024) - 64)) {
+                       ret = -EINVAL;
+                       goto error_param;
+               }
+               rx_ctx.hbuff = info->hdr_size >> I40E_RXQ_CTX_HBUFF_SHIFT;
+
+               /* set splitalways mode 10b */
+               rx_ctx.dtype = 0x2;
+       }
+
+       /* databuffer length validation */
+       if (info->databuffer_size > ((16 * 1024) - 128)) {
+               ret = -EINVAL;
+               goto error_param;
+       }
+       rx_ctx.dbuff = info->databuffer_size >> I40E_RXQ_CTX_DBUFF_SHIFT;
+
+       /* max pkt. length validation */
+       if (info->max_pkt_size >= (16 * 1024) || info->max_pkt_size < 64) {
+               ret = -EINVAL;
+               goto error_param;
+       }
+       rx_ctx.rxmax = info->max_pkt_size;
+
+       /* enable 32bytes desc always */
+       rx_ctx.dsize = 1;
+
+       /* default values */
+       rx_ctx.tphrdesc_ena = 1;
+       rx_ctx.tphwdesc_ena = 1;
+       rx_ctx.tphdata_ena = 1;
+       rx_ctx.tphhead_ena = 1;
+       rx_ctx.lrxqthresh = 2;
+       rx_ctx.crcstrip = 1;
+
+       /* clear the context in the HMC */
+       ret = i40e_clear_lan_rx_queue_context(hw, pf_queue_id);
+       if (ret) {
+               dev_err(&pf->pdev->dev,
+                       "Failed to clear VF LAN Rx queue context %d, error: %d\n",
+                       pf_queue_id, ret);
+               ret = -ENOENT;
+               goto error_param;
+       }
+
+       /* set the context in the HMC */
+       ret = i40e_set_lan_rx_queue_context(hw, pf_queue_id, &rx_ctx);
+       if (ret) {
+               dev_err(&pf->pdev->dev,
+                       "Failed to set VF LAN Rx queue context %d error: %d\n",
+                       pf_queue_id, ret);
+               ret = -ENOENT;
+               goto error_param;
+       }
+
+/* success path falls through to this label too */
+error_param:
+       return ret;
+}
+
+/**
+ * i40e_alloc_vsi_res
+ * @vf: pointer to the vf info
+ * @type: type of VSI to allocate
+ *
+ * alloc vf vsi context & resources
+ *
+ * Creates a VSI of @type for this VF, and for an SRIOV VSI records its
+ * index/id and installs the VF's default unicast MAC filter.  The filter
+ * table is then synced to hardware and broadcast acceptance is enabled.
+ * Returns 0 on success, -ENOENT/-ENOMEM/-EINVAL (or the sync error) on
+ * failure.
+ **/
+static int i40e_alloc_vsi_res(struct i40e_vf *vf, enum i40e_vsi_type type)
+{
+       struct i40e_mac_filter *f = NULL;
+       struct i40e_pf *pf = vf->pf;
+       struct i40e_hw *hw = &pf->hw;
+       struct i40e_vsi *vsi;
+       int ret = 0;
+
+       vsi = i40e_vsi_setup(pf, type, pf->vsi[pf->lan_vsi]->seid, vf->vf_id);
+
+       if (!vsi) {
+               dev_err(&pf->pdev->dev,
+                       "add vsi failed for vf %d, aq_err %d\n",
+                       vf->vf_id, pf->hw.aq.asq_last_status);
+               ret = -ENOENT;
+               goto error_alloc_vsi_res;
+       }
+       if (type == I40E_VSI_SRIOV) {
+               vf->lan_vsi_index = vsi->idx;
+               vf->lan_vsi_id = vsi->id;
+               dev_info(&pf->pdev->dev,
+                        "LAN VSI index %d, VSI id %d\n",
+                        vsi->idx, vsi->id);
+               f = i40e_add_filter(vsi, vf->default_lan_addr.addr,
+                                   0, true, false);
+               /* the filter check must live inside the SRIOV branch:
+                * for any other VSI type no filter is added, so checking
+                * f unconditionally would spuriously fail with -ENOMEM
+                */
+               if (!f) {
+                       dev_err(&pf->pdev->dev, "Unable to add ucast filter\n");
+                       ret = -ENOMEM;
+                       goto error_alloc_vsi_res;
+               }
+       }
+
+       /* program mac filter */
+       ret = i40e_sync_vsi_filters(vsi);
+       if (ret) {
+               dev_err(&pf->pdev->dev, "Unable to program ucast filters\n");
+               goto error_alloc_vsi_res;
+       }
+
+       /* accept bcast pkts. by default */
+       ret = i40e_aq_set_vsi_broadcast(hw, vsi->seid, true, NULL);
+       if (ret) {
+               dev_err(&pf->pdev->dev,
+                       "set vsi bcast failed for vf %d, vsi %d, aq_err %d\n",
+                       vf->vf_id, vsi->idx, pf->hw.aq.asq_last_status);
+               ret = -EINVAL;
+       }
+
+error_alloc_vsi_res:
+       return ret;
+}
+
+/**
+ * i40e_reset_vf
+ * @vf: pointer to the vf structure
+ * @flr: VFLR was issued or not
+ *
+ * reset the vf
+ **/
+int i40e_reset_vf(struct i40e_vf *vf, bool flr)
+{
+       int ret = -ENOENT;
+       struct i40e_pf *pf = vf->pf;
+       struct i40e_hw *hw = &pf->hw;
+       u32 reg, reg_idx, msix_vf;
+       bool rsd = false;
+       u16 pf_queue_id;
+       int i, j;
+
+       /* warn the VF */
+       wr32(hw, I40E_VFGEN_RSTAT1(vf->vf_id), I40E_VFR_INPROGRESS);
+
+       clear_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states);
+
+       /* PF triggers VFR only when VF requests, in case of
+        * VFLR, HW triggers VFR
+        */
+       if (!flr) {
+               /* reset vf using VPGEN_VFRTRIG reg */
+               reg = I40E_VPGEN_VFRTRIG_VFSWR_MASK;
+               wr32(hw, I40E_VPGEN_VFRTRIG(vf->vf_id), reg);
+               i40e_flush(hw);
+       }
+
+       /* poll VPGEN_VFRSTAT reg to make sure
+        * that reset is complete
+        */
+       /* bounded wait: at most 4 polls, 10us apart */
+       for (i = 0; i < 4; i++) {
+               /* vf reset requires driver to first reset the
+                * vf & than poll the status register to make sure
+                * that the requested op was completed
+                * successfully
+                */
+               udelay(10);
+               reg = rd32(hw, I40E_VPGEN_VFRSTAT(vf->vf_id));
+               if (reg & I40E_VPGEN_VFRSTAT_VFRD_MASK) {
+                       rsd = true;
+                       break;
+               }
+       }
+
+       if (!rsd)
+               dev_err(&pf->pdev->dev, "VF reset check timeout %d\n",
+                       vf->vf_id);
+
+       /* fast disable qps */
+       /* NOTE(review): ret is overwritten on every iteration below, so it
+        * only reflects the last queue-control call; earlier failures are
+        * lost, and if the VSI has zero queue pairs ret stays -ENOENT
+        */
+       for (j = 0; j < pf->vsi[vf->lan_vsi_index]->num_queue_pairs; j++) {
+               ret = i40e_ctrl_vsi_tx_queue(vf, vf->lan_vsi_index, j,
+                                            I40E_QUEUE_CTRL_FASTDISABLE);
+               ret = i40e_ctrl_vsi_rx_queue(vf, vf->lan_vsi_index, j,
+                                            I40E_QUEUE_CTRL_FASTDISABLE);
+       }
+
+       /* Queue enable/disable requires driver to
+        * first reset the vf & than poll the status register
+        * to make sure that the requested op was completed
+        * successfully
+        */
+       udelay(10);
+       for (j = 0; j < pf->vsi[vf->lan_vsi_index]->num_queue_pairs; j++) {
+               ret = i40e_ctrl_vsi_tx_queue(vf, vf->lan_vsi_index, j,
+                                            I40E_QUEUE_CTRL_FASTDISABLECHECK);
+               if (ret)
+                       dev_info(&pf->pdev->dev,
+                                "Queue control check failed on Tx queue %d of VSI %d VF %d\n",
+                                vf->lan_vsi_index, j, vf->vf_id);
+               ret = i40e_ctrl_vsi_rx_queue(vf, vf->lan_vsi_index, j,
+                                            I40E_QUEUE_CTRL_FASTDISABLECHECK);
+               if (ret)
+                       dev_info(&pf->pdev->dev,
+                                "Queue control check failed on Rx queue %d of VSI %d VF %d\n",
+                                vf->lan_vsi_index, j, vf->vf_id);
+       }
+
+       /* clear the irq settings */
+       /* vector 0 has its own per-VF LNKLST0 register; vectors 1..n-1
+        * are indexed into the shared LNKLSTN array
+        */
+       msix_vf = pf->hw.func_caps.num_msix_vectors_vf;
+       for (i = 0; i < msix_vf; i++) {
+               /* format is same for both registers */
+               if (0 == i)
+                       reg_idx = I40E_VPINT_LNKLST0(vf->vf_id);
+               else
+                       reg_idx = I40E_VPINT_LNKLSTN(((msix_vf - 1) *
+                                                     (vf->vf_id))
+                                                    + (i - 1));
+               reg = (I40E_VPINT_LNKLSTN_FIRSTQ_TYPE_MASK |
+                      I40E_VPINT_LNKLSTN_FIRSTQ_INDX_MASK);
+               wr32(hw, reg_idx, reg);
+               i40e_flush(hw);
+       }
+       /* disable interrupts so the VF starts in a known state */
+       for (i = 0; i < msix_vf; i++) {
+               /* format is same for both registers */
+               if (0 == i)
+                       reg_idx = I40E_VFINT_DYN_CTL0(vf->vf_id);
+               else
+                       reg_idx = I40E_VFINT_DYN_CTLN(((msix_vf - 1) *
+                                                     (vf->vf_id))
+                                                    + (i - 1));
+               wr32(hw, reg_idx, I40E_VFINT_DYN_CTLN_CLEARPBA_MASK);
+               i40e_flush(hw);
+       }
+
+       /* set the defaults for the rqctl & tqctl registers */
+       reg = (I40E_QINT_RQCTL_NEXTQ_INDX_MASK | I40E_QINT_RQCTL_ITR_INDX_MASK |
+              I40E_QINT_RQCTL_NEXTQ_TYPE_MASK);
+       for (j = 0; j < pf->vsi[vf->lan_vsi_index]->num_queue_pairs; j++) {
+               pf_queue_id = i40e_vc_get_pf_queue_id(vf, vf->lan_vsi_index, j);
+               wr32(hw, I40E_QINT_RQCTL(pf_queue_id), reg);
+               wr32(hw, I40E_QINT_TQCTL(pf_queue_id), reg);
+       }
+
+       /* clear the reset bit in the VPGEN_VFRTRIG reg */
+       reg = rd32(hw, I40E_VPGEN_VFRTRIG(vf->vf_id));
+       reg &= ~I40E_VPGEN_VFRTRIG_VFSWR_MASK;
+       wr32(hw, I40E_VPGEN_VFRTRIG(vf->vf_id), reg);
+       /* tell the VF the reset is done */
+       wr32(hw, I40E_VFGEN_RSTAT1(vf->vf_id), I40E_VFR_COMPLETED);
+       i40e_flush(hw);
+
+       return ret;
+}
+
+/**
+ * i40e_enable_vf_mappings
+ * @vf: pointer to the vf info
+ *
+ * enable vf mappings
+ **/
+static void i40e_enable_vf_mappings(struct i40e_vf *vf)
+{
+       struct i40e_pf *pf = vf->pf;
+       struct i40e_hw *hw = &pf->hw;
+       u32 reg, total_queue_pairs = 0;
+       int j;
+
+       /* Tell the hardware we're using noncontiguous mapping. HW requires
+        * that VF queues be mapped using this method, even when they are
+        * contiguous in real life
+        */
+       wr32(hw, I40E_VSILAN_QBASE(vf->lan_vsi_id),
+            I40E_VSILAN_QBASE_VSIQTABLE_ENA_MASK);
+
+       /* enable VF vplan_qtable mappings */
+       reg = I40E_VPLAN_MAPENA_TXRX_ENA_MASK;
+       wr32(hw, I40E_VPLAN_MAPENA(vf->vf_id), reg);
+
+       /* map PF queues to VF queues */
+       for (j = 0; j < pf->vsi[vf->lan_vsi_index]->num_queue_pairs; j++) {
+               u16 qid = i40e_vc_get_pf_queue_id(vf, vf->lan_vsi_index, j);
+               reg = (qid & I40E_VPLAN_QTABLE_QINDEX_MASK);
+               wr32(hw, I40E_VPLAN_QTABLE(total_queue_pairs, vf->vf_id), reg);
+               total_queue_pairs++;
+       }
+
+       /* map PF queues to VSI */
+       /* each VSILAN_QTABLE register carries two queue ids (low/high
+        * 16 bits), hence 7 registers cover up to 14 queues; the value
+        * 0x07FF07FF marks both halves unused
+        */
+       for (j = 0; j < 7; j++) {
+               if (j * 2 >= pf->vsi[vf->lan_vsi_index]->num_queue_pairs) {
+                       reg = 0x07FF07FF;       /* unused */
+               } else {
+                       u16 qid = i40e_vc_get_pf_queue_id(vf, vf->lan_vsi_index,
+                                                         j * 2);
+                       reg = qid;
+                       qid = i40e_vc_get_pf_queue_id(vf, vf->lan_vsi_index,
+                                                     (j * 2) + 1);
+                       reg |= qid << 16;
+               }
+               wr32(hw, I40E_VSILAN_QTABLE(j, vf->lan_vsi_id), reg);
+       }
+
+       i40e_flush(hw);
+}
+
+/**
+ * i40e_disable_vf_mappings
+ * @vf: pointer to the vf info
+ *
+ * disable vf mappings
+ **/
+static void i40e_disable_vf_mappings(struct i40e_vf *vf)
+{
+       struct i40e_hw *hw = &vf->pf->hw;
+       int qp;
+
+       /* turn off the qp mapping enable, then point every qp table
+        * entry for this VF at the end-of-list marker
+        */
+       wr32(hw, I40E_VPLAN_MAPENA(vf->vf_id), 0);
+       for (qp = 0; qp < I40E_MAX_VSI_QP; qp++)
+               wr32(hw, I40E_VPLAN_QTABLE(qp, vf->vf_id),
+                    I40E_QUEUE_END_OF_LIST);
+       i40e_flush(hw);
+}
+
+/**
+ * i40e_free_vf_res
+ * @vf: pointer to the vf info
+ *
+ * free vf resources
+ **/
+static void i40e_free_vf_res(struct i40e_vf *vf)
+{
+       struct i40e_pf *pf = vf->pf;
+
+       /* release the VSI (detaching it from the parent uplink) if one
+        * was ever attached to this VF
+        */
+       if (vf->lan_vsi_index) {
+               i40e_vsi_release(pf->vsi[vf->lan_vsi_index]);
+               vf->lan_vsi_index = 0;
+               vf->lan_vsi_id = 0;
+       }
+
+       /* clear the bookkeeping that tracked those resources */
+       vf->num_queue_pairs = 0;
+       vf->vf_states = 0;
+}
+
+/**
+ * i40e_alloc_vf_res
+ * @vf: pointer to the vf info
+ *
+ * allocate vf resources
+ **/
+static int i40e_alloc_vf_res(struct i40e_vf *vf)
+{
+       struct i40e_pf *pf = vf->pf;
+       int total_queue_pairs = 0;
+       int ret;
+
+       /* create the hw vsi context & associated resources; on failure
+        * release whatever partial state was left behind
+        */
+       ret = i40e_alloc_vsi_res(vf, I40E_VSI_SRIOV);
+       if (ret) {
+               i40e_free_vf_res(vf);
+               return ret;
+       }
+       total_queue_pairs += pf->vsi[vf->lan_vsi_index]->num_queue_pairs;
+       set_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps);
+
+       /* remember the total qp count for runtime vf request validation */
+       vf->num_queue_pairs = total_queue_pairs;
+
+       /* vf is now completely initialized */
+       set_bit(I40E_VF_STAT_INIT, &vf->vf_states);
+
+       return 0;
+}
+
+/**
+ * i40e_vfs_are_assigned
+ * @pf: pointer to the pf structure
+ *
+ * Determine if any VFs are assigned to VMs
+ **/
+static bool i40e_vfs_are_assigned(struct i40e_pf *pf)
+{
+       struct pci_dev *pdev = pf->pdev;
+       struct pci_dev *vfdev;
+
+       /* loop through all the VFs to see if we own any that are assigned */
+       vfdev = pci_get_device(PCI_VENDOR_ID_INTEL, I40E_VF_DEVICE_ID, NULL);
+       while (vfdev) {
+               /* if we don't own it we don't care */
+               if (vfdev->is_virtfn && pci_physfn(vfdev) == pdev) {
+                       /* if it is assigned we cannot release it */
+                       if (vfdev->dev_flags & PCI_DEV_FLAGS_ASSIGNED) {
+                               /* drop the reference pci_get_device() took
+                                * before bailing out of the scan
+                                */
+                               pci_dev_put(vfdev);
+                               return true;
+                       }
+               }
+
+               vfdev = pci_get_device(PCI_VENDOR_ID_INTEL,
+                                      I40E_VF_DEVICE_ID,
+                                      vfdev);
+       }
+
+       return false;
+}
+
+/**
+ * i40e_free_vfs
+ * @pf: pointer to the pf structure
+ *
+ * free vf resources
+ **/
+void i40e_free_vfs(struct i40e_pf *pf)
+{
+       struct i40e_hw *hw = &pf->hw;
+       int i;
+
+       /* nothing to do if no VF array was ever allocated */
+       if (!pf->vf)
+               return;
+
+       /* Disable interrupt 0 so we don't try to handle the VFLR. */
+       wr32(hw, I40E_PFINT_DYN_CTL0, 0);
+       i40e_flush(hw);
+
+       /* free up vf resources */
+       for (i = 0; i < pf->num_alloc_vfs; i++) {
+               /* only VFs that completed init own a VSI to release */
+               if (test_bit(I40E_VF_STAT_INIT, &pf->vf[i].vf_states))
+                       i40e_free_vf_res(&pf->vf[i]);
+               /* disable qp mappings */
+               i40e_disable_vf_mappings(&pf->vf[i]);
+       }
+
+       kfree(pf->vf);
+       pf->vf = NULL;
+       pf->num_alloc_vfs = 0;
+
+       /* SR-IOV can only be torn down when no VF is still assigned
+        * to a guest
+        */
+       if (!i40e_vfs_are_assigned(pf))
+               pci_disable_sriov(pf->pdev);
+       else
+               dev_warn(&pf->pdev->dev,
+                        "unable to disable SR-IOV because VFs are assigned.\n");
+
+       /* Re-enable interrupt 0. */
+       wr32(hw, I40E_PFINT_DYN_CTL0,
+            I40E_PFINT_DYN_CTL0_INTENA_MASK |
+            I40E_PFINT_DYN_CTL0_CLEARPBA_MASK |
+            (I40E_ITR_NONE << I40E_PFINT_DYN_CTL0_ITR_INDX_SHIFT));
+       i40e_flush(hw);
+}
+
+#ifdef CONFIG_PCI_IOV
+/**
+ * i40e_alloc_vfs
+ * @pf: pointer to the pf structure
+ * @num_alloc_vfs: number of vfs to allocate
+ *
+ * allocate vf resources
+ **/
+static int i40e_alloc_vfs(struct i40e_pf *pf, u16 num_alloc_vfs)
+{
+       struct i40e_vf *vfs;
+       int i, ret = 0;
+
+       ret = pci_enable_sriov(pf->pdev, num_alloc_vfs);
+       if (ret) {
+               dev_err(&pf->pdev->dev,
+                       "pci_enable_sriov failed with error %d!\n", ret);
+               pf->num_alloc_vfs = 0;
+               goto err_iov;
+       }
+
+       /* allocate memory; kcalloc zeroes the array and checks the
+        * count * size multiplication for overflow
+        */
+       vfs = kcalloc(num_alloc_vfs, sizeof(struct i40e_vf), GFP_KERNEL);
+       if (!vfs) {
+               ret = -ENOMEM;
+               goto err_alloc;
+       }
+
+       /* apply default profile */
+       for (i = 0; i < num_alloc_vfs; i++) {
+               vfs[i].pf = pf;
+               vfs[i].parent_type = I40E_SWITCH_ELEMENT_TYPE_VEB;
+               vfs[i].vf_id = i;
+
+               /* assign default capabilities */
+               set_bit(I40E_VIRTCHNL_VF_CAP_L2, &vfs[i].vf_caps);
+
+               ret = i40e_alloc_vf_res(&vfs[i]);
+               if (ret)
+                       break;
+
+               /* only reset and map a vf whose resources were
+                * successfully allocated
+                */
+               i40e_reset_vf(&vfs[i], true);
+
+               /* enable vf vplan_qtable mappings */
+               i40e_enable_vf_mappings(&vfs[i]);
+       }
+       pf->vf = vfs;
+       pf->num_alloc_vfs = num_alloc_vfs;
+
+err_alloc:
+       if (ret)
+               i40e_free_vfs(pf);
+err_iov:
+       return ret;
+}
+
+#endif
+/**
+ * i40e_pci_sriov_enable
+ * @pdev: pointer to a pci_dev structure
+ * @num_vfs: number of vfs to allocate
+ *
+ * Enable or change the number of VFs
+ **/
+static int i40e_pci_sriov_enable(struct pci_dev *pdev, int num_vfs)
+{
+#ifdef CONFIG_PCI_IOV
+       struct i40e_pf *pf = pci_get_drvdata(pdev);
+       int pre_existing_vfs = pci_num_vf(pdev);
+       int err = 0;
+
+       dev_info(&pdev->dev, "Allocating %d VFs.\n", num_vfs);
+       /* a different VF count means tearing down the old set first;
+        * the same count means there is nothing to do
+        */
+       if (pre_existing_vfs && pre_existing_vfs != num_vfs)
+               i40e_free_vfs(pf);
+       else if (pre_existing_vfs && pre_existing_vfs == num_vfs)
+               goto out;
+
+       /* refuse more VFs than were requested of this PF */
+       if (num_vfs > pf->num_req_vfs) {
+               err = -EPERM;
+               goto err_out;
+       }
+
+       err = i40e_alloc_vfs(pf, num_vfs);
+       if (err) {
+               dev_warn(&pdev->dev, "Failed to enable SR-IOV: %d\n", err);
+               goto err_out;
+       }
+
+out:
+       return num_vfs;
+
+err_out:
+       return err;
+#endif
+       /* reached only when CONFIG_PCI_IOV is not set */
+       return 0;
+}
+
+/**
+ * i40e_pci_sriov_configure
+ * @pdev: pointer to a pci_dev structure
+ * @num_vfs: number of vfs to allocate
+ *
+ * Enable or change the number of VFs. Called when the user updates the number
+ * of VFs in sysfs.
+ **/
+int i40e_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
+{
+       struct i40e_pf *pf = pci_get_drvdata(pdev);
+
+       /* a zero count from sysfs means tear all VFs down */
+       if (!num_vfs) {
+               i40e_free_vfs(pf);
+               return 0;
+       }
+
+       return i40e_pci_sriov_enable(pdev, num_vfs);
+}
+
+/***********************virtual channel routines******************/
+
+/**
+ * i40e_vc_send_msg_to_vf
+ * @vf: pointer to the vf info
+ * @v_opcode: virtual channel opcode
+ * @v_retval: virtual channel return value
+ * @msg: pointer to the msg buffer
+ * @msglen: msg length
+ *
+ * send msg to vf
+ **/
+static int i40e_vc_send_msg_to_vf(struct i40e_vf *vf, u32 v_opcode,
+                                 u32 v_retval, u8 *msg, u16 msglen)
+{
+       struct i40e_pf *pf = vf->pf;
+       struct i40e_hw *hw = &pf->hw;
+       i40e_status aq_ret;
+
+       /* single place to detect unsuccessful return values */
+       if (!v_retval) {
+               vf->num_valid_msgs++;
+       } else {
+               vf->num_invalid_msgs++;
+               dev_err(&pf->pdev->dev, "Failed opcode %d Error: %d\n",
+                       v_opcode, v_retval);
+               if (vf->num_invalid_msgs >
+                   I40E_DEFAULT_NUM_INVALID_MSGS_ALLOWED) {
+                       dev_err(&pf->pdev->dev,
+                               "Number of invalid messages exceeded for VF %d\n",
+                               vf->vf_id);
+                       dev_err(&pf->pdev->dev, "Use PF Control I/F to enable the VF\n");
+                       /* too many bad requests: park the VF until the PF
+                        * control interface re-enables it
+                        */
+                       set_bit(I40E_VF_STAT_DISABLED, &vf->vf_states);
+               }
+       }
+
+       aq_ret = i40e_aq_send_msg_to_vf(hw, vf->vf_id, v_opcode, v_retval,
+                                       msg, msglen, NULL);
+       if (!aq_ret)
+               return 0;
+
+       dev_err(&pf->pdev->dev,
+               "Unable to send the message to VF %d aq_err %d\n",
+               vf->vf_id, pf->hw.aq.asq_last_status);
+       return -EIO;
+}
+
+/**
+ * i40e_vc_send_resp_to_vf
+ * @vf: pointer to the vf info
+ * @opcode: operation code
+ * @retval: return value
+ *
+ * send resp msg to vf
+ **/
+static int i40e_vc_send_resp_to_vf(struct i40e_vf *vf,
+                                  enum i40e_virtchnl_ops opcode,
+                                  i40e_status retval)
+{
+       /* a payload-less reply: opcode + retval are the entire message */
+       return i40e_vc_send_msg_to_vf(vf, opcode, retval, NULL, 0);
+}
+
+/**
+ * i40e_vc_get_version_msg
+ * @vf: pointer to the vf info
+ *
+ * called from the vf to request the API version used by the PF
+ **/
+static int i40e_vc_get_version_msg(struct i40e_vf *vf)
+{
+       struct i40e_virtchnl_version_info info = {
+               I40E_VIRTCHNL_VERSION_MAJOR, I40E_VIRTCHNL_VERSION_MINOR
+       };
+
+       /* reply with the virtchnl API version this PF driver speaks */
+       return i40e_vc_send_msg_to_vf(vf, I40E_VIRTCHNL_OP_VERSION,
+                                     I40E_SUCCESS, (u8 *)&info,
+                                     sizeof(info));
+}
+
+/**
+ * i40e_vc_get_vf_resources_msg
+ * @vf: pointer to the vf info
+ *
+ * called from the vf to request its resources
+ **/
+static int i40e_vc_get_vf_resources_msg(struct i40e_vf *vf)
+{
+       struct i40e_virtchnl_vf_resource *vfres = NULL;
+       struct i40e_pf *pf = vf->pf;
+       i40e_status aq_ret = 0;
+       struct i40e_vsi *vsi;
+       int i = 0, len = 0;
+       int num_vsis = 1;
+       int ret;
+
+       if (!test_bit(I40E_VF_STAT_INIT, &vf->vf_states)) {
+               aq_ret = I40E_ERR_PARAM;
+               goto err;
+       }
+
+       /* one header plus one vsi_resource entry per reported VSI */
+       len = (sizeof(struct i40e_virtchnl_vf_resource) +
+              sizeof(struct i40e_virtchnl_vsi_resource) * num_vsis);
+
+       vfres = kzalloc(len, GFP_KERNEL);
+       if (!vfres) {
+               aq_ret = I40E_ERR_NO_MEMORY;
+               /* len = 0 so the error reply below carries no payload */
+               len = 0;
+               goto err;
+       }
+
+       vfres->vf_offload_flags = I40E_VIRTCHNL_VF_OFFLOAD_L2;
+       vsi = pf->vsi[vf->lan_vsi_index];
+       /* VLAN offload is only advertised when no port VLAN is set */
+       if (!vsi->info.pvid)
+               vfres->vf_offload_flags |= I40E_VIRTCHNL_VF_OFFLOAD_VLAN;
+
+       vfres->num_vsis = num_vsis;
+       vfres->num_queue_pairs = vf->num_queue_pairs;
+       vfres->max_vectors = pf->hw.func_caps.num_msix_vectors_vf;
+       if (vf->lan_vsi_index) {
+               vfres->vsi_res[i].vsi_id = vf->lan_vsi_index;
+               vfres->vsi_res[i].vsi_type = I40E_VSI_SRIOV;
+               vfres->vsi_res[i].num_queue_pairs =
+                   pf->vsi[vf->lan_vsi_index]->num_queue_pairs;
+               memcpy(vfres->vsi_res[i].default_mac_addr,
+                      vf->default_lan_addr.addr, ETH_ALEN);
+               i++;
+       }
+       set_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states);
+
+err:
+       /* send the response back to the vf */
+       ret = i40e_vc_send_msg_to_vf(vf, I40E_VIRTCHNL_OP_GET_VF_RESOURCES,
+                                    aq_ret, (u8 *)vfres, len);
+
+       /* kfree(NULL) is a no-op on the early-error paths */
+       kfree(vfres);
+       return ret;
+}
+
+/**
+ * i40e_vc_reset_vf_msg
+ * @vf: pointer to the vf info
+ *
+ * called from the vf to reset itself,
+ * unlike other virtchnl messages, pf driver
+ * doesn't send the response back to the vf
+ **/
+static int i40e_vc_reset_vf_msg(struct i40e_vf *vf)
+{
+       int ret = -ENOENT;
+
+       /* only an active vf may ask for a reset */
+       if (test_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states))
+               ret = i40e_reset_vf(vf, false);
+
+       return ret;
+}
+
+/**
+ * i40e_vc_config_promiscuous_mode_msg
+ * @vf: pointer to the vf info
+ * @msg: pointer to the msg buffer
+ * @msglen: msg length
+ *
+ * called from the vf to configure the promiscuous mode of
+ * vf vsis
+ **/
+static int i40e_vc_config_promiscuous_mode_msg(struct i40e_vf *vf,
+                                              u8 *msg, u16 msglen)
+{
+       struct i40e_virtchnl_promisc_info *info =
+           (struct i40e_virtchnl_promisc_info *)msg;
+       struct i40e_pf *pf = vf->pf;
+       struct i40e_hw *hw = &pf->hw;
+       bool allmulti = false;
+       bool promisc = false;
+       i40e_status aq_ret;
+
+       /* NOTE(review): this rejects every VSI whose type is not
+        * I40E_VSI_FCOE, i.e. only FCOE VSIs may change promiscuous
+        * mode here — confirm that is intended and not a typo
+        */
+       if (!test_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states) ||
+           !test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps) ||
+           !i40e_vc_isvalid_vsi_id(vf, info->vsi_id) ||
+           (pf->vsi[info->vsi_id]->type != I40E_VSI_FCOE)) {
+               aq_ret = I40E_ERR_PARAM;
+               goto error_param;
+       }
+
+       if (info->flags & I40E_FLAG_VF_UNICAST_PROMISC)
+               promisc = true;
+       aq_ret = i40e_aq_set_vsi_unicast_promiscuous(hw, info->vsi_id,
+                                                    promisc, NULL);
+       /* a unicast failure skips the multicast update entirely */
+       if (aq_ret)
+               goto error_param;
+
+       if (info->flags & I40E_FLAG_VF_MULTICAST_PROMISC)
+               allmulti = true;
+       aq_ret = i40e_aq_set_vsi_multicast_promiscuous(hw, info->vsi_id,
+                                                      allmulti, NULL);
+
+error_param:
+       /* send the response to the vf */
+       return i40e_vc_send_resp_to_vf(vf,
+                                      I40E_VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE,
+                                      aq_ret);
+}
+
+/**
+ * i40e_vc_config_queues_msg
+ * @vf: pointer to the vf info
+ * @msg: pointer to the msg buffer
+ * @msglen: msg length
+ *
+ * called from the vf to configure the rx/tx
+ * queues
+ **/
+static int i40e_vc_config_queues_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
+{
+       struct i40e_virtchnl_vsi_queue_config_info *qci =
+           (struct i40e_virtchnl_vsi_queue_config_info *)msg;
+       struct i40e_virtchnl_queue_pair_info *qpi;
+       u16 vsi_id, vsi_queue_id;
+       i40e_status aq_ret = 0;
+       int i;
+
+       if (!test_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states)) {
+               aq_ret = I40E_ERR_PARAM;
+               goto error_param;
+       }
+
+       vsi_id = qci->vsi_id;
+       if (!i40e_vc_isvalid_vsi_id(vf, vsi_id)) {
+               aq_ret = I40E_ERR_PARAM;
+               goto error_param;
+       }
+       for (i = 0; i < qci->num_queue_pairs; i++) {
+               qpi = &qci->qpair[i];
+               vsi_queue_id = qpi->txq.queue_id;
+               /* tx and rx halves of a pair must agree on vsi and
+                * queue id; any mismatch aborts the remaining pairs
+                */
+               if ((qpi->txq.vsi_id != vsi_id) ||
+                   (qpi->rxq.vsi_id != vsi_id) ||
+                   (qpi->rxq.queue_id != vsi_queue_id) ||
+                   !i40e_vc_isvalid_queue_id(vf, vsi_id, vsi_queue_id)) {
+                       aq_ret = I40E_ERR_PARAM;
+                       goto error_param;
+               }
+
+               if (i40e_config_vsi_rx_queue(vf, vsi_id, vsi_queue_id,
+                                            &qpi->rxq) ||
+                   i40e_config_vsi_tx_queue(vf, vsi_id, vsi_queue_id,
+                                            &qpi->txq)) {
+                       aq_ret = I40E_ERR_PARAM;
+                       goto error_param;
+               }
+       }
+
+error_param:
+       /* send the response to the vf */
+       return i40e_vc_send_resp_to_vf(vf, I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES,
+                                      aq_ret);
+}
+
+/**
+ * i40e_vc_config_irq_map_msg
+ * @vf: pointer to the vf info
+ * @msg: pointer to the msg buffer
+ * @msglen: msg length
+ *
+ * called from the vf to configure the irq to
+ * queue map
+ **/
+static int i40e_vc_config_irq_map_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
+{
+       struct i40e_virtchnl_irq_map_info *irqmap_info =
+           (struct i40e_virtchnl_irq_map_info *)msg;
+       struct i40e_virtchnl_vector_map *map;
+       u16 vsi_id, vsi_queue_id, vector_id;
+       i40e_status aq_ret = 0;
+       unsigned long tempmap;
+       int i;
+
+       if (!test_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states)) {
+               aq_ret = I40E_ERR_PARAM;
+               goto error_param;
+       }
+
+       for (i = 0; i < irqmap_info->num_vectors; i++) {
+               map = &irqmap_info->vecmap[i];
+
+               vector_id = map->vector_id;
+               vsi_id = map->vsi_id;
+               /* validate msg params */
+               if (!i40e_vc_isvalid_vector_id(vf, vector_id) ||
+                   !i40e_vc_isvalid_vsi_id(vf, vsi_id)) {
+                       aq_ret = I40E_ERR_PARAM;
+                       goto error_param;
+               }
+
+               /* lookout for the invalid queue index */
+               /* copy into an unsigned long so the find_*_bit helpers
+                * can scan the queue bitmap
+                */
+               tempmap = map->rxq_map;
+               vsi_queue_id = find_first_bit(&tempmap, I40E_MAX_VSI_QP);
+               while (vsi_queue_id < I40E_MAX_VSI_QP) {
+                       if (!i40e_vc_isvalid_queue_id(vf, vsi_id,
+                                                     vsi_queue_id)) {
+                               aq_ret = I40E_ERR_PARAM;
+                               goto error_param;
+                       }
+                       vsi_queue_id = find_next_bit(&tempmap, I40E_MAX_VSI_QP,
+                                                    vsi_queue_id + 1);
+               }
+
+               tempmap = map->txq_map;
+               vsi_queue_id = find_first_bit(&tempmap, I40E_MAX_VSI_QP);
+               while (vsi_queue_id < I40E_MAX_VSI_QP) {
+                       if (!i40e_vc_isvalid_queue_id(vf, vsi_id,
+                                                     vsi_queue_id)) {
+                               aq_ret = I40E_ERR_PARAM;
+                               goto error_param;
+                       }
+                       vsi_queue_id = find_next_bit(&tempmap, I40E_MAX_VSI_QP,
+                                                    vsi_queue_id + 1);
+               }
+
+               /* only link the vector once every queue bit validated */
+               i40e_config_irq_link_list(vf, vsi_id, map);
+       }
+error_param:
+       /* send the response to the vf */
+       return i40e_vc_send_resp_to_vf(vf, I40E_VIRTCHNL_OP_CONFIG_IRQ_MAP,
+                                      aq_ret);
+}
+
+/**
+ * i40e_vc_enable_queues_msg
+ * @vf: pointer to the vf info
+ * @msg: pointer to the msg buffer
+ * @msglen: msg length
+ *
+ * called from the vf to enable all or specific queue(s)
+ **/
+static int i40e_vc_enable_queues_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
+{
+       struct i40e_virtchnl_queue_select *vqs =
+           (struct i40e_virtchnl_queue_select *)msg;
+       struct i40e_pf *pf = vf->pf;
+       u16 vsi_id = vqs->vsi_id;
+       i40e_status aq_ret = 0;
+       unsigned long tempmap;
+       u16 queue_id;
+
+       if (!test_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states)) {
+               aq_ret = I40E_ERR_PARAM;
+               goto error_param;
+       }
+
+       if (!i40e_vc_isvalid_vsi_id(vf, vsi_id)) {
+               aq_ret = I40E_ERR_PARAM;
+               goto error_param;
+       }
+
+       /* at least one queue must be selected in either bitmap */
+       if ((0 == vqs->rx_queues) && (0 == vqs->tx_queues)) {
+               aq_ret = I40E_ERR_PARAM;
+               goto error_param;
+       }
+
+       /* first pass: issue the enable on every queue selected in the
+        * rx/tx bitmaps
+        */
+       tempmap = vqs->rx_queues;
+       queue_id = find_first_bit(&tempmap, I40E_MAX_VSI_QP);
+       while (queue_id < I40E_MAX_VSI_QP) {
+               if (!i40e_vc_isvalid_queue_id(vf, vsi_id, queue_id)) {
+                       aq_ret = I40E_ERR_PARAM;
+                       goto error_param;
+               }
+               i40e_ctrl_vsi_rx_queue(vf, vsi_id, queue_id,
+                                      I40E_QUEUE_CTRL_ENABLE);
+
+               queue_id = find_next_bit(&tempmap, I40E_MAX_VSI_QP,
+                                        queue_id + 1);
+       }
+
+       tempmap = vqs->tx_queues;
+       queue_id = find_first_bit(&tempmap, I40E_MAX_VSI_QP);
+       while (queue_id < I40E_MAX_VSI_QP) {
+               if (!i40e_vc_isvalid_queue_id(vf, vsi_id, queue_id)) {
+                       aq_ret = I40E_ERR_PARAM;
+                       goto error_param;
+               }
+               i40e_ctrl_vsi_tx_queue(vf, vsi_id, queue_id,
+                                      I40E_QUEUE_CTRL_ENABLE);
+
+               queue_id = find_next_bit(&tempmap, I40E_MAX_VSI_QP,
+                                        queue_id + 1);
+       }
+
+       /* Poll the status register to make sure that the
+        * requested op was completed successfully
+        */
+       udelay(10);
+
+       /* second pass: verify each enable completed; failures are only
+        * logged and do not change aq_ret
+        */
+       tempmap = vqs->rx_queues;
+       queue_id = find_first_bit(&tempmap, I40E_MAX_VSI_QP);
+       while (queue_id < I40E_MAX_VSI_QP) {
+               if (i40e_ctrl_vsi_rx_queue(vf, vsi_id, queue_id,
+                                          I40E_QUEUE_CTRL_ENABLECHECK)) {
+                       dev_err(&pf->pdev->dev,
+                               "Queue control check failed on RX queue %d of VSI %d VF %d\n",
+                               queue_id, vsi_id, vf->vf_id);
+               }
+               queue_id = find_next_bit(&tempmap, I40E_MAX_VSI_QP,
+                                        queue_id + 1);
+       }
+
+       tempmap = vqs->tx_queues;
+       queue_id = find_first_bit(&tempmap, I40E_MAX_VSI_QP);
+       while (queue_id < I40E_MAX_VSI_QP) {
+               if (i40e_ctrl_vsi_tx_queue(vf, vsi_id, queue_id,
+                                          I40E_QUEUE_CTRL_ENABLECHECK)) {
+                       dev_err(&pf->pdev->dev,
+                               "Queue control check failed on TX queue %d of VSI %d VF %d\n",
+                               queue_id, vsi_id, vf->vf_id);
+               }
+               queue_id = find_next_bit(&tempmap, I40E_MAX_VSI_QP,
+                                        queue_id + 1);
+       }
+
+error_param:
+       /* send the response to the vf */
+       return i40e_vc_send_resp_to_vf(vf, I40E_VIRTCHNL_OP_ENABLE_QUEUES,
+                                      aq_ret);
+}
+
+/**
+ * i40e_vc_disable_queues_msg
+ * @vf: pointer to the vf info
+ * @msg: pointer to the msg buffer
+ * @msglen: msg length
+ *
+ * called from the vf to disable all or specific
+ * queue(s)
+ **/
+static int i40e_vc_disable_queues_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
+{
+       struct i40e_virtchnl_queue_select *vqs =
+           (struct i40e_virtchnl_queue_select *)msg;
+       struct i40e_pf *pf = vf->pf;
+       u16 vsi_id = vqs->vsi_id;
+       i40e_status aq_ret = 0;
+       unsigned long tempmap;
+       u16 queue_id;
+
+       if (!test_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states)) {
+               aq_ret = I40E_ERR_PARAM;
+               goto error_param;
+       }
+
+       if (!i40e_vc_isvalid_vsi_id(vf, vqs->vsi_id)) {
+               aq_ret = I40E_ERR_PARAM;
+               goto error_param;
+       }
+
+       /* at least one queue must be selected in either bitmap */
+       if ((0 == vqs->rx_queues) && (0 == vqs->tx_queues)) {
+               aq_ret = I40E_ERR_PARAM;
+               goto error_param;
+       }
+
+       /* first pass: issue the disable on every queue selected in the
+        * rx/tx bitmaps
+        */
+       tempmap = vqs->rx_queues;
+       queue_id = find_first_bit(&tempmap, I40E_MAX_VSI_QP);
+       while (queue_id < I40E_MAX_VSI_QP) {
+               if (!i40e_vc_isvalid_queue_id(vf, vsi_id, queue_id)) {
+                       aq_ret = I40E_ERR_PARAM;
+                       goto error_param;
+               }
+               i40e_ctrl_vsi_rx_queue(vf, vsi_id, queue_id,
+                                      I40E_QUEUE_CTRL_DISABLE);
+
+               queue_id = find_next_bit(&tempmap, I40E_MAX_VSI_QP,
+                                        queue_id + 1);
+       }
+
+       tempmap = vqs->tx_queues;
+       queue_id = find_first_bit(&tempmap, I40E_MAX_VSI_QP);
+       while (queue_id < I40E_MAX_VSI_QP) {
+               if (!i40e_vc_isvalid_queue_id(vf, vsi_id, queue_id)) {
+                       aq_ret = I40E_ERR_PARAM;
+                       goto error_param;
+               }
+               i40e_ctrl_vsi_tx_queue(vf, vsi_id, queue_id,
+                                      I40E_QUEUE_CTRL_DISABLE);
+
+               queue_id = find_next_bit(&tempmap, I40E_MAX_VSI_QP,
+                                        queue_id + 1);
+       }
+
+       /* Poll the status register to make sure that the
+        * requested op was completed successfully
+        */
+       udelay(10);
+
+       /* second pass: verify each disable completed; failures are only
+        * logged and do not change aq_ret
+        */
+       tempmap = vqs->rx_queues;
+       queue_id = find_first_bit(&tempmap, I40E_MAX_VSI_QP);
+       while (queue_id < I40E_MAX_VSI_QP) {
+               if (i40e_ctrl_vsi_rx_queue(vf, vsi_id, queue_id,
+                                          I40E_QUEUE_CTRL_DISABLECHECK)) {
+                       dev_err(&pf->pdev->dev,
+                               "Queue control check failed on RX queue %d of VSI %d VF %d\n",
+                               queue_id, vsi_id, vf->vf_id);
+               }
+               queue_id = find_next_bit(&tempmap, I40E_MAX_VSI_QP,
+                                        queue_id + 1);
+       }
+
+       tempmap = vqs->tx_queues;
+       queue_id = find_first_bit(&tempmap, I40E_MAX_VSI_QP);
+       while (queue_id < I40E_MAX_VSI_QP) {
+               if (i40e_ctrl_vsi_tx_queue(vf, vsi_id, queue_id,
+                                          I40E_QUEUE_CTRL_DISABLECHECK)) {
+                       dev_err(&pf->pdev->dev,
+                               "Queue control check failed on TX queue %d of VSI %d VF %d\n",
+                               queue_id, vsi_id, vf->vf_id);
+               }
+               queue_id = find_next_bit(&tempmap, I40E_MAX_VSI_QP,
+                                        queue_id + 1);
+       }
+
+error_param:
+       /* send the response to the vf */
+       return i40e_vc_send_resp_to_vf(vf, I40E_VIRTCHNL_OP_DISABLE_QUEUES,
+                                      aq_ret);
+}
+
+/**
+ * i40e_vc_get_stats_msg
+ * @vf: pointer to the vf info
+ * @msg: pointer to the msg buffer
+ * @msglen: msg length
+ *
+ * called from the vf to get vsi stats
+ **/
+static int i40e_vc_get_stats_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
+{
+       struct i40e_virtchnl_queue_select *vqs =
+           (struct i40e_virtchnl_queue_select *)msg;
+       struct i40e_pf *pf = vf->pf;
+       struct i40e_eth_stats stats;
+       i40e_status aq_ret = 0;
+       struct i40e_vsi *vsi;
+
+       /* Zero the stats up front so the error path still replies with a
+        * well-defined (all-zero) payload.
+        */
+       memset(&stats, 0, sizeof(struct i40e_eth_stats));
+
+       /* The VF must be active and must own the VSI it asks about. */
+       if (!test_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states)) {
+               aq_ret = I40E_ERR_PARAM;
+               goto error_param;
+       }
+
+       if (!i40e_vc_isvalid_vsi_id(vf, vqs->vsi_id)) {
+               aq_ret = I40E_ERR_PARAM;
+               goto error_param;
+       }
+
+       vsi = pf->vsi[vqs->vsi_id];
+       if (!vsi) {
+               aq_ret = I40E_ERR_PARAM;
+               goto error_param;
+       }
+       /* Refresh the VSI ethernet stats and snapshot them for the VF. */
+       i40e_update_eth_stats(vsi);
+       memcpy(&stats, &vsi->eth_stats, sizeof(struct i40e_eth_stats));
+
+error_param:
+       /* send the response back to the vf */
+       return i40e_vc_send_msg_to_vf(vf, I40E_VIRTCHNL_OP_GET_STATS, aq_ret,
+                                     (u8 *)&stats, sizeof(stats));
+}
+
+/**
+ * i40e_vc_add_mac_addr_msg
+ * @vf: pointer to the vf info
+ * @msg: pointer to the msg buffer
+ * @msglen: msg length
+ *
+ * add guest mac address filter
+ **/
+static int i40e_vc_add_mac_addr_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
+{
+       struct i40e_virtchnl_ether_addr_list *al =
+           (struct i40e_virtchnl_ether_addr_list *)msg;
+       struct i40e_pf *pf = vf->pf;
+       struct i40e_vsi *vsi = NULL;
+       u16 vsi_id = al->vsi_id;
+       i40e_status aq_ret = 0;
+       int i;
+
+       /* The VF must be active and privileged, and must own the VSI. */
+       if (!test_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states) ||
+           !test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps) ||
+           !i40e_vc_isvalid_vsi_id(vf, vsi_id)) {
+               aq_ret = I40E_ERR_PARAM;
+               goto error_param;
+       }
+
+       /* Validate every address before touching the filter list so a bad
+        * entry cannot leave a half-applied request behind.
+        */
+       for (i = 0; i < al->num_elements; i++) {
+               if (is_broadcast_ether_addr(al->list[i].addr) ||
+                   is_zero_ether_addr(al->list[i].addr)) {
+                       dev_err(&pf->pdev->dev, "invalid VF MAC addr %pMAC\n",
+                               al->list[i].addr);
+                       aq_ret = I40E_ERR_PARAM;
+                       goto error_param;
+               }
+       }
+       vsi = pf->vsi[vsi_id];
+
+       /* add new addresses to the list */
+       for (i = 0; i < al->num_elements; i++) {
+               struct i40e_mac_filter *f;
+
+               f = i40e_find_mac(vsi, al->list[i].addr, true, false);
+               /* Add a filter only when one does not already exist.  The
+                * previous "if (f)" test was inverted: a genuinely new
+                * address left f == NULL and failed with I40E_ERR_PARAM
+                * below, so no new MAC could ever be added.
+                */
+               if (!f) {
+                       if (i40e_is_vsi_in_vlan(vsi))
+                               f = i40e_put_mac_in_vlan(vsi, al->list[i].addr,
+                                                        true, false);
+                       else
+                               f = i40e_add_filter(vsi, al->list[i].addr, -1,
+                                                   true, false);
+               }
+
+               if (!f) {
+                       dev_err(&pf->pdev->dev,
+                               "Unable to add VF MAC filter\n");
+                       aq_ret = I40E_ERR_PARAM;
+                       goto error_param;
+               }
+       }
+
+       /* program the updated filter list */
+       if (i40e_sync_vsi_filters(vsi))
+               dev_err(&pf->pdev->dev, "Unable to program VF MAC filters\n");
+
+error_param:
+       /* send the response to the vf */
+       return i40e_vc_send_resp_to_vf(vf, I40E_VIRTCHNL_OP_ADD_ETHER_ADDRESS,
+                                      aq_ret);
+}
+
+/**
+ * i40e_vc_del_mac_addr_msg
+ * @vf: pointer to the vf info
+ * @msg: pointer to the msg buffer
+ * @msglen: msg length
+ *
+ * remove guest mac address filter
+ **/
+static int i40e_vc_del_mac_addr_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
+{
+       struct i40e_virtchnl_ether_addr_list *addrs =
+           (struct i40e_virtchnl_ether_addr_list *)msg;
+       struct i40e_pf *pf = vf->pf;
+       i40e_status aq_ret = 0;
+       struct i40e_vsi *vsi;
+       int idx;
+
+       /* The VF must be active and privileged and must own the VSI. */
+       if (!test_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states) ||
+           !test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps) ||
+           !i40e_vc_isvalid_vsi_id(vf, addrs->vsi_id)) {
+               aq_ret = I40E_ERR_PARAM;
+               goto error_param;
+       }
+
+       vsi = pf->vsi[addrs->vsi_id];
+
+       /* Drop every requested address across all VLANs. */
+       for (idx = 0; idx < addrs->num_elements; idx++)
+               i40e_del_filter(vsi, addrs->list[idx].addr,
+                               I40E_VLAN_ANY, true, false);
+
+       /* Push the shrunken filter list down to the hardware. */
+       if (i40e_sync_vsi_filters(vsi))
+               dev_err(&pf->pdev->dev, "Unable to program VF MAC filters\n");
+
+error_param:
+       /* Always answer the VF, success or failure. */
+       return i40e_vc_send_resp_to_vf(vf, I40E_VIRTCHNL_OP_DEL_ETHER_ADDRESS,
+                                      aq_ret);
+}
+
+/**
+ * i40e_vc_add_vlan_msg
+ * @vf: pointer to the vf info
+ * @msg: pointer to the msg buffer
+ * @msglen: msg length
+ *
+ * program guest vlan id
+ **/
+static int i40e_vc_add_vlan_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
+{
+       struct i40e_virtchnl_vlan_filter_list *vfl =
+           (struct i40e_virtchnl_vlan_filter_list *)msg;
+       struct i40e_pf *pf = vf->pf;
+       i40e_status aq_ret = 0;
+       struct i40e_vsi *vsi;
+       int i;
+
+       /* The VF must be active and privileged and must own the VSI. */
+       if (!test_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states) ||
+           !test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps) ||
+           !i40e_vc_isvalid_vsi_id(vf, vfl->vsi_id)) {
+               aq_ret = I40E_ERR_PARAM;
+               goto error_param;
+       }
+
+       /* Refuse the whole request if any id is out of VLAN range. */
+       for (i = 0; i < vfl->num_elements; i++) {
+               if (vfl->vlan_id[i] <= I40E_MAX_VLANID)
+                       continue;
+               aq_ret = I40E_ERR_PARAM;
+               dev_err(&pf->pdev->dev,
+                       "invalid VF VLAN id %d\n", vfl->vlan_id[i]);
+               goto error_param;
+       }
+
+       /* A configured port VLAN (pvid) blocks VF-driven vlan changes. */
+       vsi = pf->vsi[vfl->vsi_id];
+       if (vsi->info.pvid) {
+               aq_ret = I40E_ERR_PARAM;
+               goto error_param;
+       }
+
+       i40e_vlan_stripping_enable(vsi);
+       for (i = 0; i < vfl->num_elements; i++) {
+               /* add new VLAN filter; failures are logged, not fatal */
+               int err = i40e_vsi_add_vlan(vsi, vfl->vlan_id[i]);
+
+               if (err)
+                       dev_err(&pf->pdev->dev,
+                               "Unable to add VF vlan filter %d, error %d\n",
+                               vfl->vlan_id[i], err);
+       }
+
+error_param:
+       /* send the response to the vf */
+       return i40e_vc_send_resp_to_vf(vf, I40E_VIRTCHNL_OP_ADD_VLAN, aq_ret);
+}
+
+/**
+ * i40e_vc_remove_vlan_msg
+ * @vf: pointer to the vf info
+ * @msg: pointer to the msg buffer
+ * @msglen: msg length
+ *
+ * remove programmed guest vlan id
+ **/
+static int i40e_vc_remove_vlan_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
+{
+       struct i40e_virtchnl_vlan_filter_list *vfl =
+           (struct i40e_virtchnl_vlan_filter_list *)msg;
+       struct i40e_pf *pf = vf->pf;
+       i40e_status aq_ret = 0;
+       struct i40e_vsi *vsi;
+       int idx;
+
+       /* The VF must be active and privileged and must own the VSI. */
+       if (!test_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states) ||
+           !test_bit(I40E_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps) ||
+           !i40e_vc_isvalid_vsi_id(vf, vfl->vsi_id)) {
+               aq_ret = I40E_ERR_PARAM;
+               goto error_param;
+       }
+
+       /* Every requested id must be a legal VLAN id. */
+       for (idx = 0; idx < vfl->num_elements; idx++) {
+               if (vfl->vlan_id[idx] > I40E_MAX_VLANID) {
+                       aq_ret = I40E_ERR_PARAM;
+                       goto error_param;
+               }
+       }
+
+       /* A configured port VLAN (pvid) blocks VF-driven vlan changes. */
+       vsi = pf->vsi[vfl->vsi_id];
+       if (vsi->info.pvid) {
+               aq_ret = I40E_ERR_PARAM;
+               goto error_param;
+       }
+
+       for (idx = 0; idx < vfl->num_elements; idx++) {
+               /* failures are logged but do not abort the request */
+               int err = i40e_vsi_kill_vlan(vsi, vfl->vlan_id[idx]);
+
+               if (err)
+                       dev_err(&pf->pdev->dev,
+                               "Unable to delete VF vlan filter %d, error %d\n",
+                               vfl->vlan_id[idx], err);
+       }
+
+error_param:
+       /* send the response to the vf */
+       return i40e_vc_send_resp_to_vf(vf, I40E_VIRTCHNL_OP_DEL_VLAN, aq_ret);
+}
+
+/**
+ * i40e_vc_fcoe_msg
+ * @vf: pointer to the vf info
+ * @msg: pointer to the msg buffer
+ * @msglen: msg length
+ *
+ * called from the vf for the fcoe msgs
+ **/
+static int i40e_vc_fcoe_msg(struct i40e_vf *vf, u8 *msg, u16 msglen)
+{
+       i40e_status aq_ret;
+
+       /* FCoE messages require an active VF with FCoE enabled; even then
+        * the operation itself is not implemented yet.
+        */
+       if (test_bit(I40E_VF_STAT_ACTIVE, &vf->vf_states) &&
+           test_bit(I40E_VF_STAT_FCOEENA, &vf->vf_states))
+               aq_ret = I40E_ERR_NOT_IMPLEMENTED;
+       else
+               aq_ret = I40E_ERR_PARAM;
+
+       /* send the response to the vf */
+       return i40e_vc_send_resp_to_vf(vf, I40E_VIRTCHNL_OP_FCOE, aq_ret);
+}
+
+/**
+ * i40e_vc_validate_vf_msg
+ * @vf: pointer to the vf info
+ * @msg: pointer to the msg buffer
+ * @msglen: msg length
+ * @msghndl: msg handle
+ *
+ * validate msg
+ *
+ * Computes the expected length for the opcode (including any
+ * variable-length tail declared inside the message) and rejects the
+ * message on any mismatch.  Returns 0 if the message is acceptable,
+ * -EPERM for opcodes a VF may never send, -EINVAL for malformed input.
+ **/
+static int i40e_vc_validate_vf_msg(struct i40e_vf *vf, u32 v_opcode,
+                                  u32 v_retval, u8 *msg, u16 msglen)
+{
+       bool err_msg_format = false;
+       int valid_len;
+
+       /* Check if VF is disabled. */
+       if (test_bit(I40E_VF_STAT_DISABLED, &vf->vf_states))
+               return I40E_ERR_PARAM;
+
+       /* Validate message length. */
+       switch (v_opcode) {
+       case I40E_VIRTCHNL_OP_VERSION:
+               valid_len = sizeof(struct i40e_virtchnl_version_info);
+               break;
+       case I40E_VIRTCHNL_OP_RESET_VF:
+       case I40E_VIRTCHNL_OP_GET_VF_RESOURCES:
+               valid_len = 0;
+               break;
+       case I40E_VIRTCHNL_OP_CONFIG_TX_QUEUE:
+               valid_len = sizeof(struct i40e_virtchnl_txq_info);
+               break;
+       case I40E_VIRTCHNL_OP_CONFIG_RX_QUEUE:
+               valid_len = sizeof(struct i40e_virtchnl_rxq_info);
+               break;
+       case I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES:
+               valid_len = sizeof(struct i40e_virtchnl_vsi_queue_config_info);
+               /* only trust the element count once the fixed header fits */
+               if (msglen >= valid_len) {
+                       struct i40e_virtchnl_vsi_queue_config_info *vqc =
+                           (struct i40e_virtchnl_vsi_queue_config_info *)msg;
+                       valid_len += (vqc->num_queue_pairs *
+                                     sizeof(struct
+                                            i40e_virtchnl_queue_pair_info));
+                       if (vqc->num_queue_pairs == 0)
+                               err_msg_format = true;
+               }
+               break;
+       case I40E_VIRTCHNL_OP_CONFIG_IRQ_MAP:
+               valid_len = sizeof(struct i40e_virtchnl_irq_map_info);
+               if (msglen >= valid_len) {
+                       struct i40e_virtchnl_irq_map_info *vimi =
+                           (struct i40e_virtchnl_irq_map_info *)msg;
+                       valid_len += (vimi->num_vectors *
+                                     sizeof(struct i40e_virtchnl_vector_map));
+                       if (vimi->num_vectors == 0)
+                               err_msg_format = true;
+               }
+               break;
+       case I40E_VIRTCHNL_OP_ENABLE_QUEUES:
+       case I40E_VIRTCHNL_OP_DISABLE_QUEUES:
+               valid_len = sizeof(struct i40e_virtchnl_queue_select);
+               break;
+       case I40E_VIRTCHNL_OP_ADD_ETHER_ADDRESS:
+       case I40E_VIRTCHNL_OP_DEL_ETHER_ADDRESS:
+               valid_len = sizeof(struct i40e_virtchnl_ether_addr_list);
+               if (msglen >= valid_len) {
+                       struct i40e_virtchnl_ether_addr_list *veal =
+                           (struct i40e_virtchnl_ether_addr_list *)msg;
+                       valid_len += veal->num_elements *
+                           sizeof(struct i40e_virtchnl_ether_addr);
+                       if (veal->num_elements == 0)
+                               err_msg_format = true;
+               }
+               break;
+       case I40E_VIRTCHNL_OP_ADD_VLAN:
+       case I40E_VIRTCHNL_OP_DEL_VLAN:
+               valid_len = sizeof(struct i40e_virtchnl_vlan_filter_list);
+               if (msglen >= valid_len) {
+                       struct i40e_virtchnl_vlan_filter_list *vfl =
+                           (struct i40e_virtchnl_vlan_filter_list *)msg;
+                       valid_len += vfl->num_elements * sizeof(u16);
+                       if (vfl->num_elements == 0)
+                               err_msg_format = true;
+               }
+               break;
+       case I40E_VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE:
+               valid_len = sizeof(struct i40e_virtchnl_promisc_info);
+               break;
+       case I40E_VIRTCHNL_OP_GET_STATS:
+               valid_len = sizeof(struct i40e_virtchnl_queue_select);
+               break;
+       /* These are always errors coming from the VF. */
+       case I40E_VIRTCHNL_OP_EVENT:
+       case I40E_VIRTCHNL_OP_UNKNOWN:
+       default:
+               return -EPERM;
+       }
+       /* few more checks */
+       if ((valid_len != msglen) || (err_msg_format)) {
+               i40e_vc_send_resp_to_vf(vf, v_opcode, I40E_ERR_PARAM);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+/**
+ * i40e_vc_process_vf_msg
+ * @pf: pointer to the pf structure
+ * @vf_id: source vf id
+ * @msg: pointer to the msg buffer
+ * @msglen: msg length
+ * @msghndl: msg handle
+ *
+ * called from the common aeq/arq handler to
+ * process request from vf
+ **/
+int i40e_vc_process_vf_msg(struct i40e_pf *pf, u16 vf_id, u32 v_opcode,
+                          u32 v_retval, u8 *msg, u16 msglen)
+{
+       struct i40e_vf *vf = &(pf->vf[vf_id]);
+       struct i40e_hw *hw = &pf->hw;
+       int ret;
+
+       /* count every VF admin-queue request on this PF */
+       pf->vf_aq_requests++;
+       /* perform basic checks on the msg */
+       ret = i40e_vc_validate_vf_msg(vf, v_opcode, v_retval, msg, msglen);
+
+       if (ret) {
+               dev_err(&pf->pdev->dev, "invalid message from vf %d\n", vf_id);
+               return ret;
+       }
+       /* mark the VF active (VFGEN_RSTAT1 <- VFACTIVE) before dispatch */
+       wr32(hw, I40E_VFGEN_RSTAT1(vf_id), I40E_VFR_VFACTIVE);
+       /* dispatch to the per-opcode handler; each handler sends its own
+        * reply to the VF and returns the send status
+        */
+       switch (v_opcode) {
+       case I40E_VIRTCHNL_OP_VERSION:
+               ret = i40e_vc_get_version_msg(vf);
+               break;
+       case I40E_VIRTCHNL_OP_GET_VF_RESOURCES:
+               ret = i40e_vc_get_vf_resources_msg(vf);
+               break;
+       case I40E_VIRTCHNL_OP_RESET_VF:
+               ret = i40e_vc_reset_vf_msg(vf);
+               break;
+       case I40E_VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE:
+               ret = i40e_vc_config_promiscuous_mode_msg(vf, msg, msglen);
+               break;
+       case I40E_VIRTCHNL_OP_CONFIG_VSI_QUEUES:
+               ret = i40e_vc_config_queues_msg(vf, msg, msglen);
+               break;
+       case I40E_VIRTCHNL_OP_CONFIG_IRQ_MAP:
+               ret = i40e_vc_config_irq_map_msg(vf, msg, msglen);
+               break;
+       case I40E_VIRTCHNL_OP_ENABLE_QUEUES:
+               ret = i40e_vc_enable_queues_msg(vf, msg, msglen);
+               break;
+       case I40E_VIRTCHNL_OP_DISABLE_QUEUES:
+               ret = i40e_vc_disable_queues_msg(vf, msg, msglen);
+               break;
+       case I40E_VIRTCHNL_OP_ADD_ETHER_ADDRESS:
+               ret = i40e_vc_add_mac_addr_msg(vf, msg, msglen);
+               break;
+       case I40E_VIRTCHNL_OP_DEL_ETHER_ADDRESS:
+               ret = i40e_vc_del_mac_addr_msg(vf, msg, msglen);
+               break;
+       case I40E_VIRTCHNL_OP_ADD_VLAN:
+               ret = i40e_vc_add_vlan_msg(vf, msg, msglen);
+               break;
+       case I40E_VIRTCHNL_OP_DEL_VLAN:
+               ret = i40e_vc_remove_vlan_msg(vf, msg, msglen);
+               break;
+       case I40E_VIRTCHNL_OP_GET_STATS:
+               ret = i40e_vc_get_stats_msg(vf, msg, msglen);
+               break;
+       case I40E_VIRTCHNL_OP_FCOE:
+               ret = i40e_vc_fcoe_msg(vf, msg, msglen);
+               break;
+       case I40E_VIRTCHNL_OP_UNKNOWN:
+       default:
+               dev_err(&pf->pdev->dev,
+                       "Unsupported opcode %d from vf %d\n", v_opcode, vf_id);
+               ret = i40e_vc_send_resp_to_vf(vf, v_opcode,
+                                             I40E_ERR_NOT_IMPLEMENTED);
+               break;
+       }
+
+       return ret;
+}
+
+/**
+ * i40e_vc_process_vflr_event
+ * @pf: pointer to the pf structure
+ *
+ * called from the vlfr irq handler to
+ * free up vf resources and state variables
+ **/
+int i40e_vc_process_vflr_event(struct i40e_pf *pf)
+{
+       u32 reg, reg_idx, bit_idx, vf_id;
+       struct i40e_hw *hw = &pf->hw;
+       struct i40e_vf *vf;
+
+       /* nothing to do unless the irq path flagged a pending VFLR */
+       if (!test_bit(__I40E_VFLR_EVENT_PENDING, &pf->state))
+               return 0;
+
+       clear_bit(__I40E_VFLR_EVENT_PENDING, &pf->state);
+       for (vf_id = 0; vf_id < pf->num_alloc_vfs; vf_id++) {
+               /* GLGEN_VFLRSTAT packs 32 VFs per register: split the
+                * absolute VF number into register index and bit index
+                */
+               reg_idx = (hw->func_caps.vf_base_id + vf_id) / 32;
+               bit_idx = (hw->func_caps.vf_base_id + vf_id) % 32;
+               /* read GLGEN_VFLRSTAT register to find out the flr vfs */
+               vf = &pf->vf[vf_id];
+               reg = rd32(hw, I40E_GLGEN_VFLRSTAT(reg_idx));
+               if (reg & (1 << bit_idx)) {
+                       /* clear the bit in GLGEN_VFLRSTAT */
+                       wr32(hw, I40E_GLGEN_VFLRSTAT(reg_idx), (1 << bit_idx));
+
+                       if (i40e_reset_vf(vf, true))
+                               dev_err(&pf->pdev->dev,
+                                       "Unable to reset the VF %d\n", vf_id);
+                       /* free up vf resources to destroy vsi state */
+                       i40e_free_vf_res(vf);
+
+                       /* allocate new vf resources with the default state */
+                       if (i40e_alloc_vf_res(vf))
+                               dev_err(&pf->pdev->dev,
+                                       "Unable to allocate VF resources %d\n",
+                                       vf_id);
+
+                       i40e_enable_vf_mappings(vf);
+               }
+       }
+
+       /* re-enable vflr interrupt cause */
+       reg = rd32(hw, I40E_PFINT_ICR0_ENA);
+       reg |= I40E_PFINT_ICR0_ENA_VFLR_MASK;
+       wr32(hw, I40E_PFINT_ICR0_ENA, reg);
+       i40e_flush(hw);
+
+       return 0;
+}
+
+/**
+ * i40e_vc_vf_broadcast
+ * @pf: pointer to the pf structure
+ * @v_opcode: operation code
+ * @v_retval: return value
+ * @msg: pointer to the msg buffer
+ * @msglen: msg length
+ *
+ * send a message to all VFs on a given PF
+ **/
+static void i40e_vc_vf_broadcast(struct i40e_pf *pf,
+                                enum i40e_virtchnl_ops v_opcode,
+                                i40e_status v_retval, u8 *msg,
+                                u16 msglen)
+{
+       struct i40e_hw *hw = &pf->hw;
+       int i;
+
+       for (i = 0; i < pf->num_alloc_vfs; i++) {
+               struct i40e_vf *vf = &pf->vf[i];
+
+               /* Ignore the send result on purpose - a given VF may
+                * fail, but we still attempt delivery to all of them.
+                */
+               i40e_aq_send_msg_to_vf(hw, vf->vf_id, v_opcode, v_retval,
+                                      msg, msglen, NULL);
+       }
+}
+
+/**
+ * i40e_vc_notify_link_state
+ * @pf: pointer to the pf structure
+ *
+ * send a link status message to all VFs on a given PF
+ **/
+void i40e_vc_notify_link_state(struct i40e_pf *pf)
+{
+       struct i40e_virtchnl_pf_event pfe;
+
+       /* Zero the whole event first: it is copied verbatim to untrusted
+        * VFs, so uninitialized padding/union bytes would leak kernel
+        * stack contents.
+        */
+       memset(&pfe, 0, sizeof(pfe));
+       pfe.event = I40E_VIRTCHNL_EVENT_LINK_CHANGE;
+       pfe.severity = I40E_PF_EVENT_SEVERITY_INFO;
+       pfe.event_data.link_event.link_status =
+           pf->hw.phy.link_info.link_info & I40E_AQ_LINK_UP;
+       pfe.event_data.link_event.link_speed = pf->hw.phy.link_info.link_speed;
+
+       i40e_vc_vf_broadcast(pf, I40E_VIRTCHNL_OP_EVENT, I40E_SUCCESS,
+                            (u8 *)&pfe, sizeof(struct i40e_virtchnl_pf_event));
+}
+
+/**
+ * i40e_vc_notify_reset
+ * @pf: pointer to the pf structure
+ *
+ * indicate a pending reset to all VFs on a given PF
+ **/
+void i40e_vc_notify_reset(struct i40e_pf *pf)
+{
+       struct i40e_virtchnl_pf_event pfe;
+
+       /* Zero the whole event first: it is copied verbatim to untrusted
+        * VFs, so uninitialized padding/union bytes would leak kernel
+        * stack contents.
+        */
+       memset(&pfe, 0, sizeof(pfe));
+       pfe.event = I40E_VIRTCHNL_EVENT_RESET_IMPENDING;
+       pfe.severity = I40E_PF_EVENT_SEVERITY_CERTAIN_DOOM;
+       i40e_vc_vf_broadcast(pf, I40E_VIRTCHNL_OP_EVENT, I40E_SUCCESS,
+                            (u8 *)&pfe, sizeof(struct i40e_virtchnl_pf_event));
+}
+
+/**
+ * i40e_vc_notify_vf_reset
+ * @vf: pointer to the vf structure
+ *
+ * indicate a pending reset to the given VF
+ **/
+void i40e_vc_notify_vf_reset(struct i40e_vf *vf)
+{
+       struct i40e_virtchnl_pf_event pfe;
+
+       /* Zero the whole event first: it is copied verbatim to an
+        * untrusted VF, so uninitialized padding/union bytes would leak
+        * kernel stack contents.
+        */
+       memset(&pfe, 0, sizeof(pfe));
+       pfe.event = I40E_VIRTCHNL_EVENT_RESET_IMPENDING;
+       pfe.severity = I40E_PF_EVENT_SEVERITY_CERTAIN_DOOM;
+       i40e_aq_send_msg_to_vf(&vf->pf->hw, vf->vf_id, I40E_VIRTCHNL_OP_EVENT,
+                              I40E_SUCCESS, (u8 *)&pfe,
+                              sizeof(struct i40e_virtchnl_pf_event), NULL);
+}
+
+/**
+ * i40e_ndo_set_vf_mac
+ * @netdev: network interface device structure
+ * @vf_id: vf identifier
+ * @mac: mac address
+ *
+ * program vf mac address
+ *
+ * Returns 0 on success or a negative errno; the VF driver must be
+ * reloaded before the new address takes effect (see dev_info below).
+ **/
+int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
+{
+       struct i40e_netdev_priv *np = netdev_priv(netdev);
+       struct i40e_vsi *vsi = np->vsi;
+       struct i40e_pf *pf = vsi->back;
+       struct i40e_mac_filter *f;
+       struct i40e_vf *vf;
+       int ret = 0;
+
+       /* validate the request */
+       if (vf_id >= pf->num_alloc_vfs) {
+               dev_err(&pf->pdev->dev,
+                       "Invalid VF Identifier %d\n", vf_id);
+               ret = -EINVAL;
+               goto error_param;
+       }
+
+       /* switch from the netdev's own VSI to the VF's LAN VSI */
+       vf = &(pf->vf[vf_id]);
+       vsi = pf->vsi[vf->lan_vsi_index];
+       if (!test_bit(I40E_VF_STAT_INIT, &vf->vf_states)) {
+               dev_err(&pf->pdev->dev,
+                       "Uninitialized VF %d\n", vf_id);
+               ret = -EINVAL;
+               goto error_param;
+       }
+
+       if (!is_valid_ether_addr(mac)) {
+               dev_err(&pf->pdev->dev,
+                       "Invalid VF ethernet address\n");
+               ret = -EINVAL;
+               goto error_param;
+       }
+
+       /* delete the temporary mac address */
+       i40e_del_filter(vsi, vf->default_lan_addr.addr, 0, true, false);
+
+       /* add the new mac address */
+       f = i40e_add_filter(vsi, mac, 0, true, false);
+       if (!f) {
+               dev_err(&pf->pdev->dev,
+                       "Unable to add VF ucast filter\n");
+               ret = -ENOMEM;
+               goto error_param;
+       }
+
+       dev_info(&pf->pdev->dev, "Setting MAC %pM on VF %d\n", mac, vf_id);
+       /* program mac filter */
+       if (i40e_sync_vsi_filters(vsi)) {
+               dev_err(&pf->pdev->dev, "Unable to program ucast filters\n");
+               ret = -EIO;
+               goto error_param;
+       }
+       /* remember the new default address only after HW accepted it */
+       memcpy(vf->default_lan_addr.addr, mac, ETH_ALEN);
+       dev_info(&pf->pdev->dev, "Reload the VF driver to make this change effective.\n");
+       ret = 0;
+
+error_param:
+       return ret;
+}
+
+/**
+ * i40e_ndo_set_vf_port_vlan
+ * @netdev: network interface device structure
+ * @vf_id: vf identifier
+ * @vlan_id: vlan id to be set on the vf
+ * @qos: priority setting
+ *
+ * program vf vlan id and/or qos
+ **/
+int i40e_ndo_set_vf_port_vlan(struct net_device *netdev,
+                             int vf_id, u16 vlan_id, u8 qos)
+{
+       struct i40e_netdev_priv *np = netdev_priv(netdev);
+       struct i40e_pf *pf = np->vsi->back;
+       struct i40e_vsi *vsi;
+       struct i40e_vf *vf;
+       int ret = 0;
+
+       /* validate the request */
+       if (vf_id >= pf->num_alloc_vfs) {
+               dev_err(&pf->pdev->dev, "Invalid VF Identifier %d\n", vf_id);
+               ret = -EINVAL;
+               goto error_pvid;
+       }
+
+       /* vlan ids are 12 bits, qos (priority) is 3 bits */
+       if ((vlan_id > I40E_MAX_VLANID) || (qos > 7)) {
+               dev_err(&pf->pdev->dev, "Invalid VF Parameters\n");
+               ret = -EINVAL;
+               goto error_pvid;
+       }
+
+       vf = &(pf->vf[vf_id]);
+       vsi = pf->vsi[vf->lan_vsi_index];
+       if (!test_bit(I40E_VF_STAT_INIT, &vf->vf_states)) {
+               dev_err(&pf->pdev->dev, "Uninitialized VF %d\n", vf_id);
+               ret = -EINVAL;
+               goto error_pvid;
+       }
+
+       if (vsi->info.pvid) {
+               /* kill old VLAN */
+               ret = i40e_vsi_kill_vlan(vsi, (le16_to_cpu(vsi->info.pvid) &
+                                              VLAN_VID_MASK));
+               if (ret) {
+                       dev_info(&vsi->back->pdev->dev,
+                                "remove VLAN failed, ret=%d, aq_err=%d\n",
+                                ret, pf->hw.aq.asq_last_status);
+               }
+       }
+       /* a nonzero vlan or qos installs a new pvid (vlan | qos<<12);
+        * clearing both disables vlan stripping instead
+        */
+       if (vlan_id || qos)
+               ret = i40e_vsi_add_pvid(vsi,
+                               vlan_id | (qos << I40E_VLAN_PRIORITY_SHIFT))
+       else
+               i40e_vlan_stripping_disable(vsi);
+
+       if (vlan_id) {
+               dev_info(&pf->pdev->dev, "Setting VLAN %d, QOS 0x%x on VF %d\n",
+                        vlan_id, qos, vf_id);
+
+               /* add new VLAN filter */
+               ret = i40e_vsi_add_vlan(vsi, vlan_id);
+               if (ret) {
+                       dev_info(&vsi->back->pdev->dev,
+                                "add VF VLAN failed, ret=%d aq_err=%d\n", ret,
+                                vsi->back->hw.aq.asq_last_status);
+                       goto error_pvid;
+               }
+       }
+
+       /* ret here reflects the pvid update above when no vlan was added */
+       if (ret) {
+               dev_err(&pf->pdev->dev, "Unable to update VF vsi context\n");
+               goto error_pvid;
+       }
+       ret = 0;
+
+error_pvid:
+       return ret;
+}
+
+/**
+ * i40e_ndo_set_vf_bw
+ * @netdev: network interface device structure
+ * @vf_id: vf identifier
+ * @tx_rate: tx rate
+ *
+ * configure vf tx rate
+ *
+ * Not implemented: always reports -EOPNOTSUPP to the caller.
+ **/
+int i40e_ndo_set_vf_bw(struct net_device *netdev, int vf_id, int tx_rate)
+{
+       return -EOPNOTSUPP;
+}
+
+/**
+ * i40e_ndo_get_vf_config
+ * @netdev: network interface device structure
+ * @vf_id: vf identifier
+ * @ivi: vf configuration structure
+ *
+ * return vf configuration
+ **/
+int i40e_ndo_get_vf_config(struct net_device *netdev,
+                          int vf_id, struct ifla_vf_info *ivi)
+{
+       struct i40e_netdev_priv *np = netdev_priv(netdev);
+       struct i40e_vsi *vsi = np->vsi;
+       struct i40e_pf *pf = vsi->back;
+       struct i40e_mac_filter *f;
+       struct i40e_vf *vf;
+       int ret = 0;
+
+       /* validate the request */
+       if (vf_id >= pf->num_alloc_vfs) {
+               dev_err(&pf->pdev->dev, "Invalid VF Identifier %d\n", vf_id);
+               ret = -EINVAL;
+               goto error_param;
+       }
+
+       vf = &(pf->vf[vf_id]);
+       /* first vsi is always the LAN vsi */
+       vsi = pf->vsi[vf->lan_vsi_index];
+       if (!test_bit(I40E_VF_STAT_INIT, &vf->vf_states)) {
+               dev_err(&pf->pdev->dev, "Uninitialized VF %d\n", vf_id);
+               ret = -EINVAL;
+               goto error_param;
+       }
+
+       ivi->vf = vf_id;
+
+       /* first entry of the list is the default ethernet address; the
+        * list is only read here, so the deletion-safe iterator (and its
+        * unused scratch pointer) was unnecessary
+        */
+       list_for_each_entry(f, &vsi->mac_filter_list, list) {
+               memcpy(&ivi->mac, f->macaddr, I40E_ETH_LENGTH_OF_ADDRESS);
+               break;
+       }
+
+       /* tx rate limiting is not implemented (see i40e_ndo_set_vf_bw) */
+       ivi->tx_rate = 0;
+       /* decompose the pvid into vlan id and priority (qos) */
+       ivi->vlan = le16_to_cpu(vsi->info.pvid) & I40E_VLAN_MASK;
+       ivi->qos = (le16_to_cpu(vsi->info.pvid) & I40E_PRIORITY_MASK) >>
+                  I40E_VLAN_PRIORITY_SHIFT;
+       ret = 0;
+
+error_param:
+       return ret;
+}
diff --git a/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h b/drivers/net/ethernet/intel/i40e/i40e_virtchnl_pf.h
new file mode 100644 (file)
index 0000000..360382c
--- /dev/null
@@ -0,0 +1,120 @@
+/*******************************************************************************
+ *
+ * Intel Ethernet Controller XL710 Family Linux Driver
+ * Copyright(c) 2013 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ * Contact Information:
+ * e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ *
+ ******************************************************************************/
+
+#ifndef _I40E_VIRTCHNL_PF_H_
+#define _I40E_VIRTCHNL_PF_H_
+
+#include "i40e.h"
+
+#define I40E_MAX_MACVLAN_FILTERS 256
+#define I40E_MAX_VLAN_FILTERS 256
+#define I40E_MAX_VLANID 4095
+
+#define I40E_VIRTCHNL_SUPPORTED_QTYPES 2
+
+#define I40E_DEFAULT_NUM_MDD_EVENTS_ALLOWED    3
+#define I40E_DEFAULT_NUM_INVALID_MSGS_ALLOWED  10
+
+#define I40E_VLAN_PRIORITY_SHIFT       12
+#define I40E_VLAN_MASK                 0xFFF
+#define I40E_PRIORITY_MASK             0x7000
+
+/* Various queue ctrls */
+enum i40e_queue_ctrl {
+       I40E_QUEUE_CTRL_UNKNOWN = 0,
+       I40E_QUEUE_CTRL_ENABLE,
+       I40E_QUEUE_CTRL_ENABLECHECK,
+       I40E_QUEUE_CTRL_DISABLE,
+       I40E_QUEUE_CTRL_DISABLECHECK,
+       I40E_QUEUE_CTRL_FASTDISABLE,
+       I40E_QUEUE_CTRL_FASTDISABLECHECK,
+};
+
+/* VF states */
+enum i40e_vf_states {
+       I40E_VF_STAT_INIT = 0,
+       I40E_VF_STAT_ACTIVE,
+       I40E_VF_STAT_FCOEENA,
+       I40E_VF_STAT_DISABLED,
+};
+
+/* VF capabilities */
+enum i40e_vf_capabilities {
+       I40E_VIRTCHNL_VF_CAP_PRIVILEGE = 0,
+       I40E_VIRTCHNL_VF_CAP_L2,
+};
+
+/* VF information structure */
+struct i40e_vf {
+       struct i40e_pf *pf;
+
+       /* vf id in the pf space */
+       u16 vf_id;
+       /* all vf vsis connect to the same parent */
+       enum i40e_switch_element_types parent_type;
+
+       /* vf Port Extender (PE) stag if used */
+       u16 stag;
+
+       struct i40e_virtchnl_ether_addr default_lan_addr;
+       struct i40e_virtchnl_ether_addr default_fcoe_addr;
+
+       /* VSI indices - actual VSI pointers are maintained in the PF structure
+        * When assigned, these will be non-zero, because VSI 0 is always
+        * the main LAN VSI for the PF.
+        */
+       u8 lan_vsi_index;       /* index into PF struct */
+       u8 lan_vsi_id;          /* ID as used by firmware */
+
+       u8 num_queue_pairs;     /* num of qps assigned to vf vsis */
+       u64 num_mdd_events;     /* num of mdd events detected */
+       u64 num_invalid_msgs;   /* num of malformed or invalid msgs detected */
+       u64 num_valid_msgs;     /* num of valid msgs detected */
+
+       unsigned long vf_caps;  /* vf's adv. capabilities */
+       unsigned long vf_states;        /* vf's runtime states */
+};
+
+void i40e_free_vfs(struct i40e_pf *pf);
+int i40e_pci_sriov_configure(struct pci_dev *dev, int num_vfs);
+int i40e_vc_process_vf_msg(struct i40e_pf *pf, u16 vf_id, u32 v_opcode,
+                          u32 v_retval, u8 *msg, u16 msglen);
+int i40e_vc_process_vflr_event(struct i40e_pf *pf);
+int i40e_reset_vf(struct i40e_vf *vf, bool flr);
+void i40e_vc_notify_vf_reset(struct i40e_vf *vf);
+
+/* vf configuration related iplink handlers */
+int i40e_ndo_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac);
+int i40e_ndo_set_vf_port_vlan(struct net_device *netdev,
+                             int vf_id, u16 vlan_id, u8 qos);
+int i40e_ndo_set_vf_bw(struct net_device *netdev, int vf_id, int tx_rate);
+int i40e_ndo_get_vf_config(struct net_device *netdev,
+                          int vf_id, struct ifla_vf_info *ivi);
+void i40e_vc_notify_link_state(struct i40e_pf *pf);
+void i40e_vc_notify_reset(struct i40e_pf *pf);
+
+#endif /* _I40E_VIRTCHNL_PF_H_ */
index 270e65f..a36fa80 100644 (file)
@@ -996,14 +996,14 @@ static int korina_open(struct net_device *dev)
         * that handles the Done Finished
         * Ovr and Und Events */
        ret = request_irq(lp->rx_irq, korina_rx_dma_interrupt,
-                       IRQF_DISABLED, "Korina ethernet Rx", dev);
+                       0, "Korina ethernet Rx", dev);
        if (ret < 0) {
                printk(KERN_ERR "%s: unable to get Rx DMA IRQ %d\n",
                    dev->name, lp->rx_irq);
                goto err_release;
        }
        ret = request_irq(lp->tx_irq, korina_tx_dma_interrupt,
-                       IRQF_DISABLED, "Korina ethernet Tx", dev);
+                       0, "Korina ethernet Tx", dev);
        if (ret < 0) {
                printk(KERN_ERR "%s: unable to get Tx DMA IRQ %d\n",
                    dev->name, lp->tx_irq);
@@ -1012,7 +1012,7 @@ static int korina_open(struct net_device *dev)
 
        /* Install handler for overrun error. */
        ret = request_irq(lp->ovr_irq, korina_ovr_interrupt,
-                       IRQF_DISABLED, "Ethernet Overflow", dev);
+                       0, "Ethernet Overflow", dev);
        if (ret < 0) {
                printk(KERN_ERR "%s: unable to get OVR IRQ %d\n",
                    dev->name, lp->ovr_irq);
@@ -1021,7 +1021,7 @@ static int korina_open(struct net_device *dev)
 
        /* Install handler for underflow error. */
        ret = request_irq(lp->und_irq, korina_und_interrupt,
-                       IRQF_DISABLED, "Ethernet Underflow", dev);
+                       0, "Ethernet Underflow", dev);
        if (ret < 0) {
                printk(KERN_ERR "%s: unable to get UND IRQ %d\n",
                    dev->name, lp->und_irq);
index 88349b8..81bf836 100644 (file)
@@ -430,7 +430,7 @@ struct qlcnic_hardware_context {
        u8 diag_test;
        u8 num_msix;
        u8 nic_mode;
-       char diag_cnt;
+       int diag_cnt;
 
        u16 max_uc_count;
        u16 port_type;
index 949076f..13e6fff 100644 (file)
@@ -1734,7 +1734,8 @@ static void tso_headers_prepare(struct sk_buff *skb, unsigned char *headers,
        unsigned int data_len = skb->len - sh_len;
        unsigned char *data = skb->data;
        unsigned int ih_off, th_off, p_len;
-       unsigned int isum_seed, tsum_seed, id, seq;
+       unsigned int isum_seed, tsum_seed, seq;
+       unsigned int uninitialized_var(id);
        int is_ipv6;
        long f_id = -1;    /* id of the current fragment */
        long f_size = skb_headlen(skb) - sh_len;  /* current fragment size */
@@ -1781,7 +1782,7 @@ static void tso_headers_prepare(struct sk_buff *skb, unsigned char *headers,
                } else {
                        ih = (struct iphdr *)(buf + ih_off);
                        ih->tot_len = htons(sh_len + p_len - ih_off);
-                       ih->id = htons(id);
+                       ih->id = htons(id++);
                        ih->check = csum_long(isum_seed + ih->tot_len +
                                              ih->id) ^ 0xffff;
                }
@@ -1818,7 +1819,6 @@ static void tso_headers_prepare(struct sk_buff *skb, unsigned char *headers,
                        slot++;
                }
 
-               id++;
                seq += p_len;
 
                /* The last segment may be less than gso_size. */
index 510b9c8..31bcb98 100644 (file)
@@ -1488,7 +1488,7 @@ static void
 toshoboe_close (struct pci_dev *pci_dev)
 {
   int i;
-  struct toshoboe_cb *self = (struct toshoboe_cb*)pci_get_drvdata(pci_dev);
+  struct toshoboe_cb *self = pci_get_drvdata(pci_dev);
 
   IRDA_DEBUG (4, "%s()\n", __func__);
 
@@ -1696,7 +1696,7 @@ freeself:
 static int
 toshoboe_gotosleep (struct pci_dev *pci_dev, pm_message_t crap)
 {
-  struct toshoboe_cb *self = (struct toshoboe_cb*)pci_get_drvdata(pci_dev);
+  struct toshoboe_cb *self = pci_get_drvdata(pci_dev);
   unsigned long flags;
   int i = 10;
 
@@ -1725,7 +1725,7 @@ toshoboe_gotosleep (struct pci_dev *pci_dev, pm_message_t crap)
 static int
 toshoboe_wakeup (struct pci_dev *pci_dev)
 {
-  struct toshoboe_cb *self = (struct toshoboe_cb*)pci_get_drvdata(pci_dev);
+  struct toshoboe_cb *self = pci_get_drvdata(pci_dev);
   unsigned long flags;
 
   IRDA_DEBUG (4, "%s()\n", __func__);
index 5f47584..c5bd58b 100644 (file)
@@ -543,7 +543,7 @@ static int vlsi_process_rx(struct vlsi_ring *r, struct ring_descr *rd)
        int             crclen, len = 0;
        struct sk_buff  *skb;
        int             ret = 0;
-       struct net_device *ndev = (struct net_device *)pci_get_drvdata(r->pdev);
+       struct net_device *ndev = pci_get_drvdata(r->pdev);
        vlsi_irda_dev_t *idev = netdev_priv(ndev);
 
        pci_dma_sync_single_for_cpu(r->pdev, rd_get_addr(rd), r->len, r->dir);
index 64dfaa3..9bf46bd 100644 (file)
@@ -118,8 +118,6 @@ static int macvlan_broadcast_one(struct sk_buff *skb,
                                 const struct ethhdr *eth, bool local)
 {
        struct net_device *dev = vlan->dev;
-       if (!skb)
-               return NET_RX_DROP;
 
        if (local)
                return vlan->forward(dev, skb);
@@ -171,9 +169,13 @@ static void macvlan_broadcast(struct sk_buff *skb,
                        hash = mc_hash(vlan, eth->h_dest);
                        if (!test_bit(hash, vlan->mc_filter))
                                continue;
+
+                       err = NET_RX_DROP;
                        nskb = skb_clone(skb, GFP_ATOMIC);
-                       err = macvlan_broadcast_one(nskb, vlan, eth,
-                                        mode == MACVLAN_MODE_BRIDGE);
+                       if (likely(nskb))
+                               err = macvlan_broadcast_one(
+                                       nskb, vlan, eth,
+                                       mode == MACVLAN_MODE_BRIDGE);
                        macvlan_count_rx(vlan, skb->len + ETH_HLEN,
                                         err == NET_RX_SUCCESS, 1);
                }
index 3a81315..6312332 100644 (file)
@@ -518,6 +518,135 @@ static const struct usb_device_id products[] = {
 
        /* 3. Combined interface devices matching on interface number */
        {QMI_FIXED_INTF(0x0408, 0xea42, 4)},    /* Yota / Megafon M100-1 */
+       {QMI_FIXED_INTF(0x05c6, 0x7000, 0)},
+       {QMI_FIXED_INTF(0x05c6, 0x7001, 1)},
+       {QMI_FIXED_INTF(0x05c6, 0x7002, 1)},
+       {QMI_FIXED_INTF(0x05c6, 0x7101, 1)},
+       {QMI_FIXED_INTF(0x05c6, 0x7101, 2)},
+       {QMI_FIXED_INTF(0x05c6, 0x7101, 3)},
+       {QMI_FIXED_INTF(0x05c6, 0x7102, 1)},
+       {QMI_FIXED_INTF(0x05c6, 0x7102, 2)},
+       {QMI_FIXED_INTF(0x05c6, 0x7102, 3)},
+       {QMI_FIXED_INTF(0x05c6, 0x8000, 7)},
+       {QMI_FIXED_INTF(0x05c6, 0x8001, 6)},
+       {QMI_FIXED_INTF(0x05c6, 0x9000, 4)},
+       {QMI_FIXED_INTF(0x05c6, 0x9003, 4)},
+       {QMI_FIXED_INTF(0x05c6, 0x9005, 2)},
+       {QMI_FIXED_INTF(0x05c6, 0x900a, 4)},
+       {QMI_FIXED_INTF(0x05c6, 0x900b, 2)},
+       {QMI_FIXED_INTF(0x05c6, 0x900c, 4)},
+       {QMI_FIXED_INTF(0x05c6, 0x900c, 5)},
+       {QMI_FIXED_INTF(0x05c6, 0x900c, 6)},
+       {QMI_FIXED_INTF(0x05c6, 0x900d, 5)},
+       {QMI_FIXED_INTF(0x05c6, 0x900f, 3)},
+       {QMI_FIXED_INTF(0x05c6, 0x900f, 4)},
+       {QMI_FIXED_INTF(0x05c6, 0x900f, 5)},
+       {QMI_FIXED_INTF(0x05c6, 0x9010, 4)},
+       {QMI_FIXED_INTF(0x05c6, 0x9010, 5)},
+       {QMI_FIXED_INTF(0x05c6, 0x9011, 3)},
+       {QMI_FIXED_INTF(0x05c6, 0x9011, 4)},
+       {QMI_FIXED_INTF(0x05c6, 0x9021, 1)},
+       {QMI_FIXED_INTF(0x05c6, 0x9022, 2)},
+       {QMI_FIXED_INTF(0x05c6, 0x9025, 4)},    /* Alcatel-sbell ASB TL131 TDD LTE  (China Mobile) */
+       {QMI_FIXED_INTF(0x05c6, 0x9026, 3)},
+       {QMI_FIXED_INTF(0x05c6, 0x902e, 5)},
+       {QMI_FIXED_INTF(0x05c6, 0x9031, 5)},
+       {QMI_FIXED_INTF(0x05c6, 0x9032, 4)},
+       {QMI_FIXED_INTF(0x05c6, 0x9033, 3)},
+       {QMI_FIXED_INTF(0x05c6, 0x9033, 4)},
+       {QMI_FIXED_INTF(0x05c6, 0x9033, 5)},
+       {QMI_FIXED_INTF(0x05c6, 0x9033, 6)},
+       {QMI_FIXED_INTF(0x05c6, 0x9034, 3)},
+       {QMI_FIXED_INTF(0x05c6, 0x9034, 4)},
+       {QMI_FIXED_INTF(0x05c6, 0x9034, 5)},
+       {QMI_FIXED_INTF(0x05c6, 0x9034, 6)},
+       {QMI_FIXED_INTF(0x05c6, 0x9034, 7)},
+       {QMI_FIXED_INTF(0x05c6, 0x9035, 4)},
+       {QMI_FIXED_INTF(0x05c6, 0x9036, 3)},
+       {QMI_FIXED_INTF(0x05c6, 0x9037, 5)},
+       {QMI_FIXED_INTF(0x05c6, 0x9038, 4)},
+       {QMI_FIXED_INTF(0x05c6, 0x903b, 7)},
+       {QMI_FIXED_INTF(0x05c6, 0x903c, 6)},
+       {QMI_FIXED_INTF(0x05c6, 0x903d, 6)},
+       {QMI_FIXED_INTF(0x05c6, 0x903e, 5)},
+       {QMI_FIXED_INTF(0x05c6, 0x9043, 3)},
+       {QMI_FIXED_INTF(0x05c6, 0x9046, 3)},
+       {QMI_FIXED_INTF(0x05c6, 0x9046, 4)},
+       {QMI_FIXED_INTF(0x05c6, 0x9046, 5)},
+       {QMI_FIXED_INTF(0x05c6, 0x9047, 2)},
+       {QMI_FIXED_INTF(0x05c6, 0x9047, 3)},
+       {QMI_FIXED_INTF(0x05c6, 0x9047, 4)},
+       {QMI_FIXED_INTF(0x05c6, 0x9048, 4)},
+       {QMI_FIXED_INTF(0x05c6, 0x9048, 5)},
+       {QMI_FIXED_INTF(0x05c6, 0x9048, 6)},
+       {QMI_FIXED_INTF(0x05c6, 0x9048, 7)},
+       {QMI_FIXED_INTF(0x05c6, 0x9048, 8)},
+       {QMI_FIXED_INTF(0x05c6, 0x904c, 5)},
+       {QMI_FIXED_INTF(0x05c6, 0x904c, 6)},
+       {QMI_FIXED_INTF(0x05c6, 0x904c, 7)},
+       {QMI_FIXED_INTF(0x05c6, 0x904c, 8)},
+       {QMI_FIXED_INTF(0x05c6, 0x9050, 3)},
+       {QMI_FIXED_INTF(0x05c6, 0x9052, 4)},
+       {QMI_FIXED_INTF(0x05c6, 0x9053, 6)},
+       {QMI_FIXED_INTF(0x05c6, 0x9053, 7)},
+       {QMI_FIXED_INTF(0x05c6, 0x9054, 5)},
+       {QMI_FIXED_INTF(0x05c6, 0x9054, 6)},
+       {QMI_FIXED_INTF(0x05c6, 0x9055, 3)},
+       {QMI_FIXED_INTF(0x05c6, 0x9055, 4)},
+       {QMI_FIXED_INTF(0x05c6, 0x9055, 5)},
+       {QMI_FIXED_INTF(0x05c6, 0x9055, 6)},
+       {QMI_FIXED_INTF(0x05c6, 0x9055, 7)},
+       {QMI_FIXED_INTF(0x05c6, 0x9056, 3)},
+       {QMI_FIXED_INTF(0x05c6, 0x9062, 2)},
+       {QMI_FIXED_INTF(0x05c6, 0x9062, 3)},
+       {QMI_FIXED_INTF(0x05c6, 0x9062, 4)},
+       {QMI_FIXED_INTF(0x05c6, 0x9062, 5)},
+       {QMI_FIXED_INTF(0x05c6, 0x9062, 6)},
+       {QMI_FIXED_INTF(0x05c6, 0x9062, 7)},
+       {QMI_FIXED_INTF(0x05c6, 0x9062, 8)},
+       {QMI_FIXED_INTF(0x05c6, 0x9062, 9)},
+       {QMI_FIXED_INTF(0x05c6, 0x9064, 3)},
+       {QMI_FIXED_INTF(0x05c6, 0x9065, 6)},
+       {QMI_FIXED_INTF(0x05c6, 0x9065, 7)},
+       {QMI_FIXED_INTF(0x05c6, 0x9066, 5)},
+       {QMI_FIXED_INTF(0x05c6, 0x9066, 6)},
+       {QMI_FIXED_INTF(0x05c6, 0x9067, 1)},
+       {QMI_FIXED_INTF(0x05c6, 0x9068, 2)},
+       {QMI_FIXED_INTF(0x05c6, 0x9068, 3)},
+       {QMI_FIXED_INTF(0x05c6, 0x9068, 4)},
+       {QMI_FIXED_INTF(0x05c6, 0x9068, 5)},
+       {QMI_FIXED_INTF(0x05c6, 0x9068, 6)},
+       {QMI_FIXED_INTF(0x05c6, 0x9068, 7)},
+       {QMI_FIXED_INTF(0x05c6, 0x9069, 5)},
+       {QMI_FIXED_INTF(0x05c6, 0x9069, 6)},
+       {QMI_FIXED_INTF(0x05c6, 0x9069, 7)},
+       {QMI_FIXED_INTF(0x05c6, 0x9069, 8)},
+       {QMI_FIXED_INTF(0x05c6, 0x9070, 4)},
+       {QMI_FIXED_INTF(0x05c6, 0x9070, 5)},
+       {QMI_FIXED_INTF(0x05c6, 0x9075, 5)},
+       {QMI_FIXED_INTF(0x05c6, 0x9076, 4)},
+       {QMI_FIXED_INTF(0x05c6, 0x9076, 5)},
+       {QMI_FIXED_INTF(0x05c6, 0x9076, 6)},
+       {QMI_FIXED_INTF(0x05c6, 0x9076, 7)},
+       {QMI_FIXED_INTF(0x05c6, 0x9076, 8)},
+       {QMI_FIXED_INTF(0x05c6, 0x9077, 3)},
+       {QMI_FIXED_INTF(0x05c6, 0x9077, 4)},
+       {QMI_FIXED_INTF(0x05c6, 0x9077, 5)},
+       {QMI_FIXED_INTF(0x05c6, 0x9077, 6)},
+       {QMI_FIXED_INTF(0x05c6, 0x9078, 3)},
+       {QMI_FIXED_INTF(0x05c6, 0x9079, 4)},
+       {QMI_FIXED_INTF(0x05c6, 0x9079, 5)},
+       {QMI_FIXED_INTF(0x05c6, 0x9079, 6)},
+       {QMI_FIXED_INTF(0x05c6, 0x9079, 7)},
+       {QMI_FIXED_INTF(0x05c6, 0x9079, 8)},
+       {QMI_FIXED_INTF(0x05c6, 0x9080, 5)},
+       {QMI_FIXED_INTF(0x05c6, 0x9080, 6)},
+       {QMI_FIXED_INTF(0x05c6, 0x9080, 7)},
+       {QMI_FIXED_INTF(0x05c6, 0x9080, 8)},
+       {QMI_FIXED_INTF(0x05c6, 0x9083, 3)},
+       {QMI_FIXED_INTF(0x05c6, 0x9084, 4)},
+       {QMI_FIXED_INTF(0x05c6, 0x920d, 0)},
+       {QMI_FIXED_INTF(0x05c6, 0x920d, 5)},
        {QMI_FIXED_INTF(0x12d1, 0x140c, 1)},    /* Huawei E173 */
        {QMI_FIXED_INTF(0x12d1, 0x14ac, 1)},    /* Huawei E1820 */
        {QMI_FIXED_INTF(0x19d2, 0x0002, 1)},
@@ -612,7 +741,6 @@ static const struct usb_device_id products[] = {
        {QMI_GOBI_DEVICE(0x413c, 0x8186)},      /* Dell Gobi 2000 Modem device (N0218, VU936) */
        {QMI_GOBI_DEVICE(0x413c, 0x8194)},      /* Dell Gobi 3000 Composite */
        {QMI_GOBI_DEVICE(0x05c6, 0x920b)},      /* Generic Gobi 2000 Modem device */
-       {QMI_GOBI_DEVICE(0x05c6, 0x920d)},      /* Gobi 3000 Composite */
        {QMI_GOBI_DEVICE(0x05c6, 0x9225)},      /* Sony Gobi 2000 Modem device (N0279, VU730) */
        {QMI_GOBI_DEVICE(0x05c6, 0x9245)},      /* Samsung Gobi 2000 Modem device (VL176) */
        {QMI_GOBI_DEVICE(0x03f0, 0x251d)},      /* HP Gobi 2000 Modem device (VP412) */
index 15ea36b..cdafb8c 100644 (file)
@@ -41,7 +41,7 @@ config P54_PCI
 
 config P54_SPI
        tristate "Prism54 SPI (stlc45xx) support"
-       depends on P54_COMMON && SPI_MASTER && GENERIC_HARDIRQS
+       depends on P54_COMMON && SPI_MASTER
        ---help---
          This driver is for stlc4550 or stlc4560 based wireless chips
          such as Nokia's N800/N810 Portable Internet Tablet.
index 8fec4ed..477a206 100644 (file)
@@ -1,6 +1,6 @@
 menuconfig WL1251
        tristate "TI wl1251 driver support"
-       depends on MAC80211 && GENERIC_HARDIRQS
+       depends on MAC80211
        select FW_LOADER
        select CRC7
        ---help---
index 2b83282..7c09954 100644 (file)
@@ -1,6 +1,6 @@
 config WLCORE
        tristate "TI wlcore support"
-       depends on WL_TI && GENERIC_HARDIRQS && MAC80211
+       depends on WL_TI && MAC80211
        select FW_LOADER
        ---help---
          This module contains the main code for TI WLAN chips.  It abstracts
index a754b84..0fe40c7 100644 (file)
@@ -11,8 +11,6 @@
  * License or (at your optional) any later version of the license.
  */
 
-#include <asm/dma-contiguous.h>
-
 #include <linux/memblock.h>
 #include <linux/err.h>
 #include <linux/of.h>
index f6488ad..0b7d23b 100644 (file)
@@ -487,7 +487,6 @@ static void acpiphp_bus_add(acpi_handle handle)
 {
        struct acpi_device *adev = NULL;
 
-       acpiphp_bus_trim(handle);
        acpi_bus_scan(handle);
        acpi_bus_get_device(handle, &adev);
        if (adev)
@@ -529,6 +528,16 @@ static void check_hotplug_bridge(struct acpiphp_slot *slot, struct pci_dev *dev)
        }
 }
 
+static int acpiphp_rescan_slot(struct acpiphp_slot *slot)
+{
+       struct acpiphp_func *func;
+
+       list_for_each_entry(func, &slot->funcs, sibling)
+               acpiphp_bus_add(func_to_handle(func));
+
+       return pci_scan_slot(slot->bus, PCI_DEVFN(slot->device, 0));
+}
+
 /**
  * enable_slot - enable, configure a slot
  * @slot: slot to be enabled
@@ -543,12 +552,9 @@ static void __ref enable_slot(struct acpiphp_slot *slot)
        struct acpiphp_func *func;
        int max, pass;
        LIST_HEAD(add_list);
+       int nr_found;
 
-       list_for_each_entry(func, &slot->funcs, sibling)
-               acpiphp_bus_add(func_to_handle(func));
-
-       pci_scan_slot(bus, PCI_DEVFN(slot->device, 0));
-
+       nr_found = acpiphp_rescan_slot(slot);
        max = acpiphp_max_busnr(bus);
        for (pass = 0; pass < 2; pass++) {
                list_for_each_entry(dev, &bus->devices, bus_list) {
@@ -567,8 +573,11 @@ static void __ref enable_slot(struct acpiphp_slot *slot)
                        }
                }
        }
-
        __pci_bus_assign_resources(bus, &add_list, NULL);
+       /* Nothing more to do here if there are no new devices on this bus. */
+       if (!nr_found && (slot->flags & SLOT_ENABLED))
+               return;
+
        acpiphp_sanitize_bus(bus);
        acpiphp_set_hpp_values(bus);
        acpiphp_set_acpi_region(slot);
@@ -837,11 +846,22 @@ static void hotplug_event(acpi_handle handle, u32 type, void *data)
        case ACPI_NOTIFY_DEVICE_CHECK:
                /* device check */
                dbg("%s: Device check notify on %s\n", __func__, objname);
-               if (bridge)
+               if (bridge) {
                        acpiphp_check_bridge(bridge);
-               else
-                       acpiphp_check_bridge(func->parent);
+               } else {
+                       struct acpiphp_slot *slot = func->slot;
+                       int ret;
 
+                       /*
+                        * Check if anything has changed in the slot and rescan
+                        * from the parent if that's the case.
+                        */
+                       mutex_lock(&slot->crit_sect);
+                       ret = acpiphp_rescan_slot(slot);
+                       mutex_unlock(&slot->crit_sect);
+                       if (ret)
+                               acpiphp_check_bridge(func->parent);
+               }
                break;
 
        case ACPI_NOTIFY_EJECT_REQUEST:
@@ -867,6 +887,8 @@ static void hotplug_event_work(struct work_struct *work)
        hotplug_event(hp_work->handle, hp_work->type, context);
 
        acpi_scan_lock_release();
+       acpi_evaluate_hotplug_ost(hp_work->handle, hp_work->type,
+                                 ACPI_OST_SC_SUCCESS, NULL);
        kfree(hp_work); /* allocated in handle_hotplug_event() */
        put_bridge(context->func.parent);
 }
@@ -882,11 +904,15 @@ static void hotplug_event_work(struct work_struct *work)
 static void handle_hotplug_event(acpi_handle handle, u32 type, void *data)
 {
        struct acpiphp_context *context;
+       u32 ost_code = ACPI_OST_SC_SUCCESS;
 
        switch (type) {
        case ACPI_NOTIFY_BUS_CHECK:
        case ACPI_NOTIFY_DEVICE_CHECK:
+               break;
        case ACPI_NOTIFY_EJECT_REQUEST:
+               ost_code = ACPI_OST_SC_EJECT_IN_PROGRESS;
+               acpi_evaluate_hotplug_ost(handle, type, ost_code, NULL);
                break;
 
        case ACPI_NOTIFY_DEVICE_WAKE:
@@ -895,20 +921,21 @@ static void handle_hotplug_event(acpi_handle handle, u32 type, void *data)
        case ACPI_NOTIFY_FREQUENCY_MISMATCH:
                acpi_handle_err(handle, "Device cannot be configured due "
                                "to a frequency mismatch\n");
-               return;
+               goto out;
 
        case ACPI_NOTIFY_BUS_MODE_MISMATCH:
                acpi_handle_err(handle, "Device cannot be configured due "
                                "to a bus mode mismatch\n");
-               return;
+               goto out;
 
        case ACPI_NOTIFY_POWER_FAULT:
                acpi_handle_err(handle, "Device has suffered a power fault\n");
-               return;
+               goto out;
 
        default:
                acpi_handle_warn(handle, "Unsupported event type 0x%x\n", type);
-               return;
+               ost_code = ACPI_OST_SC_UNRECOGNIZED_NOTIFY;
+               goto out;
        }
 
        mutex_lock(&acpiphp_context_lock);
@@ -917,8 +944,14 @@ static void handle_hotplug_event(acpi_handle handle, u32 type, void *data)
                get_bridge(context->func.parent);
                acpiphp_put_context(context);
                alloc_acpi_hp_work(handle, type, context, hotplug_event_work);
+               mutex_unlock(&acpiphp_context_lock);
+               return;
        }
        mutex_unlock(&acpiphp_context_lock);
+       ost_code = ACPI_OST_SC_NON_SPECIFIC_FAILURE;
+
+ out:
+       acpi_evaluate_hotplug_ost(handle, type, ost_code, NULL);
 }
 
 /*
index b35f93c..d5f90d6 100644 (file)
@@ -30,7 +30,6 @@ static int pci_msi_enable = 1;
 
 /* Arch hooks */
 
-#if defined(CONFIG_GENERIC_HARDIRQS)
 int __weak arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
 {
        struct msi_chip *chip = dev->bus->msi;
@@ -67,21 +66,6 @@ int __weak arch_msi_check_device(struct pci_dev *dev, int nvec, int type)
 
        return chip->check_device(chip, dev, nvec, type);
 }
-#else
-int __weak arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
-{
-       return -ENOSYS;
-}
-
-void __weak arch_teardown_msi_irq(unsigned int irq)
-{
-}
-
-int __weak arch_msi_check_device(struct pci_dev *dev, int nvec, int type)
-{
-       return 0;
-}
-#endif /* CONFIG_GENERIC_HARDIRQS */
 
 int __weak arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
 {
@@ -245,8 +229,6 @@ static void msix_mask_irq(struct msi_desc *desc, u32 flag)
        desc->masked = __msix_mask_irq(desc, flag);
 }
 
-#ifdef CONFIG_GENERIC_HARDIRQS
-
 static void msi_set_mask_bit(struct irq_data *data, u32 flag)
 {
        struct msi_desc *desc = irq_data_get_msi(data);
@@ -270,8 +252,6 @@ void unmask_msi_irq(struct irq_data *data)
        msi_set_mask_bit(data, 0);
 }
 
-#endif /* CONFIG_GENERIC_HARDIRQS */
-
 void __read_msi_msg(struct msi_desc *entry, struct msi_msg *msg)
 {
        BUG_ON(entry->dev->current_state != PCI_D0);
@@ -382,10 +362,8 @@ static void free_msi_irqs(struct pci_dev *dev)
                        nvec = entry->nvec_used;
                else
                        nvec = 1 << entry->msi_attrib.multiple;
-#ifdef CONFIG_GENERIC_HARDIRQS
                for (i = 0; i < nvec; i++)
                        BUG_ON(irq_has_action(entry->irq + i));
-#endif
        }
 
        arch_teardown_msi_irqs(dev);
index 36a9e60..96d6b2e 100644 (file)
@@ -732,6 +732,7 @@ config SAMSUNG_LAPTOP
        tristate "Samsung Laptop driver"
        depends on X86
        depends on RFKILL || RFKILL = n
+       depends on ACPI_VIDEO || ACPI_VIDEO = n
        depends on BACKLIGHT_CLASS_DEVICE
        select LEDS_CLASS
        select NEW_LEDS
@@ -764,7 +765,7 @@ config INTEL_OAKTRAIL
 
 config SAMSUNG_Q10
        tristate "Samsung Q10 Extras"
-       depends on SERIO_I8042
+       depends on ACPI
        select BACKLIGHT_CLASS_DEVICE
        ---help---
          This driver provides support for backlight control on Samsung Q10
index 6296f07..da36b5e 100644 (file)
@@ -82,6 +82,13 @@ static const struct dmi_system_id amilo_rfkill_id_table[] = {
                },
                .driver_data = (void *)&amilo_a1655_rfkill_ops
        },
+       {
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"),
+                       DMI_MATCH(DMI_BOARD_NAME, "AMILO L1310"),
+               },
+               .driver_data = (void *)&amilo_a1655_rfkill_ops
+       },
        {
                .matches = {
                        DMI_MATCH(DMI_SYS_VENDOR, "FUJITSU SIEMENS"),
index f74bfcb..8eea2ef 100644 (file)
@@ -393,17 +393,21 @@ static void gmux_notify_handler(acpi_handle device, u32 value, void *context)
                complete(&gmux_data->powerchange_done);
 }
 
-static int gmux_suspend(struct pnp_dev *pnp, pm_message_t state)
+static int gmux_suspend(struct device *dev)
 {
+       struct pnp_dev *pnp = to_pnp_dev(dev);
        struct apple_gmux_data *gmux_data = pnp_get_drvdata(pnp);
+
        gmux_data->resume_client_id = gmux_active_client(gmux_data);
        gmux_disable_interrupts(gmux_data);
        return 0;
 }
 
-static int gmux_resume(struct pnp_dev *pnp)
+static int gmux_resume(struct device *dev)
 {
+       struct pnp_dev *pnp = to_pnp_dev(dev);
        struct apple_gmux_data *gmux_data = pnp_get_drvdata(pnp);
+
        gmux_enable_interrupts(gmux_data);
        gmux_switchto(gmux_data->resume_client_id);
        if (gmux_data->power_state == VGA_SWITCHEROO_OFF)
@@ -605,13 +609,19 @@ static const struct pnp_device_id gmux_device_ids[] = {
        {"", 0}
 };
 
+static const struct dev_pm_ops gmux_dev_pm_ops = {
+       .suspend = gmux_suspend,
+       .resume = gmux_resume,
+};
+
 static struct pnp_driver gmux_pnp_driver = {
        .name           = "apple-gmux",
        .probe          = gmux_probe,
        .remove         = gmux_remove,
        .id_table       = gmux_device_ids,
-       .suspend        = gmux_suspend,
-       .resume         = gmux_resume
+       .driver         = {
+                       .pm = &gmux_dev_pm_ops,
+       },
 };
 
 static int __init apple_gmux_init(void)
index 36e5e6c..6dfa8d3 100644 (file)
@@ -590,7 +590,7 @@ static ssize_t cmpc_accel_sensitivity_store(struct device *dev,
        inputdev = dev_get_drvdata(&acpi->dev);
        accel = dev_get_drvdata(&inputdev->dev);
 
-       r = strict_strtoul(buf, 0, &sensitivity);
+       r = kstrtoul(buf, 0, &sensitivity);
        if (r)
                return r;
 
index 475cc52..eaa78ed 100644 (file)
@@ -425,7 +425,8 @@ static ssize_t pwm_enable_store(struct device *dev,
        struct compal_data *data = dev_get_drvdata(dev);
        long val;
        int err;
-       err = strict_strtol(buf, 10, &val);
+
+       err = kstrtol(buf, 10, &val);
        if (err)
                return err;
        if (val < 0)
@@ -463,7 +464,8 @@ static ssize_t pwm_store(struct device *dev, struct device_attribute *attr,
        struct compal_data *data = dev_get_drvdata(dev);
        long val;
        int err;
-       err = strict_strtol(buf, 10, &val);
+
+       err = kstrtol(buf, 10, &val);
        if (err)
                return err;
        if (val < 0 || val > 255)
@@ -1081,7 +1083,6 @@ static int compal_remove(struct platform_device *pdev)
        hwmon_device_unregister(data->hwmon_dev);
        power_supply_unregister(&data->psy);
 
-       platform_set_drvdata(pdev, NULL);
        kfree(data);
 
        sysfs_remove_group(&pdev->dev.kobj, &compal_attribute_group);
index d6970f4..1c86fa0 100644 (file)
@@ -725,7 +725,7 @@ static int hp_wmi_rfkill_setup(struct platform_device *device)
                                           (void *) HPWMI_WWAN);
                if (!wwan_rfkill) {
                        err = -ENOMEM;
-                       goto register_gps_error;
+                       goto register_bluetooth_error;
                }
                rfkill_init_sw_state(wwan_rfkill,
                                     hp_wmi_get_sw_state(HPWMI_WWAN));
@@ -733,7 +733,7 @@ static int hp_wmi_rfkill_setup(struct platform_device *device)
                                    hp_wmi_get_hw_state(HPWMI_WWAN));
                err = rfkill_register(wwan_rfkill);
                if (err)
-                       goto register_wwan_err;
+                       goto register_wwan_error;
        }
 
        if (wireless & 0x8) {
@@ -743,7 +743,7 @@ static int hp_wmi_rfkill_setup(struct platform_device *device)
                                                (void *) HPWMI_GPS);
                if (!gps_rfkill) {
                        err = -ENOMEM;
-                       goto register_bluetooth_error;
+                       goto register_wwan_error;
                }
                rfkill_init_sw_state(gps_rfkill,
                                     hp_wmi_get_sw_state(HPWMI_GPS));
@@ -755,16 +755,16 @@ static int hp_wmi_rfkill_setup(struct platform_device *device)
        }
 
        return 0;
-register_wwan_err:
-       rfkill_destroy(wwan_rfkill);
-       wwan_rfkill = NULL;
-       if (gps_rfkill)
-               rfkill_unregister(gps_rfkill);
 register_gps_error:
        rfkill_destroy(gps_rfkill);
        gps_rfkill = NULL;
        if (bluetooth_rfkill)
                rfkill_unregister(bluetooth_rfkill);
+register_wwan_error:
+       rfkill_destroy(wwan_rfkill);
+       wwan_rfkill = NULL;
+       if (gps_rfkill)
+               rfkill_unregister(gps_rfkill);
 register_bluetooth_error:
        rfkill_destroy(bluetooth_rfkill);
        bluetooth_rfkill = NULL;
index 9385afd..41b740c 100644 (file)
@@ -193,17 +193,6 @@ static struct acpi_driver irst_driver = {
        },
 };
 
-static int irst_init(void)
-{
-       return acpi_bus_register_driver(&irst_driver);
-}
-
-static void irst_exit(void)
-{
-       acpi_bus_unregister_driver(&irst_driver);
-}
-
-module_init(irst_init);
-module_exit(irst_exit);
+module_acpi_driver(irst_driver);
 
 MODULE_DEVICE_TABLE(acpi, irst_ids);
index f74e93d..52259dc 100644 (file)
@@ -74,17 +74,6 @@ static struct acpi_driver smartconnect_driver = {
        },
 };
 
-static int smartconnect_init(void)
-{
-       return acpi_bus_register_driver(&smartconnect_driver);
-}
-
-static void smartconnect_exit(void)
-{
-       acpi_bus_unregister_driver(&smartconnect_driver);
-}
-
-module_init(smartconnect_init);
-module_exit(smartconnect_exit);
+module_acpi_driver(smartconnect_driver);
 
 MODULE_DEVICE_TABLE(acpi, smartconnect_ids);
index f59683a..6b18aba 100644 (file)
@@ -128,7 +128,6 @@ static int mfld_pb_remove(struct platform_device *pdev)
 
        free_irq(irq, input);
        input_unregister_device(input);
-       platform_set_drvdata(pdev, NULL);
 
        return 0;
 }
index 81c491e..93fab8b 100644 (file)
@@ -542,7 +542,6 @@ static int mid_thermal_remove(struct platform_device *pdev)
        }
 
        kfree(pinfo);
-       platform_set_drvdata(pdev, NULL);
 
        /* Stop the ADC */
        return configure_adc(0);
index 984253d..10d12b2 100644 (file)
@@ -643,23 +643,6 @@ out_hotkey:
        return result;
 }
 
-static int __init acpi_pcc_init(void)
-{
-       int result = 0;
-
-       if (acpi_disabled)
-               return -ENODEV;
-
-       result = acpi_bus_register_driver(&acpi_pcc_driver);
-       if (result < 0) {
-               ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
-                                 "Error registering hotkey driver\n"));
-               return -ENODEV;
-       }
-
-       return 0;
-}
-
 static int acpi_pcc_hotkey_remove(struct acpi_device *device)
 {
        struct pcc_acpi *pcc = acpi_driver_data(device);
@@ -679,10 +662,4 @@ static int acpi_pcc_hotkey_remove(struct acpi_device *device)
        return 0;
 }
 
-static void __exit acpi_pcc_exit(void)
-{
-       acpi_bus_unregister_driver(&acpi_pcc_driver);
-}
-
-module_init(acpi_pcc_init);
-module_exit(acpi_pcc_exit);
+module_acpi_driver(acpi_pcc_driver);
index 4430b8c..cae7098 100644 (file)
 #include <linux/init.h>
 #include <linux/platform_device.h>
 #include <linux/backlight.h>
-#include <linux/i8042.h>
 #include <linux/dmi.h>
+#include <acpi/acpi_drivers.h>
 
-#define SAMSUNGQ10_BL_MAX_INTENSITY      255
-#define SAMSUNGQ10_BL_DEFAULT_INTENSITY  185
+#define SAMSUNGQ10_BL_MAX_INTENSITY 7
 
-#define SAMSUNGQ10_BL_8042_CMD           0xbe
-#define SAMSUNGQ10_BL_8042_DATA          { 0x89, 0x91 }
-
-static int samsungq10_bl_brightness;
+static acpi_handle ec_handle;
 
 static bool force;
 module_param(force, bool, 0);
@@ -33,21 +29,26 @@ MODULE_PARM_DESC(force,
 static int samsungq10_bl_set_intensity(struct backlight_device *bd)
 {
 
-       int brightness = bd->props.brightness;
-       unsigned char c[3] = SAMSUNGQ10_BL_8042_DATA;
+       acpi_status status;
+       int i;
 
-       c[2] = (unsigned char)brightness;
-       i8042_lock_chip();
-       i8042_command(c, (0x30 << 8) | SAMSUNGQ10_BL_8042_CMD);
-       i8042_unlock_chip();
-       samsungq10_bl_brightness = brightness;
+       for (i = 0; i < SAMSUNGQ10_BL_MAX_INTENSITY; i++) {
+               status = acpi_evaluate_object(ec_handle, "_Q63", NULL, NULL);
+               if (ACPI_FAILURE(status))
+                       return -EIO;
+       }
+       for (i = 0; i < bd->props.brightness; i++) {
+               status = acpi_evaluate_object(ec_handle, "_Q64", NULL, NULL);
+               if (ACPI_FAILURE(status))
+                       return -EIO;
+       }
 
        return 0;
 }
 
 static int samsungq10_bl_get_intensity(struct backlight_device *bd)
 {
-       return samsungq10_bl_brightness;
+       return bd->props.brightness;
 }
 
 static const struct backlight_ops samsungq10_bl_ops = {
@@ -55,28 +56,6 @@ static const struct backlight_ops samsungq10_bl_ops = {
        .update_status  = samsungq10_bl_set_intensity,
 };
 
-#ifdef CONFIG_PM_SLEEP
-static int samsungq10_suspend(struct device *dev)
-{
-       return 0;
-}
-
-static int samsungq10_resume(struct device *dev)
-{
-
-       struct backlight_device *bd = dev_get_drvdata(dev);
-
-       samsungq10_bl_set_intensity(bd);
-       return 0;
-}
-#else
-#define samsungq10_suspend NULL
-#define samsungq10_resume  NULL
-#endif
-
-static SIMPLE_DEV_PM_OPS(samsungq10_pm_ops,
-                         samsungq10_suspend, samsungq10_resume);
-
 static int samsungq10_probe(struct platform_device *pdev)
 {
 
@@ -93,9 +72,6 @@ static int samsungq10_probe(struct platform_device *pdev)
 
        platform_set_drvdata(pdev, bd);
 
-       bd->props.brightness = SAMSUNGQ10_BL_DEFAULT_INTENSITY;
-       samsungq10_bl_set_intensity(bd);
-
        return 0;
 }
 
@@ -104,9 +80,6 @@ static int samsungq10_remove(struct platform_device *pdev)
 
        struct backlight_device *bd = platform_get_drvdata(pdev);
 
-       bd->props.brightness = SAMSUNGQ10_BL_DEFAULT_INTENSITY;
-       samsungq10_bl_set_intensity(bd);
-
        backlight_device_unregister(bd);
 
        return 0;
@@ -116,7 +89,6 @@ static struct platform_driver samsungq10_driver = {
        .driver         = {
                .name   = KBUILD_MODNAME,
                .owner  = THIS_MODULE,
-               .pm     = &samsungq10_pm_ops,
        },
        .probe          = samsungq10_probe,
        .remove         = samsungq10_remove,
@@ -172,6 +144,11 @@ static int __init samsungq10_init(void)
        if (!force && !dmi_check_system(samsungq10_dmi_table))
                return -ENODEV;
 
+       ec_handle = ec_get_handle();
+
+       if (!ec_handle)
+               return -ENODEV;
+
        samsungq10_device = platform_create_bundle(&samsungq10_driver,
                                                   samsungq10_probe,
                                                   NULL, 0, NULL, 0);
index be67e5e..03ca6c1 100644 (file)
@@ -369,7 +369,7 @@ struct tpacpi_led_classdev {
        struct led_classdev led_classdev;
        struct work_struct work;
        enum led_status_t new_state;
-       unsigned int led;
+       int led;
 };
 
 /* brightness level capabilities */
@@ -5296,6 +5296,16 @@ static int __init led_init(struct ibm_init_struct *iibm)
 
        led_supported = led_init_detect_mode();
 
+       if (led_supported != TPACPI_LED_NONE) {
+               useful_leds = tpacpi_check_quirks(led_useful_qtable,
+                               ARRAY_SIZE(led_useful_qtable));
+
+               if (!useful_leds) {
+                       led_handle = NULL;
+                       led_supported = TPACPI_LED_NONE;
+               }
+       }
+
        vdbg_printk(TPACPI_DBG_INIT, "LED commands are %s, mode %d\n",
                str_supported(led_supported), led_supported);
 
@@ -5309,10 +5319,9 @@ static int __init led_init(struct ibm_init_struct *iibm)
                return -ENOMEM;
        }
 
-       useful_leds = tpacpi_check_quirks(led_useful_qtable,
-                                         ARRAY_SIZE(led_useful_qtable));
-
        for (i = 0; i < TPACPI_LED_NUMLEDS; i++) {
+               tpacpi_leds[i].led = -1;
+
                if (!tpacpi_is_led_restricted(i) &&
                    test_bit(i, &useful_leds)) {
                        rc = tpacpi_init_led(i);
@@ -5370,9 +5379,13 @@ static int led_write(char *buf)
                return -ENODEV;
 
        while ((cmd = next_cmd(&buf))) {
-               if (sscanf(cmd, "%d", &led) != 1 || led < 0 || led > 15)
+               if (sscanf(cmd, "%d", &led) != 1)
                        return -EINVAL;
 
+               if (led < 0 || led > (TPACPI_LED_NUMLEDS - 1) ||
+                               tpacpi_leds[led].led < 0)
+                       return -ENODEV;
+
                if (strstr(cmd, "off")) {
                        s = TPACPI_LED_OFF;
                } else if (strstr(cmd, "on")) {
index 6e02c95..601ea95 100644 (file)
@@ -780,7 +780,7 @@ static bool guid_already_parsed(const char *guid_string)
 /*
  * Parse the _WDG method for the GUID data blocks
  */
-static acpi_status parse_wdg(acpi_handle handle)
+static int parse_wdg(acpi_handle handle)
 {
        struct acpi_buffer out = {ACPI_ALLOCATE_BUFFER, NULL};
        union acpi_object *obj;
@@ -812,7 +812,7 @@ static acpi_status parse_wdg(acpi_handle handle)
 
                wblock = kzalloc(sizeof(struct wmi_block), GFP_KERNEL);
                if (!wblock)
-                       return AE_NO_MEMORY;
+                       return -ENOMEM;
 
                wblock->handle = handle;
                wblock->gblock = gblock[i];
index 12adb43..a39ee38 100644 (file)
@@ -163,6 +163,13 @@ static int __pnp_bus_suspend(struct device *dev, pm_message_t state)
        if (!pnp_drv)
                return 0;
 
+       if (pnp_drv->driver.pm && pnp_drv->driver.pm->suspend) {
+               error = pnp_drv->driver.pm->suspend(dev);
+               suspend_report_result(pnp_drv->driver.pm->suspend, error);
+               if (error)
+                       return error;
+       }
+
        if (pnp_drv->suspend) {
                error = pnp_drv->suspend(pnp_dev, state);
                if (error)
@@ -211,6 +218,12 @@ static int pnp_bus_resume(struct device *dev)
                        return error;
        }
 
+       if (pnp_drv->driver.pm && pnp_drv->driver.pm->resume) {
+               error = pnp_drv->driver.pm->resume(dev);
+               if (error)
+                       return error;
+       }
+
        if (pnp_drv->resume) {
                error = pnp_drv->resume(pnp_dev);
                if (error)
index bb49ab6..e6f92b4 100644 (file)
@@ -269,7 +269,6 @@ config CHARGER_ISP1704
 
 config CHARGER_MAX8903
        tristate "MAX8903 Battery DC-DC Charger for USB and Adapter Power"
-       depends on GENERIC_HARDIRQS
        help
          Say Y to enable support for the MAX8903 DC-DC charger and sysfs.
          The driver supports controlling charger-enable and current-limit
@@ -370,7 +369,7 @@ config AB8500_BM
 
 config BATTERY_GOLDFISH
        tristate "Goldfish battery driver"
-       depends on GENERIC_HARDIRQS && (GOLDFISH || COMPILE_TEST)
+       depends on GOLDFISH || COMPILE_TEST
        help
          Say Y to enable support for the battery and AC power in the
          Goldfish emulator.
index 6efd9b6..0c9f280 100644 (file)
@@ -31,7 +31,7 @@ config PPS_CLIENT_PARPORT
 
 config PPS_CLIENT_GPIO
        tristate "PPS client using GPIO"
-       depends on PPS && GENERIC_HARDIRQS
+       depends on PPS
        help
          If you say yes here you get support for a PPS source using
          GPIO. To be useful you must also register a platform device
index eae0eda..9966124 100644 (file)
@@ -184,7 +184,6 @@ static int pps_gpio_remove(struct platform_device *pdev)
 {
        struct pps_gpio_device_data *data = platform_get_drvdata(pdev);
 
-       platform_set_drvdata(pdev, NULL);
        pps_unregister_source(data->pps);
        dev_info(&pdev->dev, "removed IRQ %d as PPS source\n", data->irq);
        return 0;
index 9e3498b..9654aa3 100644 (file)
@@ -1249,6 +1249,15 @@ config RTC_DRV_SIRFSOC
          Say "yes" here to support the real time clock on SiRF SOC chips.
          This driver can also be built as a module called rtc-sirfsoc.
 
+config RTC_DRV_MOXART
+       tristate "MOXA ART RTC"
+       help
+          If you say yes here you get support for the MOXA ART
+          RTC module.
+
+          This driver can also be built as a module. If so, the module
+          will be called rtc-moxart.
+
 comment "HID Sensor RTC drivers"
 
 config RTC_DRV_HID_SENSOR_TIME
index d3b4488..2dff3d2 100644 (file)
@@ -130,3 +130,4 @@ obj-$(CONFIG_RTC_DRV_WM831X)        += rtc-wm831x.o
 obj-$(CONFIG_RTC_DRV_WM8350)   += rtc-wm8350.o
 obj-$(CONFIG_RTC_DRV_X1205)    += rtc-x1205.o
 obj-$(CONFIG_RTC_DRV_SIRFSOC)  += rtc-sirfsoc.o
+obj-$(CONFIG_RTC_DRV_MOXART)   += rtc-moxart.o
index be06d71..24e733c 100644 (file)
@@ -1018,23 +1018,6 @@ static void __exit cmos_pnp_remove(struct pnp_dev *pnp)
        cmos_do_remove(&pnp->dev);
 }
 
-#ifdef CONFIG_PM
-
-static int cmos_pnp_suspend(struct pnp_dev *pnp, pm_message_t mesg)
-{
-       return cmos_suspend(&pnp->dev);
-}
-
-static int cmos_pnp_resume(struct pnp_dev *pnp)
-{
-       return cmos_resume(&pnp->dev);
-}
-
-#else
-#define        cmos_pnp_suspend        NULL
-#define        cmos_pnp_resume         NULL
-#endif
-
 static void cmos_pnp_shutdown(struct pnp_dev *pnp)
 {
        if (system_state == SYSTEM_POWER_OFF && !cmos_poweroff(&pnp->dev))
@@ -1060,8 +1043,11 @@ static struct pnp_driver cmos_pnp_driver = {
 
        /* flag ensures resume() gets called, and stops syslog spam */
        .flags          = PNP_DRIVER_RES_DO_NOT_CHANGE,
-       .suspend        = cmos_pnp_suspend,
-       .resume         = cmos_pnp_resume,
+#ifdef CONFIG_PM_SLEEP
+       .driver         = {
+                       .pm = &cmos_pm_ops,
+       },
+#endif
 };
 
 #endif /* CONFIG_PNP */
index 308a8fe..bc7b4fc 100644 (file)
@@ -89,7 +89,6 @@ enum ds1511reg {
 struct rtc_plat_data {
        struct rtc_device *rtc;
        void __iomem *ioaddr;           /* virtual base address */
-       int size;                               /* amount of memory mapped */
        int irq;
        unsigned int irqen;
        int alrm_sec;
@@ -479,20 +478,14 @@ static int ds1511_rtc_probe(struct platform_device *pdev)
        struct rtc_plat_data *pdata;
        int ret = 0;
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       if (!res)
-               return -ENODEV;
-
        pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
        if (!pdata)
                return -ENOMEM;
-       pdata->size = resource_size(res);
-       if (!devm_request_mem_region(&pdev->dev, res->start, pdata->size,
-                       pdev->name))
-               return -EBUSY;
-       ds1511_base = devm_ioremap(&pdev->dev, res->start, pdata->size);
-       if (!ds1511_base)
-               return -ENOMEM;
+
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       ds1511_base = devm_ioremap_resource(&pdev->dev, res);
+       if (IS_ERR(ds1511_base))
+               return PTR_ERR(ds1511_base);
        pdata->ioaddr = ds1511_base;
        pdata->irq = platform_get_irq(pdev, 0);
 
index 8c6c952..fd31571 100644 (file)
@@ -285,19 +285,14 @@ static int ds1553_rtc_probe(struct platform_device *pdev)
        void __iomem *ioaddr;
        int ret = 0;
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       if (!res)
-               return -ENODEV;
        pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
        if (!pdata)
                return -ENOMEM;
-       if (!devm_request_mem_region(&pdev->dev, res->start, RTC_REG_SIZE,
-                       pdev->name))
-               return -EBUSY;
 
-       ioaddr = devm_ioremap(&pdev->dev, res->start, RTC_REG_SIZE);
-       if (!ioaddr)
-               return -ENOMEM;
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       ioaddr = devm_ioremap_resource(&pdev->dev, res);
+       if (IS_ERR(ioaddr))
+               return PTR_ERR(ioaddr);
        pdata->ioaddr = ioaddr;
        pdata->irq = platform_get_irq(pdev, 0);
 
index eccdc62..17b73fd 100644 (file)
 #define RTC_BATT_FLAG          0x80
 
 struct rtc_plat_data {
-       struct rtc_device *rtc;
        void __iomem *ioaddr_nvram;
        void __iomem *ioaddr_rtc;
        size_t size_nvram;
-       size_t size;
        unsigned long last_jiffies;
        struct bin_attribute nvram_attr;
 };
@@ -117,11 +115,7 @@ static int ds1742_rtc_read_time(struct device *dev, struct rtc_time *tm)
        /* year is 1900 + tm->tm_year */
        tm->tm_year = bcd2bin(year) + bcd2bin(century) * 100 - 1900;
 
-       if (rtc_valid_tm(tm) < 0) {
-               dev_err(dev, "retrieved date/time is not valid.\n");
-               rtc_time_to_tm(0, tm);
-       }
-       return 0;
+       return rtc_valid_tm(tm);
 }
 
 static const struct rtc_class_ops ds1742_rtc_ops = {
@@ -168,22 +162,17 @@ static int ds1742_rtc_probe(struct platform_device *pdev)
        void __iomem *ioaddr;
        int ret = 0;
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       if (!res)
-               return -ENODEV;
        pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
        if (!pdata)
                return -ENOMEM;
-       pdata->size = resource_size(res);
-       if (!devm_request_mem_region(&pdev->dev, res->start, pdata->size,
-               pdev->name))
-               return -EBUSY;
-       ioaddr = devm_ioremap(&pdev->dev, res->start, pdata->size);
-       if (!ioaddr)
-               return -ENOMEM;
+
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       ioaddr = devm_ioremap_resource(&pdev->dev, res);
+       if (IS_ERR(ioaddr))
+               return PTR_ERR(ioaddr);
 
        pdata->ioaddr_nvram = ioaddr;
-       pdata->size_nvram = pdata->size - RTC_SIZE;
+       pdata->size_nvram = resource_size(res) - RTC_SIZE;
        pdata->ioaddr_rtc = ioaddr + pdata->size_nvram;
 
        sysfs_bin_attr_init(&pdata->nvram_attr);
@@ -212,7 +201,6 @@ static int ds1742_rtc_probe(struct platform_device *pdev)
                                  &ds1742_rtc_ops, THIS_MODULE);
        if (IS_ERR(rtc))
                return PTR_ERR(rtc);
-       pdata->rtc = rtc;
 
        ret = sysfs_create_bin_file(&pdev->dev.kobj, &pdata->nvram_attr);
 
index 549b3c3..580e7b5 100644 (file)
@@ -138,17 +138,9 @@ static int ep93xx_rtc_probe(struct platform_device *pdev)
                return -ENOMEM;
 
        res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       if (!res)
-               return -ENXIO;
-
-       if (!devm_request_mem_region(&pdev->dev, res->start,
-                                    resource_size(res), pdev->name))
-               return -EBUSY;
-
-       ep93xx_rtc->mmio_base = devm_ioremap(&pdev->dev, res->start,
-                                            resource_size(res));
-       if (!ep93xx_rtc->mmio_base)
-               return -ENXIO;
+       ep93xx_rtc->mmio_base = devm_ioremap_resource(&pdev->dev, res);
+       if (IS_ERR(ep93xx_rtc->mmio_base))
+               return PTR_ERR(ep93xx_rtc->mmio_base);
 
        pdev->dev.platform_data = ep93xx_rtc;
        platform_set_drvdata(pdev, ep93xx_rtc);
index 7273b01..4e2a818 100644 (file)
 #include <linux/iio/iio.h>
 #include <linux/rtc.h>
 
-/* Format: HID-SENSOR-usage_id_in_hex */
-/* Usage ID from spec for Time: 0x2000A0 */
-#define DRIVER_NAME "HID-SENSOR-2000a0" /* must be lowercase */
-
 enum hid_time_channel {
        CHANNEL_SCAN_INDEX_YEAR,
        CHANNEL_SCAN_INDEX_MONTH,
@@ -283,9 +279,11 @@ static int hid_time_probe(struct platform_device *pdev)
                                        "hid-sensor-time", &hid_time_rtc_ops,
                                        THIS_MODULE);
 
-       if (IS_ERR(time_state->rtc)) {
+       if (IS_ERR_OR_NULL(time_state->rtc)) {
+               ret = time_state->rtc ? PTR_ERR(time_state->rtc) : -ENODEV;
+               time_state->rtc = NULL;
+               sensor_hub_remove_callback(hsdev, HID_USAGE_SENSOR_TIME);
                dev_err(&pdev->dev, "rtc device register failed!\n");
-               return PTR_ERR(time_state->rtc);
        }
 
        return ret;
@@ -300,9 +298,19 @@ static int hid_time_remove(struct platform_device *pdev)
        return 0;
 }
 
+static struct platform_device_id hid_time_ids[] = {
+       {
+               /* Format: HID-SENSOR-usage_id_in_hex_lowercase */
+               .name = "HID-SENSOR-2000a0",
+       },
+       { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(platform, hid_time_ids);
+
 static struct platform_driver hid_time_platform_driver = {
+       .id_table = hid_time_ids,
        .driver = {
-               .name   = DRIVER_NAME,
+               .name   = KBUILD_MODNAME,
                .owner  = THIS_MODULE,
        },
        .probe          = hid_time_probe,
index d3a8c8e..abd7f90 100644 (file)
@@ -375,24 +375,16 @@ static int __init dryice_rtc_probe(struct platform_device *pdev)
        struct imxdi_dev *imxdi;
        int rc;
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       if (!res)
-               return -ENODEV;
-
        imxdi = devm_kzalloc(&pdev->dev, sizeof(*imxdi), GFP_KERNEL);
        if (!imxdi)
                return -ENOMEM;
 
        imxdi->pdev = pdev;
 
-       if (!devm_request_mem_region(&pdev->dev, res->start, resource_size(res),
-                               pdev->name))
-               return -EBUSY;
-
-       imxdi->ioaddr = devm_ioremap(&pdev->dev, res->start,
-                       resource_size(res));
-       if (imxdi->ioaddr == NULL)
-               return -ENOMEM;
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       imxdi->ioaddr = devm_ioremap_resource(&pdev->dev, res);
+       if (IS_ERR(imxdi->ioaddr))
+               return PTR_ERR(imxdi->ioaddr);
 
        spin_lock_init(&imxdi->irq_lock);
 
index 8276ae9..bfdbcb8 100644 (file)
@@ -201,16 +201,9 @@ static int lpc32xx_rtc_probe(struct platform_device *pdev)
 {
        struct resource *res;
        struct lpc32xx_rtc *rtc;
-       resource_size_t size;
        int rtcirq;
        u32 tmp;
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       if (!res) {
-               dev_err(&pdev->dev, "Can't get memory resource\n");
-               return -ENOENT;
-       }
-
        rtcirq = platform_get_irq(pdev, 0);
        if (rtcirq < 0 || rtcirq >= NR_IRQS) {
                dev_warn(&pdev->dev, "Can't get interrupt resource\n");
@@ -224,19 +217,10 @@ static int lpc32xx_rtc_probe(struct platform_device *pdev)
        }
        rtc->irq = rtcirq;
 
-       size = resource_size(res);
-
-       if (!devm_request_mem_region(&pdev->dev, res->start, size,
-                                    pdev->name)) {
-               dev_err(&pdev->dev, "RTC registers are not free\n");
-               return -EBUSY;
-       }
-
-       rtc->rtc_base = devm_ioremap(&pdev->dev, res->start, size);
-       if (!rtc->rtc_base) {
-               dev_err(&pdev->dev, "Can't map memory\n");
-               return -ENOMEM;
-       }
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       rtc->rtc_base = devm_ioremap_resource(&pdev->dev, res);
+       if (IS_ERR(rtc->rtc_base))
+               return PTR_ERR(rtc->rtc_base);
 
        spin_lock_init(&rtc->lock);
 
index 9915cb9..9efe118 100644 (file)
@@ -240,9 +240,9 @@ static int max77686_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
        }
 
        alrm->pending = 0;
-       ret = regmap_read(info->max77686->regmap, MAX77686_REG_STATUS1, &val);
+       ret = regmap_read(info->max77686->regmap, MAX77686_REG_STATUS2, &val);
        if (ret < 0) {
-               dev_err(info->dev, "%s:%d fail to read status1 reg(%d)\n",
+               dev_err(info->dev, "%s:%d fail to read status2 reg(%d)\n",
                                __func__, __LINE__, ret);
                goto out;
        }
diff --git a/drivers/rtc/rtc-moxart.c b/drivers/rtc/rtc-moxart.c
new file mode 100644 (file)
index 0000000..c29dee0
--- /dev/null
@@ -0,0 +1,330 @@
+/*
+ * MOXA ART RTC driver.
+ *
+ * Copyright (C) 2013 Jonas Jensen
+ *
+ * Jonas Jensen <jonas.jensen@gmail.com>
+ *
+ * Based on code from
+ * Moxa Technology Co., Ltd. <www.moxa.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2.  This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/delay.h>
+#include <linux/rtc.h>
+#include <linux/platform_device.h>
+#include <linux/module.h>
+#include <linux/gpio.h>
+#include <linux/of_gpio.h>
+
+#define GPIO_RTC_RESERVED                      0x0C
+#define GPIO_RTC_DATA_SET                      0x10
+#define GPIO_RTC_DATA_CLEAR                    0x14
+#define GPIO_RTC_PIN_PULL_ENABLE               0x18
+#define GPIO_RTC_PIN_PULL_TYPE                 0x1C
+#define GPIO_RTC_INT_ENABLE                    0x20
+#define GPIO_RTC_INT_RAW_STATE                 0x24
+#define GPIO_RTC_INT_MASKED_STATE              0x28
+#define GPIO_RTC_INT_MASK                      0x2C
+#define GPIO_RTC_INT_CLEAR                     0x30
+#define GPIO_RTC_INT_TRIGGER                   0x34
+#define GPIO_RTC_INT_BOTH                      0x38
+#define GPIO_RTC_INT_RISE_NEG                  0x3C
+#define GPIO_RTC_BOUNCE_ENABLE                 0x40
+#define GPIO_RTC_BOUNCE_PRE_SCALE              0x44
+#define GPIO_RTC_PROTECT_W                     0x8E
+#define GPIO_RTC_PROTECT_R                     0x8F
+#define GPIO_RTC_YEAR_W                                0x8C
+#define GPIO_RTC_YEAR_R                                0x8D
+#define GPIO_RTC_DAY_W                         0x8A
+#define GPIO_RTC_DAY_R                         0x8B
+#define GPIO_RTC_MONTH_W                       0x88
+#define GPIO_RTC_MONTH_R                       0x89
+#define GPIO_RTC_DATE_W                                0x86
+#define GPIO_RTC_DATE_R                                0x87
+#define GPIO_RTC_HOURS_W                       0x84
+#define GPIO_RTC_HOURS_R                       0x85
+#define GPIO_RTC_MINUTES_W                     0x82
+#define GPIO_RTC_MINUTES_R                     0x83
+#define GPIO_RTC_SECONDS_W                     0x80
+#define GPIO_RTC_SECONDS_R                     0x81
+#define GPIO_RTC_DELAY_TIME                    8
+
+struct moxart_rtc {
+       struct rtc_device *rtc;
+       spinlock_t rtc_lock;
+       int gpio_data, gpio_sclk, gpio_reset;
+};
+
+static int day_of_year[12] =   { 0, 31, 59, 90, 120, 151, 181,
+                                 212, 243, 273, 304, 334 };
+
+static void moxart_rtc_write_byte(struct device *dev, u8 data)
+{
+       struct moxart_rtc *moxart_rtc = dev_get_drvdata(dev);
+       int i;
+
+       for (i = 0; i < 8; i++, data >>= 1) {
+               gpio_set_value(moxart_rtc->gpio_sclk, 0);
+               gpio_set_value(moxart_rtc->gpio_data, ((data & 1) == 1));
+               udelay(GPIO_RTC_DELAY_TIME);
+               gpio_set_value(moxart_rtc->gpio_sclk, 1);
+               udelay(GPIO_RTC_DELAY_TIME);
+       }
+}
+
+static u8 moxart_rtc_read_byte(struct device *dev)
+{
+       struct moxart_rtc *moxart_rtc = dev_get_drvdata(dev);
+       int i;
+       u8 data = 0;
+
+       for (i = 0; i < 8; i++) {
+               gpio_set_value(moxart_rtc->gpio_sclk, 0);
+               udelay(GPIO_RTC_DELAY_TIME);
+               gpio_set_value(moxart_rtc->gpio_sclk, 1);
+               udelay(GPIO_RTC_DELAY_TIME);
+               if (gpio_get_value(moxart_rtc->gpio_data))
+                       data |= (1 << i);
+               udelay(GPIO_RTC_DELAY_TIME);
+       }
+       return data;
+}
+
+static u8 moxart_rtc_read_register(struct device *dev, u8 cmd)
+{
+       struct moxart_rtc *moxart_rtc = dev_get_drvdata(dev);
+       u8 data;
+       unsigned long flags;
+
+       local_irq_save(flags);
+
+       gpio_direction_output(moxart_rtc->gpio_data, 0);
+       gpio_set_value(moxart_rtc->gpio_reset, 1);
+       udelay(GPIO_RTC_DELAY_TIME);
+       moxart_rtc_write_byte(dev, cmd);
+       gpio_direction_input(moxart_rtc->gpio_data);
+       udelay(GPIO_RTC_DELAY_TIME);
+       data = moxart_rtc_read_byte(dev);
+       gpio_set_value(moxart_rtc->gpio_sclk, 0);
+       gpio_set_value(moxart_rtc->gpio_reset, 0);
+       udelay(GPIO_RTC_DELAY_TIME);
+
+       local_irq_restore(flags);
+
+       return data;
+}
+
+static void moxart_rtc_write_register(struct device *dev, u8 cmd, u8 data)
+{
+       struct moxart_rtc *moxart_rtc = dev_get_drvdata(dev);
+       unsigned long flags;
+
+       local_irq_save(flags);
+
+       gpio_direction_output(moxart_rtc->gpio_data, 0);
+       gpio_set_value(moxart_rtc->gpio_reset, 1);
+       udelay(GPIO_RTC_DELAY_TIME);
+       moxart_rtc_write_byte(dev, cmd);
+       moxart_rtc_write_byte(dev, data);
+       gpio_set_value(moxart_rtc->gpio_sclk, 0);
+       gpio_set_value(moxart_rtc->gpio_reset, 0);
+       udelay(GPIO_RTC_DELAY_TIME);
+
+       local_irq_restore(flags);
+}
+
+static int moxart_rtc_set_time(struct device *dev, struct rtc_time *tm)
+{
+       struct moxart_rtc *moxart_rtc = dev_get_drvdata(dev);
+
+       spin_lock_irq(&moxart_rtc->rtc_lock);
+
+       moxart_rtc_write_register(dev, GPIO_RTC_PROTECT_W, 0);
+       moxart_rtc_write_register(dev, GPIO_RTC_YEAR_W,
+                                 (((tm->tm_year - 100) / 10) << 4) |
+                                 ((tm->tm_year - 100) % 10));
+
+       moxart_rtc_write_register(dev, GPIO_RTC_MONTH_W,
+                                 (((tm->tm_mon + 1) / 10) << 4) |
+                                 ((tm->tm_mon + 1) % 10));
+
+       moxart_rtc_write_register(dev, GPIO_RTC_DATE_W,
+                                 ((tm->tm_mday / 10) << 4) |
+                                 (tm->tm_mday % 10));
+
+       moxart_rtc_write_register(dev, GPIO_RTC_HOURS_W,
+                                 ((tm->tm_hour / 10) << 4) |
+                                 (tm->tm_hour % 10));
+
+       moxart_rtc_write_register(dev, GPIO_RTC_MINUTES_W,
+                                 ((tm->tm_min / 10) << 4) |
+                                 (tm->tm_min % 10));
+
+       moxart_rtc_write_register(dev, GPIO_RTC_SECONDS_W,
+                                 ((tm->tm_sec / 10) << 4) |
+                                 (tm->tm_sec % 10));
+
+       moxart_rtc_write_register(dev, GPIO_RTC_PROTECT_W, 0x80);
+
+       spin_unlock_irq(&moxart_rtc->rtc_lock);
+
+       dev_dbg(dev, "%s: success tm_year=%d tm_mon=%d\n"
+               "tm_mday=%d tm_hour=%d tm_min=%d tm_sec=%d\n",
+               __func__, tm->tm_year, tm->tm_mon, tm->tm_mday,
+               tm->tm_hour, tm->tm_min, tm->tm_sec);
+
+       return 0;
+}
+
+static int moxart_rtc_read_time(struct device *dev, struct rtc_time *tm)
+{
+       struct moxart_rtc *moxart_rtc = dev_get_drvdata(dev);
+       unsigned char v;
+
+       spin_lock_irq(&moxart_rtc->rtc_lock);
+
+       v = moxart_rtc_read_register(dev, GPIO_RTC_SECONDS_R);
+       tm->tm_sec = (((v & 0x70) >> 4) * 10) + (v & 0x0F);
+
+       v = moxart_rtc_read_register(dev, GPIO_RTC_MINUTES_R);
+       tm->tm_min = (((v & 0x70) >> 4) * 10) + (v & 0x0F);
+
+       v = moxart_rtc_read_register(dev, GPIO_RTC_HOURS_R);
+       if (v & 0x80) { /* 12-hour mode */
+               tm->tm_hour = (((v & 0x10) >> 4) * 10) + (v & 0x0F);
+               if (v & 0x20) { /* PM mode */
+                       tm->tm_hour += 12;
+                       if (tm->tm_hour >= 24)
+                               tm->tm_hour = 0;
+               }
+       } else { /* 24-hour mode */
+               tm->tm_hour = (((v & 0x30) >> 4) * 10) + (v & 0x0F);
+       }
+
+       v = moxart_rtc_read_register(dev, GPIO_RTC_DATE_R);
+       tm->tm_mday = (((v & 0x30) >> 4) * 10) + (v & 0x0F);
+
+       v = moxart_rtc_read_register(dev, GPIO_RTC_MONTH_R);
+       tm->tm_mon = (((v & 0x10) >> 4) * 10) + (v & 0x0F);
+       tm->tm_mon--;
+
+       v = moxart_rtc_read_register(dev, GPIO_RTC_YEAR_R);
+       tm->tm_year = (((v & 0xF0) >> 4) * 10) + (v & 0x0F);
+       tm->tm_year += 100;
+       if (tm->tm_year <= 69)
+               tm->tm_year += 100;
+
+       v = moxart_rtc_read_register(dev, GPIO_RTC_DAY_R);
+       tm->tm_wday = (v & 0x0f) - 1;
+       tm->tm_yday = day_of_year[tm->tm_mon];
+       tm->tm_yday += (tm->tm_mday - 1);
+       if (tm->tm_mon >= 2) {
+               if (!(tm->tm_year % 4) && (tm->tm_year % 100))
+                       tm->tm_yday++;
+       }
+
+       tm->tm_isdst = 0;
+
+       spin_unlock_irq(&moxart_rtc->rtc_lock);
+
+       return 0;
+}
+
+static const struct rtc_class_ops moxart_rtc_ops = {
+       .read_time      = moxart_rtc_read_time,
+       .set_time       = moxart_rtc_set_time,
+};
+
+static int moxart_rtc_probe(struct platform_device *pdev)
+{
+       struct moxart_rtc *moxart_rtc;
+       int ret = 0;
+
+       moxart_rtc = devm_kzalloc(&pdev->dev, sizeof(*moxart_rtc), GFP_KERNEL);
+       if (!moxart_rtc) {
+               dev_err(&pdev->dev, "devm_kzalloc failed\n");
+               return -ENOMEM;
+       }
+
+       moxart_rtc->gpio_data = of_get_named_gpio(pdev->dev.of_node,
+                                                 "gpio-rtc-data", 0);
+       if (!gpio_is_valid(moxart_rtc->gpio_data)) {
+               dev_err(&pdev->dev, "invalid gpio (data): %d\n",
+                       moxart_rtc->gpio_data);
+               return moxart_rtc->gpio_data;
+       }
+
+       moxart_rtc->gpio_sclk = of_get_named_gpio(pdev->dev.of_node,
+                                                 "gpio-rtc-sclk", 0);
+       if (!gpio_is_valid(moxart_rtc->gpio_sclk)) {
+               dev_err(&pdev->dev, "invalid gpio (sclk): %d\n",
+                       moxart_rtc->gpio_sclk);
+               return moxart_rtc->gpio_sclk;
+       }
+
+       moxart_rtc->gpio_reset = of_get_named_gpio(pdev->dev.of_node,
+                                                  "gpio-rtc-reset", 0);
+       if (!gpio_is_valid(moxart_rtc->gpio_reset)) {
+               dev_err(&pdev->dev, "invalid gpio (reset): %d\n",
+                       moxart_rtc->gpio_reset);
+               return moxart_rtc->gpio_reset;
+       }
+
+       spin_lock_init(&moxart_rtc->rtc_lock);
+       platform_set_drvdata(pdev, moxart_rtc);
+
+       ret = devm_gpio_request(&pdev->dev, moxart_rtc->gpio_data, "rtc_data");
+       if (ret) {
+               dev_err(&pdev->dev, "can't get rtc_data gpio\n");
+               return ret;
+       }
+
+       ret = devm_gpio_request_one(&pdev->dev, moxart_rtc->gpio_sclk,
+                                   GPIOF_DIR_OUT, "rtc_sclk");
+       if (ret) {
+               dev_err(&pdev->dev, "can't get rtc_sclk gpio\n");
+               return ret;
+       }
+
+       ret = devm_gpio_request_one(&pdev->dev, moxart_rtc->gpio_reset,
+                                   GPIOF_DIR_OUT, "rtc_reset");
+       if (ret) {
+               dev_err(&pdev->dev, "can't get rtc_reset gpio\n");
+               return ret;
+       }
+
+       moxart_rtc->rtc = devm_rtc_device_register(&pdev->dev, pdev->name,
+                                                  &moxart_rtc_ops,
+                                                  THIS_MODULE);
+       if (IS_ERR(moxart_rtc->rtc)) {
+               dev_err(&pdev->dev, "devm_rtc_device_register failed\n");
+               return PTR_ERR(moxart_rtc->rtc);
+       }
+
+       return 0;
+}
+
+static const struct of_device_id moxart_rtc_match[] = {
+       { .compatible = "moxa,moxart-rtc" },
+       { },
+};
+
+static struct platform_driver moxart_rtc_driver = {
+       .probe  = moxart_rtc_probe,
+       .driver = {
+               .name           = "moxart-rtc",
+               .owner          = THIS_MODULE,
+               .of_match_table = moxart_rtc_match,
+       },
+};
+module_platform_driver(moxart_rtc_driver);
+
+MODULE_DESCRIPTION("MOXART RTC driver");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Jonas Jensen <jonas.jensen@gmail.com>");
index baab802..d536c59 100644 (file)
@@ -221,26 +221,17 @@ static int __init mv_rtc_probe(struct platform_device *pdev)
 {
        struct resource *res;
        struct rtc_plat_data *pdata;
-       resource_size_t size;
        u32 rtc_time;
        int ret = 0;
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       if (!res)
-               return -ENODEV;
-
        pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
        if (!pdata)
                return -ENOMEM;
 
-       size = resource_size(res);
-       if (!devm_request_mem_region(&pdev->dev, res->start, size,
-                                    pdev->name))
-               return -EBUSY;
-
-       pdata->ioaddr = devm_ioremap(&pdev->dev, res->start, size);
-       if (!pdata->ioaddr)
-               return -ENOMEM;
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       pdata->ioaddr = devm_ioremap_resource(&pdev->dev, res);
+       if (IS_ERR(pdata->ioaddr))
+               return PTR_ERR(pdata->ioaddr);
 
        pdata->clk = devm_clk_get(&pdev->dev, NULL);
        /* Not all SoCs require a clock.*/
index ab87bac..50c5726 100644 (file)
@@ -377,22 +377,16 @@ static int mxc_rtc_probe(struct platform_device *pdev)
        unsigned long rate;
        int ret;
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       if (!res)
-               return -ENODEV;
-
        pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
        if (!pdata)
                return -ENOMEM;
 
        pdata->devtype = pdev->id_entry->driver_data;
 
-       if (!devm_request_mem_region(&pdev->dev, res->start,
-                                    resource_size(res), pdev->name))
-               return -EBUSY;
-
-       pdata->ioaddr = devm_ioremap(&pdev->dev, res->start,
-                                    resource_size(res));
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       pdata->ioaddr = devm_ioremap_resource(&pdev->dev, res);
+       if (IS_ERR(pdata->ioaddr))
+               return PTR_ERR(pdata->ioaddr);
 
        pdata->clk = devm_clk_get(&pdev->dev, NULL);
        if (IS_ERR(pdata->clk)) {
index 22861c5..248653c 100644 (file)
@@ -99,7 +99,7 @@ static int *check_rtc_access_enable(struct nuc900_rtc *nuc900_rtc)
        if (!timeout)
                return ERR_PTR(-EPERM);
 
-       return 0;
+       return NULL;
 }
 
 static int nuc900_rtc_bcd2bin(unsigned int timereg,
index c6ffbae..c7d97ee 100644 (file)
@@ -70,6 +70,8 @@
 #define OMAP_RTC_KICK0_REG             0x6c
 #define OMAP_RTC_KICK1_REG             0x70
 
+#define OMAP_RTC_IRQWAKEEN             0x7c
+
 /* OMAP_RTC_CTRL_REG bit fields: */
 #define OMAP_RTC_CTRL_SPLIT            (1<<7)
 #define OMAP_RTC_CTRL_DISABLE          (1<<6)
 #define OMAP_RTC_INTERRUPTS_IT_ALARM    (1<<3)
 #define OMAP_RTC_INTERRUPTS_IT_TIMER    (1<<2)
 
+/* OMAP_RTC_IRQWAKEEN bit fields: */
+#define OMAP_RTC_IRQWAKEEN_ALARM_WAKEEN    (1<<1)
+
 /* OMAP_RTC_KICKER values */
 #define        KICK0_VALUE                     0x83e70b13
 #define        KICK1_VALUE                     0x95a4f1e0
 
 #define        OMAP_RTC_HAS_KICKER             0x1
 
+/*
+ * Few RTC IP revisions has special WAKE-EN Register to enable Wakeup
+ * generation for event Alarm.
+ */
+#define        OMAP_RTC_HAS_IRQWAKEEN          0x2
+
 static void __iomem    *rtc_base;
 
 #define rtc_read(addr)         readb(rtc_base + (addr))
@@ -299,12 +310,18 @@ static struct rtc_class_ops omap_rtc_ops = {
 static int omap_rtc_alarm;
 static int omap_rtc_timer;
 
-#define        OMAP_RTC_DATA_DA830_IDX 1
+#define        OMAP_RTC_DATA_AM3352_IDX        1
+#define        OMAP_RTC_DATA_DA830_IDX         2
 
 static struct platform_device_id omap_rtc_devtype[] = {
        {
                .name   = DRIVER_NAME,
-       }, {
+       },
+       [OMAP_RTC_DATA_AM3352_IDX] = {
+               .name   = "am3352-rtc",
+               .driver_data = OMAP_RTC_HAS_KICKER | OMAP_RTC_HAS_IRQWAKEEN,
+       },
+       [OMAP_RTC_DATA_DA830_IDX] = {
                .name   = "da830-rtc",
                .driver_data = OMAP_RTC_HAS_KICKER,
        },
@@ -316,6 +333,9 @@ static const struct of_device_id omap_rtc_of_match[] = {
        {       .compatible     = "ti,da830-rtc",
                .data           = &omap_rtc_devtype[OMAP_RTC_DATA_DA830_IDX],
        },
+       {       .compatible     = "ti,am3352-rtc",
+               .data           = &omap_rtc_devtype[OMAP_RTC_DATA_AM3352_IDX],
+       },
        {},
 };
 MODULE_DEVICE_TABLE(of, omap_rtc_of_match);
@@ -464,16 +484,28 @@ static u8 irqstat;
 
 static int omap_rtc_suspend(struct device *dev)
 {
+       u8 irqwake_stat;
+       struct platform_device *pdev = to_platform_device(dev);
+       const struct platform_device_id *id_entry =
+                                       platform_get_device_id(pdev);
+
        irqstat = rtc_read(OMAP_RTC_INTERRUPTS_REG);
 
        /* FIXME the RTC alarm is not currently acting as a wakeup event
-        * source, and in fact this enable() call is just saving a flag
-        * that's never used...
+        * source on some platforms, and in fact this enable() call is just
+        * saving a flag that's never used...
         */
-       if (device_may_wakeup(dev))
+       if (device_may_wakeup(dev)) {
                enable_irq_wake(omap_rtc_alarm);
-       else
+
+               if (id_entry->driver_data & OMAP_RTC_HAS_IRQWAKEEN) {
+                       irqwake_stat = rtc_read(OMAP_RTC_IRQWAKEEN);
+                       irqwake_stat |= OMAP_RTC_IRQWAKEEN_ALARM_WAKEEN;
+                       rtc_write(irqwake_stat, OMAP_RTC_IRQWAKEEN);
+               }
+       } else {
                rtc_write(0, OMAP_RTC_INTERRUPTS_REG);
+       }
 
        /* Disable the clock/module */
        pm_runtime_put_sync(dev);
@@ -483,13 +515,25 @@ static int omap_rtc_suspend(struct device *dev)
 
 static int omap_rtc_resume(struct device *dev)
 {
+       u8 irqwake_stat;
+       struct platform_device *pdev = to_platform_device(dev);
+       const struct platform_device_id *id_entry =
+                               platform_get_device_id(pdev);
+
        /* Enable the clock/module so that we can access the registers */
        pm_runtime_get_sync(dev);
 
-       if (device_may_wakeup(dev))
+       if (device_may_wakeup(dev)) {
                disable_irq_wake(omap_rtc_alarm);
-       else
+
+               if (id_entry->driver_data & OMAP_RTC_HAS_IRQWAKEEN) {
+                       irqwake_stat = rtc_read(OMAP_RTC_IRQWAKEEN);
+                       irqwake_stat &= ~OMAP_RTC_IRQWAKEEN_ALARM_WAKEEN;
+                       rtc_write(irqwake_stat, OMAP_RTC_IRQWAKEEN);
+               }
+       } else {
                rtc_write(irqstat, OMAP_RTC_INTERRUPTS_REG);
+       }
        return 0;
 }
 #endif
index a1fecc8..fffb7d3 100644 (file)
@@ -238,6 +238,15 @@ static int palmas_rtc_probe(struct platform_device *pdev)
        struct palmas *palmas = dev_get_drvdata(pdev->dev.parent);
        struct palmas_rtc *palmas_rtc = NULL;
        int ret;
+       bool enable_bb_charging = false;
+       bool high_bb_charging;
+
+       if (pdev->dev.of_node) {
+               enable_bb_charging = of_property_read_bool(pdev->dev.of_node,
+                                       "ti,backup-battery-chargeable");
+               high_bb_charging = of_property_read_bool(pdev->dev.of_node,
+                                       "ti,backup-battery-charge-high-current");
+       }
 
        palmas_rtc = devm_kzalloc(&pdev->dev, sizeof(struct palmas_rtc),
                        GFP_KERNEL);
@@ -254,6 +263,32 @@ static int palmas_rtc_probe(struct platform_device *pdev)
        palmas_rtc->dev = &pdev->dev;
        platform_set_drvdata(pdev, palmas_rtc);
 
+       if (enable_bb_charging) {
+               unsigned reg = PALMAS_BACKUP_BATTERY_CTRL_BBS_BBC_LOW_ICHRG;
+
+               if (high_bb_charging)
+                       reg = 0;
+
+               ret = palmas_update_bits(palmas, PALMAS_PMU_CONTROL_BASE,
+                       PALMAS_BACKUP_BATTERY_CTRL,
+                       PALMAS_BACKUP_BATTERY_CTRL_BBS_BBC_LOW_ICHRG, reg);
+               if (ret < 0) {
+                       dev_err(&pdev->dev,
+                               "BACKUP_BATTERY_CTRL update failed, %d\n", ret);
+                       return ret;
+               }
+
+               ret = palmas_update_bits(palmas, PALMAS_PMU_CONTROL_BASE,
+                       PALMAS_BACKUP_BATTERY_CTRL,
+                       PALMAS_BACKUP_BATTERY_CTRL_BB_CHG_EN,
+                       PALMAS_BACKUP_BATTERY_CTRL_BB_CHG_EN);
+               if (ret < 0) {
+                       dev_err(&pdev->dev,
+                               "BACKUP_BATTERY_CTRL update failed, %d\n", ret);
+                       return ret;
+               }
+       }
+
        /* Start RTC */
        ret = palmas_update_bits(palmas, PALMAS_RTC_BASE, PALMAS_RTC_CTRL_REG,
                        PALMAS_RTC_CTRL_REG_STOP_RTC,
index 205b9f7..1ee514a 100644 (file)
@@ -203,11 +203,6 @@ static int pcf2127_probe(struct i2c_client *client,
        return 0;
 }
 
-static int pcf2127_remove(struct i2c_client *client)
-{
-       return 0;
-}
-
 static const struct i2c_device_id pcf2127_id[] = {
        { "pcf2127", 0 },
        { }
@@ -229,7 +224,6 @@ static struct i2c_driver pcf2127_driver = {
                .of_match_table = of_match_ptr(pcf2127_of_match),
        },
        .probe          = pcf2127_probe,
-       .remove         = pcf2127_remove,
        .id_table       = pcf2127_id,
 };
 
index aa7ed4b..63460cf 100644 (file)
@@ -44,6 +44,7 @@ struct sirfsoc_rtc_drv {
        struct rtc_device       *rtc;
        u32                     rtc_base;
        u32                     irq;
+       unsigned                irq_wake;
        /* Overflow for every 8 years extra time */
        u32                     overflow_rtc;
 #ifdef CONFIG_PM
@@ -355,8 +356,8 @@ static int sirfsoc_rtc_suspend(struct device *dev)
        rtcdrv->saved_counter =
                sirfsoc_rtc_iobrg_readl(rtcdrv->rtc_base + RTC_CN);
        rtcdrv->saved_overflow_rtc = rtcdrv->overflow_rtc;
-       if (device_may_wakeup(&pdev->dev))
-               enable_irq_wake(rtcdrv->irq);
+       if (device_may_wakeup(&pdev->dev) && !enable_irq_wake(rtcdrv->irq))
+               rtcdrv->irq_wake = 1;
 
        return 0;
 }
@@ -423,8 +424,10 @@ static int sirfsoc_rtc_resume(struct device *dev)
        struct platform_device *pdev = to_platform_device(dev);
        struct sirfsoc_rtc_drv *rtcdrv = platform_get_drvdata(pdev);
        sirfsoc_rtc_thaw(dev);
-       if (device_may_wakeup(&pdev->dev))
+       if (device_may_wakeup(&pdev->dev) && rtcdrv->irq_wake) {
                disable_irq_wake(rtcdrv->irq);
+               rtcdrv->irq_wake = 0;
+       }
 
        return 0;
 }
@@ -434,8 +437,10 @@ static int sirfsoc_rtc_restore(struct device *dev)
        struct platform_device *pdev = to_platform_device(dev);
        struct sirfsoc_rtc_drv *rtcdrv = platform_get_drvdata(pdev);
 
-       if (device_may_wakeup(&pdev->dev))
+       if (device_may_wakeup(&pdev->dev) && rtcdrv->irq_wake) {
                disable_irq_wake(rtcdrv->irq);
+               rtcdrv->irq_wake = 0;
+       }
        return 0;
 }
 
index af5e97e..a176ba6 100644 (file)
@@ -294,19 +294,14 @@ static int stk17ta8_rtc_probe(struct platform_device *pdev)
        void __iomem *ioaddr;
        int ret = 0;
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       if (!res)
-               return -ENODEV;
-
        pdata = devm_kzalloc(&pdev->dev, sizeof(*pdata), GFP_KERNEL);
        if (!pdata)
                return -ENOMEM;
-       if (!devm_request_mem_region(&pdev->dev, res->start, RTC_REG_SIZE,
-                       pdev->name))
-               return -EBUSY;
-       ioaddr = devm_ioremap(&pdev->dev, res->start, RTC_REG_SIZE);
-       if (!ioaddr)
-               return -ENOMEM;
+
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       ioaddr = devm_ioremap_resource(&pdev->dev, res);
+       if (IS_ERR(ioaddr))
+               return PTR_ERR(ioaddr);
        pdata->ioaddr = ioaddr;
        pdata->irq = platform_get_irq(pdev, 0);
 
index f9a0677..4f87234 100644 (file)
@@ -244,9 +244,6 @@ static int __init tx4939_rtc_probe(struct platform_device *pdev)
        struct resource *res;
        int irq, ret;
 
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       if (!res)
-               return -ENODEV;
        irq = platform_get_irq(pdev, 0);
        if (irq < 0)
                return -ENODEV;
@@ -255,13 +252,10 @@ static int __init tx4939_rtc_probe(struct platform_device *pdev)
                return -ENOMEM;
        platform_set_drvdata(pdev, pdata);
 
-       if (!devm_request_mem_region(&pdev->dev, res->start,
-                                    resource_size(res), pdev->name))
-               return -EBUSY;
-       pdata->rtcreg = devm_ioremap(&pdev->dev, res->start,
-                                    resource_size(res));
-       if (!pdata->rtcreg)
-               return -EBUSY;
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       pdata->rtcreg = devm_ioremap_resource(&pdev->dev, res);
+       if (IS_ERR(pdata->rtcreg))
+               return PTR_ERR(pdata->rtcreg);
 
        spin_lock_init(&pdata->lock);
        tx4939_rtc_cmd(pdata->rtcreg, TX4939_RTCCTL_COMMAND_NOP);
index 9e5e146..794820a 100644 (file)
@@ -30,8 +30,8 @@
 
 #define TRACE(x...) debug_sprintf_event(zcore_dbf, 1, x)
 
-#define TO_USER                0
-#define TO_KERNEL      1
+#define TO_USER                1
+#define TO_KERNEL      0
 #define CHUNK_INFO_SIZE        34 /* 2 16-byte char, each followed by blank */
 
 enum arch_id {
@@ -73,7 +73,7 @@ static struct ipl_parameter_block *ipl_block;
  * @count: Size of buffer, which should be copied
  * @mode:  Either TO_KERNEL or TO_USER
  */
-static int memcpy_hsa(void *dest, unsigned long src, size_t count, int mode)
+int memcpy_hsa(void *dest, unsigned long src, size_t count, int mode)
 {
        int offs, blk_num;
        static char buf[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE)));
index 6917b4f..22d5a94 100644 (file)
@@ -692,7 +692,7 @@ ahc_find_pci_device(ahc_dev_softc_t pci)
         * ID as valid.
         */
        if (ahc_get_pci_function(pci) > 0
-        && ahc_9005_subdevinfo_valid(vendor, device, subvendor, subdevice)
+        && ahc_9005_subdevinfo_valid(device, vendor, subdevice, subvendor)
         && SUBID_9005_MFUNCENB(subdevice) == 0)
                return (NULL);
 
index 8582929..2ec3c23 100644 (file)
@@ -860,8 +860,13 @@ bool esas2r_process_fs_ioctl(struct esas2r_adapter *a,
                return false;
        }
 
+       if (fsc->command >= cmdcnt) {
+               fs->status = ATTO_STS_INV_FUNC;
+               return false;
+       }
+
        func = cmd_to_fls_func[fsc->command];
-       if (fsc->command >= cmdcnt || func == 0xFF) {
+       if (func == 0xFF) {
                fs->status = ATTO_STS_INV_FUNC;
                return false;
        }
@@ -1355,7 +1360,7 @@ void esas2r_nvram_set_defaults(struct esas2r_adapter *a)
        u32 time = jiffies_to_msecs(jiffies);
 
        esas2r_lock_clear_flags(&a->flags, AF_NVR_VALID);
-       memcpy(n, &default_sas_nvram, sizeof(struct esas2r_sas_nvram));
+       *n = default_sas_nvram;
        n->sas_addr[3] |= 0x0F;
        n->sas_addr[4] = HIBYTE(LOWORD(time));
        n->sas_addr[5] = LOBYTE(LOWORD(time));
@@ -1373,7 +1378,7 @@ void esas2r_nvram_get_defaults(struct esas2r_adapter *a,
         * address out first.
         */
        memcpy(&sas_addr[0], a->nvram->sas_addr, 8);
-       memcpy(nvram, &default_sas_nvram, sizeof(struct esas2r_sas_nvram));
+       *nvram = default_sas_nvram;
        memcpy(&nvram->sas_addr[0], &sas_addr[0], 8);
 }
 
index 3a798e7..da1869d 100644 (file)
@@ -665,7 +665,7 @@ void esas2r_kill_adapter(int i)
 
 int esas2r_cleanup(struct Scsi_Host *host)
 {
-       struct esas2r_adapter *a = (struct esas2r_adapter *)host->hostdata;
+       struct esas2r_adapter *a;
        int index;
 
        if (host == NULL) {
@@ -678,6 +678,7 @@ int esas2r_cleanup(struct Scsi_Host *host)
        }
 
        esas2r_debug("esas2r_cleanup called for host %p", host);
+       a = (struct esas2r_adapter *)host->hostdata;
        index = a->index;
        esas2r_kill_adapter(index);
        return index;
@@ -808,7 +809,7 @@ static void esas2r_init_pci_cfg_space(struct esas2r_adapter *a)
        int pcie_cap_reg;
 
        pcie_cap_reg = pci_find_capability(a->pcid, PCI_CAP_ID_EXP);
-       if (0xffff && pcie_cap_reg) {
+       if (0xffff & pcie_cap_reg) {
                u16 devcontrol;
 
                pci_read_config_word(a->pcid, pcie_cap_reg + PCI_EXP_DEVCTL,
@@ -1550,8 +1551,7 @@ void esas2r_reset_chip(struct esas2r_adapter *a)
         * to not overwrite a previous crash that was saved.
         */
        if ((a->flags2 & AF2_COREDUMP_AVAIL)
-           && !(a->flags2 & AF2_COREDUMP_SAVED)
-           && a->fw_coredump_buff) {
+           && !(a->flags2 & AF2_COREDUMP_SAVED)) {
                esas2r_read_mem_block(a,
                                      a->fw_coredump_buff,
                                      MW_DATA_ADDR_SRAM + 0x80000,
index f3d0cb8..e5b0902 100644 (file)
@@ -415,7 +415,7 @@ static int csmi_ioctl_callback(struct esas2r_adapter *a,
                lun = tm->lun;
        }
 
-       if (path > 0 || tid > ESAS2R_MAX_ID) {
+       if (path > 0) {
                rq->func_rsp.ioctl_rsp.csmi.csmi_status = cpu_to_le32(
                        CSMI_STS_INV_PARAM);
                return false;
index f8ec6d6..fd13928 100644 (file)
@@ -302,6 +302,7 @@ static void esas2r_complete_vda_ioctl(struct esas2r_adapter *a,
                if (vi->cmd.cfg.cfg_func == VDA_CFG_GET_INIT) {
                        struct atto_ioctl_vda_cfg_cmd *cfg = &vi->cmd.cfg;
                        struct atto_vda_cfg_rsp *rsp = &rq->func_rsp.cfg_rsp;
+                       char buf[sizeof(cfg->data.init.fw_release) + 1];
 
                        cfg->data_length =
                                cpu_to_le32(sizeof(struct atto_vda_cfg_init));
@@ -309,11 +310,13 @@ static void esas2r_complete_vda_ioctl(struct esas2r_adapter *a,
                                le32_to_cpu(rsp->vda_version);
                        cfg->data.init.fw_build = rsp->fw_build;
 
-                       sprintf((char *)&cfg->data.init.fw_release,
-                               "%1d.%02d",
+                       snprintf(buf, sizeof(buf), "%1d.%02d",
                                (int)LOBYTE(le16_to_cpu(rsp->fw_release)),
                                (int)HIBYTE(le16_to_cpu(rsp->fw_release)));
 
+                       memcpy(&cfg->data.init.fw_release, buf,
+                              sizeof(cfg->data.init.fw_release));
+
                        if (LOWORD(LOBYTE(cfg->data.init.fw_build)) == 'A')
                                cfg->data.init.fw_version =
                                        cfg->data.init.fw_build;
index c18c681..e4dd3d7 100644 (file)
@@ -43,6 +43,8 @@
 #define DFX                     DRV_NAME "%d: "
 
 #define DESC_CLEAN_LOW_WATERMARK 8
+#define FNIC_UCSM_DFLT_THROTTLE_CNT_BLD        16 /* UCSM default throttle count */
+#define FNIC_MIN_IO_REQ                        256 /* Min IO throttle count */
 #define FNIC_MAX_IO_REQ                2048 /* scsi_cmnd tag map entries */
 #define        FNIC_IO_LOCKS           64 /* IO locks: power of 2 */
 #define FNIC_DFLT_QUEUE_DEPTH  32
@@ -154,6 +156,9 @@ do {                                                                \
        FNIC_CHECK_LOGGING(FNIC_ISR_LOGGING,                    \
                         shost_printk(kern_level, host, fmt, ##args);)
 
+#define FNIC_MAIN_NOTE(kern_level, host, fmt, args...)          \
+       shost_printk(kern_level, host, fmt, ##args)
+
 extern const char *fnic_state_str[];
 
 enum fnic_intx_intr_index {
@@ -215,10 +220,12 @@ struct fnic {
 
        struct vnic_stats *stats;
        unsigned long stats_time;       /* time of stats update */
+       unsigned long stats_reset_time; /* time of stats reset */
        struct vnic_nic_cfg *nic_cfg;
        char name[IFNAMSIZ];
        struct timer_list notify_timer; /* used for MSI interrupts */
 
+       unsigned int fnic_max_tag_id;
        unsigned int err_intr_offset;
        unsigned int link_intr_offset;
 
@@ -359,4 +366,5 @@ fnic_chk_state_flags_locked(struct fnic *fnic, unsigned long st_flags)
        return ((fnic->state_flags & st_flags) == st_flags);
 }
 void __fnic_set_state_flags(struct fnic *, unsigned long, unsigned long);
+void fnic_dump_fchost_stats(struct Scsi_Host *, struct fc_host_statistics *);
 #endif /* _FNIC_H_ */
index 42e15ee..bbf81ea 100644 (file)
@@ -74,6 +74,10 @@ module_param(fnic_trace_max_pages, uint, S_IRUGO|S_IWUSR);
 MODULE_PARM_DESC(fnic_trace_max_pages, "Total allocated memory pages "
                                        "for fnic trace buffer");
 
+static unsigned int fnic_max_qdepth = FNIC_DFLT_QUEUE_DEPTH;
+module_param(fnic_max_qdepth, uint, S_IRUGO|S_IWUSR);
+MODULE_PARM_DESC(fnic_max_qdepth, "Queue depth to report for each LUN");
+
 static struct libfc_function_template fnic_transport_template = {
        .frame_send = fnic_send,
        .lport_set_port_id = fnic_set_port_id,
@@ -91,7 +95,7 @@ static int fnic_slave_alloc(struct scsi_device *sdev)
        if (!rport || fc_remote_port_chkready(rport))
                return -ENXIO;
 
-       scsi_activate_tcq(sdev, FNIC_DFLT_QUEUE_DEPTH);
+       scsi_activate_tcq(sdev, fnic_max_qdepth);
        return 0;
 }
 
@@ -126,6 +130,7 @@ fnic_set_rport_dev_loss_tmo(struct fc_rport *rport, u32 timeout)
 static void fnic_get_host_speed(struct Scsi_Host *shost);
 static struct scsi_transport_template *fnic_fc_transport;
 static struct fc_host_statistics *fnic_get_stats(struct Scsi_Host *);
+static void fnic_reset_host_stats(struct Scsi_Host *);
 
 static struct fc_function_template fnic_fc_functions = {
 
@@ -153,6 +158,7 @@ static struct fc_function_template fnic_fc_functions = {
        .set_rport_dev_loss_tmo = fnic_set_rport_dev_loss_tmo,
        .issue_fc_host_lip = fnic_reset,
        .get_fc_host_stats = fnic_get_stats,
+       .reset_fc_host_stats = fnic_reset_host_stats,
        .dd_fcrport_size = sizeof(struct fc_rport_libfc_priv),
        .terminate_rport_io = fnic_terminate_rport_io,
        .bsg_request = fc_lport_bsg_request,
@@ -206,13 +212,116 @@ static struct fc_host_statistics *fnic_get_stats(struct Scsi_Host *host)
        stats->error_frames = vs->tx.tx_errors + vs->rx.rx_errors;
        stats->dumped_frames = vs->tx.tx_drops + vs->rx.rx_drop;
        stats->invalid_crc_count = vs->rx.rx_crc_errors;
-       stats->seconds_since_last_reset = (jiffies - lp->boot_time) / HZ;
+       stats->seconds_since_last_reset =
+                       (jiffies - fnic->stats_reset_time) / HZ;
        stats->fcp_input_megabytes = div_u64(fnic->fcp_input_bytes, 1000000);
        stats->fcp_output_megabytes = div_u64(fnic->fcp_output_bytes, 1000000);
 
        return stats;
 }
 
+/*
+ * fnic_dump_fchost_stats
+ * note : dumps fc_statistics into system logs
+ */
+void fnic_dump_fchost_stats(struct Scsi_Host *host,
+                               struct fc_host_statistics *stats)
+{
+       FNIC_MAIN_NOTE(KERN_NOTICE, host,
+                       "fnic: seconds since last reset = %llu\n",
+                       stats->seconds_since_last_reset);
+       FNIC_MAIN_NOTE(KERN_NOTICE, host,
+                       "fnic: tx frames                = %llu\n",
+                       stats->tx_frames);
+       FNIC_MAIN_NOTE(KERN_NOTICE, host,
+                       "fnic: tx words         = %llu\n",
+                       stats->tx_words);
+       FNIC_MAIN_NOTE(KERN_NOTICE, host,
+                       "fnic: rx frames                = %llu\n",
+                       stats->rx_frames);
+       FNIC_MAIN_NOTE(KERN_NOTICE, host,
+                       "fnic: rx words         = %llu\n",
+                       stats->rx_words);
+       FNIC_MAIN_NOTE(KERN_NOTICE, host,
+                       "fnic: lip count                = %llu\n",
+                       stats->lip_count);
+       FNIC_MAIN_NOTE(KERN_NOTICE, host,
+                       "fnic: nos count                = %llu\n",
+                       stats->nos_count);
+       FNIC_MAIN_NOTE(KERN_NOTICE, host,
+                       "fnic: error frames             = %llu\n",
+                       stats->error_frames);
+       FNIC_MAIN_NOTE(KERN_NOTICE, host,
+                       "fnic: dumped frames    = %llu\n",
+                       stats->dumped_frames);
+       FNIC_MAIN_NOTE(KERN_NOTICE, host,
+                       "fnic: link failure count       = %llu\n",
+                       stats->link_failure_count);
+       FNIC_MAIN_NOTE(KERN_NOTICE, host,
+                       "fnic: loss of sync count       = %llu\n",
+                       stats->loss_of_sync_count);
+       FNIC_MAIN_NOTE(KERN_NOTICE, host,
+                       "fnic: loss of signal count     = %llu\n",
+                       stats->loss_of_signal_count);
+       FNIC_MAIN_NOTE(KERN_NOTICE, host,
+                       "fnic: prim seq protocol err count = %llu\n",
+                       stats->prim_seq_protocol_err_count);
+       FNIC_MAIN_NOTE(KERN_NOTICE, host,
+                       "fnic: invalid tx word count= %llu\n",
+                       stats->invalid_tx_word_count);
+       FNIC_MAIN_NOTE(KERN_NOTICE, host,
+                       "fnic: invalid crc count        = %llu\n",
+                       stats->invalid_crc_count);
+       FNIC_MAIN_NOTE(KERN_NOTICE, host,
+                       "fnic: fcp input requests       = %llu\n",
+                       stats->fcp_input_requests);
+       FNIC_MAIN_NOTE(KERN_NOTICE, host,
+                       "fnic: fcp output requests      = %llu\n",
+                       stats->fcp_output_requests);
+       FNIC_MAIN_NOTE(KERN_NOTICE, host,
+                       "fnic: fcp control requests     = %llu\n",
+                       stats->fcp_control_requests);
+       FNIC_MAIN_NOTE(KERN_NOTICE, host,
+                       "fnic: fcp input megabytes      = %llu\n",
+                       stats->fcp_input_megabytes);
+       FNIC_MAIN_NOTE(KERN_NOTICE, host,
+                       "fnic: fcp output megabytes     = %llu\n",
+                       stats->fcp_output_megabytes);
+       return;
+}
+
+/*
+ * fnic_reset_host_stats : clears host stats
+ * note : called when reset_statistics set under sysfs dir
+ */
+static void fnic_reset_host_stats(struct Scsi_Host *host)
+{
+       int ret;
+       struct fc_lport *lp = shost_priv(host);
+       struct fnic *fnic = lport_priv(lp);
+       struct fc_host_statistics *stats;
+       unsigned long flags;
+
+       /* dump current stats, before clearing them */
+       stats = fnic_get_stats(host);
+       fnic_dump_fchost_stats(host, stats);
+
+       spin_lock_irqsave(&fnic->fnic_lock, flags);
+       ret = vnic_dev_stats_clear(fnic->vdev);
+       spin_unlock_irqrestore(&fnic->fnic_lock, flags);
+
+       if (ret) {
+               FNIC_MAIN_DBG(KERN_DEBUG, fnic->lport->host,
+                               "fnic: Reset vnic stats failed"
+                               " 0x%x", ret);
+               return;
+       }
+       fnic->stats_reset_time = jiffies;
+       memset(stats, 0, sizeof(*stats));
+
+       return;
+}
+
 void fnic_log_q_error(struct fnic *fnic)
 {
        unsigned int i;
@@ -447,13 +556,6 @@ static int fnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
        host->transportt = fnic_fc_transport;
 
-       err = scsi_init_shared_tag_map(host, FNIC_MAX_IO_REQ);
-       if (err) {
-               shost_printk(KERN_ERR, fnic->lport->host,
-                            "Unable to alloc shared tag map\n");
-               goto err_out_free_hba;
-       }
-
        /* Setup PCI resources */
        pci_set_drvdata(pdev, fnic);
 
@@ -476,10 +578,10 @@ static int fnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        pci_set_master(pdev);
 
        /* Query PCI controller on system for DMA addressing
-        * limitation for the device.  Try 40-bit first, and
+        * limitation for the device.  Try 64-bit first, and
         * fail to 32-bit.
         */
-       err = pci_set_dma_mask(pdev, DMA_BIT_MASK(40));
+       err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
        if (err) {
                err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
                if (err) {
@@ -496,10 +598,10 @@ static int fnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
                        goto err_out_release_regions;
                }
        } else {
-               err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40));
+               err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
                if (err) {
                        shost_printk(KERN_ERR, fnic->lport->host,
-                                    "Unable to obtain 40-bit DMA "
+                                    "Unable to obtain 64-bit DMA "
                                     "for consistent allocations, aborting.\n");
                        goto err_out_release_regions;
                }
@@ -566,6 +668,22 @@ static int fnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
                             "aborting.\n");
                goto err_out_dev_close;
        }
+
+       /* Configure Maximum Outstanding IO reqs*/
+       if (fnic->config.io_throttle_count != FNIC_UCSM_DFLT_THROTTLE_CNT_BLD) {
+               host->can_queue = min_t(u32, FNIC_MAX_IO_REQ,
+                                       max_t(u32, FNIC_MIN_IO_REQ,
+                                       fnic->config.io_throttle_count));
+       }
+       fnic->fnic_max_tag_id = host->can_queue;
+
+       err = scsi_init_shared_tag_map(host, fnic->fnic_max_tag_id);
+       if (err) {
+               shost_printk(KERN_ERR, fnic->lport->host,
+                         "Unable to alloc shared tag map\n");
+               goto err_out_dev_close;
+       }
+
        host->max_lun = fnic->config.luns_per_tgt;
        host->max_id = FNIC_MAX_FCP_TARGET;
        host->max_cmd_len = FCOE_MAX_CMD_LEN;
@@ -719,6 +837,7 @@ static int fnic_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        }
 
        fc_lport_init_stats(lp);
+       fnic->stats_reset_time = jiffies;
 
        fc_lport_config(lp);
 
index a97e6e5..d014aae 100644 (file)
@@ -111,6 +111,12 @@ static inline spinlock_t *fnic_io_lock_hash(struct fnic *fnic,
        return &fnic->io_req_lock[hash];
 }
 
+static inline spinlock_t *fnic_io_lock_tag(struct fnic *fnic,
+                                           int tag)
+{
+       return &fnic->io_req_lock[tag & (FNIC_IO_LOCKS - 1)];
+}
+
 /*
  * Unmap the data buffer and sense buffer for an io_req,
  * also unmap and free the device-private scatter/gather list.
@@ -730,7 +736,7 @@ static void fnic_fcpio_icmnd_cmpl_handler(struct fnic *fnic,
        fcpio_tag_id_dec(&tag, &id);
        icmnd_cmpl = &desc->u.icmnd_cmpl;
 
-       if (id >= FNIC_MAX_IO_REQ) {
+       if (id >= fnic->fnic_max_tag_id) {
                shost_printk(KERN_ERR, fnic->lport->host,
                        "Tag out of range tag %x hdr status = %s\n",
                             id, fnic_fcpio_status_to_str(hdr_status));
@@ -818,38 +824,6 @@ static void fnic_fcpio_icmnd_cmpl_handler(struct fnic *fnic,
                if (icmnd_cmpl->flags & FCPIO_ICMND_CMPL_RESID_UNDER)
                        xfer_len -= icmnd_cmpl->residual;
 
-               /*
-                * If queue_full, then try to reduce queue depth for all
-                * LUNS on the target. Todo: this should be accompanied
-                * by a periodic queue_depth rampup based on successful
-                * IO completion.
-                */
-               if (icmnd_cmpl->scsi_status == QUEUE_FULL) {
-                       struct scsi_device *t_sdev;
-                       int qd = 0;
-
-                       shost_for_each_device(t_sdev, sc->device->host) {
-                               if (t_sdev->id != sc->device->id)
-                                       continue;
-
-                               if (t_sdev->queue_depth > 1) {
-                                       qd = scsi_track_queue_full
-                                               (t_sdev,
-                                                t_sdev->queue_depth - 1);
-                                       if (qd == -1)
-                                               qd = t_sdev->host->cmd_per_lun;
-                                       shost_printk(KERN_INFO,
-                                                    fnic->lport->host,
-                                                    "scsi[%d:%d:%d:%d"
-                                                    "] queue full detected,"
-                                                    "new depth = %d\n",
-                                                    t_sdev->host->host_no,
-                                                    t_sdev->channel,
-                                                    t_sdev->id, t_sdev->lun,
-                                                    t_sdev->queue_depth);
-                               }
-                       }
-               }
                break;
 
        case FCPIO_TIMEOUT:          /* request was timed out */
@@ -939,7 +913,7 @@ static void fnic_fcpio_itmf_cmpl_handler(struct fnic *fnic,
        fcpio_header_dec(&desc->hdr, &type, &hdr_status, &tag);
        fcpio_tag_id_dec(&tag, &id);
 
-       if ((id & FNIC_TAG_MASK) >= FNIC_MAX_IO_REQ) {
+       if ((id & FNIC_TAG_MASK) >= fnic->fnic_max_tag_id) {
                shost_printk(KERN_ERR, fnic->lport->host,
                "Tag out of range tag %x hdr status = %s\n",
                id, fnic_fcpio_status_to_str(hdr_status));
@@ -988,9 +962,7 @@ static void fnic_fcpio_itmf_cmpl_handler(struct fnic *fnic,
                        spin_unlock_irqrestore(io_lock, flags);
                        return;
                }
-               CMD_STATE(sc) = FNIC_IOREQ_ABTS_COMPLETE;
                CMD_ABTS_STATUS(sc) = hdr_status;
-
                CMD_FLAGS(sc) |= FNIC_IO_ABT_TERM_DONE;
                FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
                              "abts cmpl recd. id %d status %s\n",
@@ -1148,23 +1120,25 @@ int fnic_wq_copy_cmpl_handler(struct fnic *fnic, int copy_work_to_do)
 
 static void fnic_cleanup_io(struct fnic *fnic, int exclude_id)
 {
-       unsigned int i;
+       int i;
        struct fnic_io_req *io_req;
        unsigned long flags = 0;
        struct scsi_cmnd *sc;
        spinlock_t *io_lock;
        unsigned long start_time = 0;
 
-       for (i = 0; i < FNIC_MAX_IO_REQ; i++) {
+       for (i = 0; i < fnic->fnic_max_tag_id; i++) {
                if (i == exclude_id)
                        continue;
 
+               io_lock = fnic_io_lock_tag(fnic, i);
+               spin_lock_irqsave(io_lock, flags);
                sc = scsi_host_find_tag(fnic->lport->host, i);
-               if (!sc)
+               if (!sc) {
+                       spin_unlock_irqrestore(io_lock, flags);
                        continue;
+               }
 
-               io_lock = fnic_io_lock_hash(fnic, sc);
-               spin_lock_irqsave(io_lock, flags);
                io_req = (struct fnic_io_req *)CMD_SP(sc);
                if ((CMD_FLAGS(sc) & FNIC_DEVICE_RESET) &&
                        !(CMD_FLAGS(sc) & FNIC_DEV_RST_DONE)) {
@@ -1236,7 +1210,7 @@ void fnic_wq_copy_cleanup_handler(struct vnic_wq_copy *wq,
        fcpio_tag_id_dec(&desc->hdr.tag, &id);
        id &= FNIC_TAG_MASK;
 
-       if (id >= FNIC_MAX_IO_REQ)
+       if (id >= fnic->fnic_max_tag_id)
                return;
 
        sc = scsi_host_find_tag(fnic->lport->host, id);
@@ -1340,14 +1314,15 @@ static void fnic_rport_exch_reset(struct fnic *fnic, u32 port_id)
        if (fnic->in_remove)
                return;
 
-       for (tag = 0; tag < FNIC_MAX_IO_REQ; tag++) {
+       for (tag = 0; tag < fnic->fnic_max_tag_id; tag++) {
                abt_tag = tag;
+               io_lock = fnic_io_lock_tag(fnic, tag);
+               spin_lock_irqsave(io_lock, flags);
                sc = scsi_host_find_tag(fnic->lport->host, tag);
-               if (!sc)
+               if (!sc) {
+                       spin_unlock_irqrestore(io_lock, flags);
                        continue;
-
-               io_lock = fnic_io_lock_hash(fnic, sc);
-               spin_lock_irqsave(io_lock, flags);
+               }
 
                io_req = (struct fnic_io_req *)CMD_SP(sc);
 
@@ -1441,12 +1416,29 @@ void fnic_terminate_rport_io(struct fc_rport *rport)
        unsigned long flags;
        struct scsi_cmnd *sc;
        struct scsi_lun fc_lun;
-       struct fc_rport_libfc_priv *rdata = rport->dd_data;
-       struct fc_lport *lport = rdata->local_port;
-       struct fnic *fnic = lport_priv(lport);
+       struct fc_rport_libfc_priv *rdata;
+       struct fc_lport *lport;
+       struct fnic *fnic;
        struct fc_rport *cmd_rport;
        enum fnic_ioreq_state old_ioreq_state;
 
+       if (!rport) {
+               printk(KERN_ERR "fnic_terminate_rport_io: rport is NULL\n");
+               return;
+       }
+       rdata = rport->dd_data;
+
+       if (!rdata) {
+               printk(KERN_ERR "fnic_terminate_rport_io: rdata is NULL\n");
+               return;
+       }
+       lport = rdata->local_port;
+
+       if (!lport) {
+               printk(KERN_ERR "fnic_terminate_rport_io: lport is NULL\n");
+               return;
+       }
+       fnic = lport_priv(lport);
        FNIC_SCSI_DBG(KERN_DEBUG,
                      fnic->lport->host, "fnic_terminate_rport_io called"
                      " wwpn 0x%llx, wwnn0x%llx, rport 0x%p, portid 0x%06x\n",
@@ -1456,18 +1448,21 @@ void fnic_terminate_rport_io(struct fc_rport *rport)
        if (fnic->in_remove)
                return;
 
-       for (tag = 0; tag < FNIC_MAX_IO_REQ; tag++) {
+       for (tag = 0; tag < fnic->fnic_max_tag_id; tag++) {
                abt_tag = tag;
+               io_lock = fnic_io_lock_tag(fnic, tag);
+               spin_lock_irqsave(io_lock, flags);
                sc = scsi_host_find_tag(fnic->lport->host, tag);
-               if (!sc)
+               if (!sc) {
+                       spin_unlock_irqrestore(io_lock, flags);
                        continue;
+               }
 
                cmd_rport = starget_to_rport(scsi_target(sc->device));
-               if (rport != cmd_rport)
+               if (rport != cmd_rport) {
+                       spin_unlock_irqrestore(io_lock, flags);
                        continue;
-
-               io_lock = fnic_io_lock_hash(fnic, sc);
-               spin_lock_irqsave(io_lock, flags);
+               }
 
                io_req = (struct fnic_io_req *)CMD_SP(sc);
 
@@ -1680,13 +1675,15 @@ int fnic_abort_cmd(struct scsi_cmnd *sc)
        io_req->abts_done = NULL;
 
        /* fw did not complete abort, timed out */
-       if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) {
+       if (CMD_ABTS_STATUS(sc) == FCPIO_INVALID_CODE) {
                spin_unlock_irqrestore(io_lock, flags);
                CMD_FLAGS(sc) |= FNIC_IO_ABT_TERM_TIMED_OUT;
                ret = FAILED;
                goto fnic_abort_cmd_end;
        }
 
+       CMD_STATE(sc) = FNIC_IOREQ_ABTS_COMPLETE;
+
        /*
         * firmware completed the abort, check the status,
         * free the io_req irrespective of failure or success
@@ -1784,17 +1781,18 @@ static int fnic_clean_pending_aborts(struct fnic *fnic,
        DECLARE_COMPLETION_ONSTACK(tm_done);
        enum fnic_ioreq_state old_ioreq_state;
 
-       for (tag = 0; tag < FNIC_MAX_IO_REQ; tag++) {
+       for (tag = 0; tag < fnic->fnic_max_tag_id; tag++) {
+               io_lock = fnic_io_lock_tag(fnic, tag);
+               spin_lock_irqsave(io_lock, flags);
                sc = scsi_host_find_tag(fnic->lport->host, tag);
                /*
                 * ignore this lun reset cmd or cmds that do not belong to
                 * this lun
                 */
-               if (!sc || sc == lr_sc || sc->device != lun_dev)
+               if (!sc || sc == lr_sc || sc->device != lun_dev) {
+                       spin_unlock_irqrestore(io_lock, flags);
                        continue;
-
-               io_lock = fnic_io_lock_hash(fnic, sc);
-               spin_lock_irqsave(io_lock, flags);
+               }
 
                io_req = (struct fnic_io_req *)CMD_SP(sc);
 
@@ -1823,6 +1821,11 @@ static int fnic_clean_pending_aborts(struct fnic *fnic,
                        spin_unlock_irqrestore(io_lock, flags);
                        continue;
                }
+
+               if (io_req->abts_done)
+                       shost_printk(KERN_ERR, fnic->lport->host,
+                         "%s: io_req->abts_done is set state is %s\n",
+                         __func__, fnic_ioreq_state_to_str(CMD_STATE(sc)));
                old_ioreq_state = CMD_STATE(sc);
                /*
                 * Any pending IO issued prior to reset is expected to be
@@ -1833,11 +1836,6 @@ static int fnic_clean_pending_aborts(struct fnic *fnic,
                 */
                CMD_STATE(sc) = FNIC_IOREQ_ABTS_PENDING;
 
-               if (io_req->abts_done)
-                       shost_printk(KERN_ERR, fnic->lport->host,
-                         "%s: io_req->abts_done is set state is %s\n",
-                         __func__, fnic_ioreq_state_to_str(CMD_STATE(sc)));
-
                BUG_ON(io_req->abts_done);
 
                abt_tag = tag;
@@ -1890,12 +1888,13 @@ static int fnic_clean_pending_aborts(struct fnic *fnic,
                io_req->abts_done = NULL;
 
                /* if abort is still pending with fw, fail */
-               if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) {
+               if (CMD_ABTS_STATUS(sc) == FCPIO_INVALID_CODE) {
                        spin_unlock_irqrestore(io_lock, flags);
                        CMD_FLAGS(sc) |= FNIC_IO_ABT_TERM_DONE;
                        ret = 1;
                        goto clean_pending_aborts_end;
                }
+               CMD_STATE(sc) = FNIC_IOREQ_ABTS_COMPLETE;
                CMD_SP(sc) = NULL;
                spin_unlock_irqrestore(io_lock, flags);
 
@@ -2093,8 +2092,8 @@ int fnic_device_reset(struct scsi_cmnd *sc)
                spin_unlock_irqrestore(io_lock, flags);
                int_to_scsilun(sc->device->lun, &fc_lun);
                /*
-                * Issue abort and terminate on the device reset request.
-                * If q'ing of the abort fails, retry issue it after a delay.
+                * Issue abort and terminate on device reset request.
+                * If q'ing of terminate fails, retry it after a delay.
                 */
                while (1) {
                        spin_lock_irqsave(io_lock, flags);
@@ -2405,7 +2404,7 @@ int fnic_is_abts_pending(struct fnic *fnic, struct scsi_cmnd *lr_sc)
                lun_dev = lr_sc->device;
 
        /* walk again to check, if IOs are still pending in fw */
-       for (tag = 0; tag < FNIC_MAX_IO_REQ; tag++) {
+       for (tag = 0; tag < fnic->fnic_max_tag_id; tag++) {
                sc = scsi_host_find_tag(fnic->lport->host, tag);
                /*
                 * ignore this lun reset cmd or cmds that do not belong to
index fbb5536..e343e1d 100644 (file)
@@ -54,8 +54,8 @@
 #define VNIC_FNIC_PLOGI_TIMEOUT_MIN         1000
 #define VNIC_FNIC_PLOGI_TIMEOUT_MAX         255000
 
-#define VNIC_FNIC_IO_THROTTLE_COUNT_MIN     256
-#define VNIC_FNIC_IO_THROTTLE_COUNT_MAX     4096
+#define VNIC_FNIC_IO_THROTTLE_COUNT_MIN     1
+#define VNIC_FNIC_IO_THROTTLE_COUNT_MAX     2048
 
 #define VNIC_FNIC_LINK_DOWN_TIMEOUT_MIN     0
 #define VNIC_FNIC_LINK_DOWN_TIMEOUT_MAX     240000
index fac8cf5..891c86b 100644 (file)
@@ -54,7 +54,7 @@
 #include "hpsa.h"
 
 /* HPSA_DRIVER_VERSION must be 3 byte values (0-255) separated by '.' */
-#define HPSA_DRIVER_VERSION "2.0.2-1"
+#define HPSA_DRIVER_VERSION "3.4.0-1"
 #define DRIVER_NAME "HP HPSA Driver (v " HPSA_DRIVER_VERSION ")"
 #define HPSA "hpsa"
 
@@ -89,13 +89,14 @@ static const struct pci_device_id hpsa_pci_device_id[] = {
        {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSE,     0x103C, 0x3245},
        {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSE,     0x103C, 0x3247},
        {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSE,     0x103C, 0x3249},
-       {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSE,     0x103C, 0x324a},
-       {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSE,     0x103C, 0x324b},
+       {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSE,     0x103C, 0x324A},
+       {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSE,     0x103C, 0x324B},
        {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSE,     0x103C, 0x3233},
        {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSF,     0x103C, 0x3350},
        {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSF,     0x103C, 0x3351},
        {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSF,     0x103C, 0x3352},
        {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSF,     0x103C, 0x3353},
+       {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSF,     0x103C, 0x334D},
        {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSF,     0x103C, 0x3354},
        {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSF,     0x103C, 0x3355},
        {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSF,     0x103C, 0x3356},
@@ -107,7 +108,19 @@ static const struct pci_device_id hpsa_pci_device_id[] = {
        {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSH,     0x103C, 0x1925},
        {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSH,     0x103C, 0x1926},
        {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSH,     0x103C, 0x1928},
-       {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSF,     0x103C, 0x334d},
+       {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSH,     0x103C, 0x1929},
+       {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSI,     0x103C, 0x21BD},
+       {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSI,     0x103C, 0x21BE},
+       {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSI,     0x103C, 0x21BF},
+       {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSI,     0x103C, 0x21C0},
+       {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSI,     0x103C, 0x21C1},
+       {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSI,     0x103C, 0x21C2},
+       {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSI,     0x103C, 0x21C3},
+       {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSI,     0x103C, 0x21C4},
+       {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSI,     0x103C, 0x21C5},
+       {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSI,     0x103C, 0x21C7},
+       {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSI,     0x103C, 0x21C8},
+       {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSI,     0x103C, 0x21C9},
        {PCI_VENDOR_ID_HP,     PCI_ANY_ID,      PCI_ANY_ID, PCI_ANY_ID,
                PCI_CLASS_STORAGE_RAID << 8, 0xffff << 8, 0},
        {0,}
@@ -125,24 +138,35 @@ static struct board_type products[] = {
        {0x3245103C, "Smart Array P410i", &SA5_access},
        {0x3247103C, "Smart Array P411", &SA5_access},
        {0x3249103C, "Smart Array P812", &SA5_access},
-       {0x324a103C, "Smart Array P712m", &SA5_access},
-       {0x324b103C, "Smart Array P711m", &SA5_access},
+       {0x324A103C, "Smart Array P712m", &SA5_access},
+       {0x324B103C, "Smart Array P711m", &SA5_access},
        {0x3350103C, "Smart Array P222", &SA5_access},
        {0x3351103C, "Smart Array P420", &SA5_access},
        {0x3352103C, "Smart Array P421", &SA5_access},
        {0x3353103C, "Smart Array P822", &SA5_access},
+       {0x334D103C, "Smart Array P822se", &SA5_access},
        {0x3354103C, "Smart Array P420i", &SA5_access},
        {0x3355103C, "Smart Array P220i", &SA5_access},
        {0x3356103C, "Smart Array P721m", &SA5_access},
-       {0x1920103C, "Smart Array", &SA5_access},
-       {0x1921103C, "Smart Array", &SA5_access},
-       {0x1922103C, "Smart Array", &SA5_access},
-       {0x1923103C, "Smart Array", &SA5_access},
-       {0x1924103C, "Smart Array", &SA5_access},
-       {0x1925103C, "Smart Array", &SA5_access},
-       {0x1926103C, "Smart Array", &SA5_access},
-       {0x1928103C, "Smart Array", &SA5_access},
-       {0x334d103C, "Smart Array P822se", &SA5_access},
+       {0x1921103C, "Smart Array P830i", &SA5_access},
+       {0x1922103C, "Smart Array P430", &SA5_access},
+       {0x1923103C, "Smart Array P431", &SA5_access},
+       {0x1924103C, "Smart Array P830", &SA5_access},
+       {0x1926103C, "Smart Array P731m", &SA5_access},
+       {0x1928103C, "Smart Array P230i", &SA5_access},
+       {0x1929103C, "Smart Array P530", &SA5_access},
+       {0x21BD103C, "Smart Array", &SA5_access},
+       {0x21BE103C, "Smart Array", &SA5_access},
+       {0x21BF103C, "Smart Array", &SA5_access},
+       {0x21C0103C, "Smart Array", &SA5_access},
+       {0x21C1103C, "Smart Array", &SA5_access},
+       {0x21C2103C, "Smart Array", &SA5_access},
+       {0x21C3103C, "Smart Array", &SA5_access},
+       {0x21C4103C, "Smart Array", &SA5_access},
+       {0x21C5103C, "Smart Array", &SA5_access},
+       {0x21C7103C, "Smart Array", &SA5_access},
+       {0x21C8103C, "Smart Array", &SA5_access},
+       {0x21C9103C, "Smart Array", &SA5_access},
        {0xFFFF103C, "Unknown Smart Array", &SA5_access},
 };
 
index 4e31caa..23f5ba5 100644 (file)
@@ -2208,7 +2208,10 @@ static int ibmvfc_cancel_all(struct scsi_device *sdev, int type)
 
        if (rsp_rc != 0) {
                sdev_printk(KERN_ERR, sdev, "Failed to send cancel event. rc=%d\n", rsp_rc);
-               return -EIO;
+               /* If failure is received, the host adapter is most likely going
+                through reset, return success so the caller will wait for the command
+                being cancelled to get returned */
+               return 0;
        }
 
        sdev_printk(KERN_INFO, sdev, "Cancelling outstanding commands.\n");
@@ -2221,7 +2224,15 @@ static int ibmvfc_cancel_all(struct scsi_device *sdev, int type)
 
        if (status != IBMVFC_MAD_SUCCESS) {
                sdev_printk(KERN_WARNING, sdev, "Cancel failed with rc=%x\n", status);
-               return -EIO;
+               switch (status) {
+               case IBMVFC_MAD_DRIVER_FAILED:
+               case IBMVFC_MAD_CRQ_ERROR:
+                       /* Host adapter most likely going through reset, return success to
+                        the caller will wait for the command being cancelled to get returned */
+                       return 0;
+               default:
+                       return -EIO;
+               };
        }
 
        sdev_printk(KERN_INFO, sdev, "Successfully cancelled outstanding commands\n");
index d0fa4b6..fa76440 100644 (file)
@@ -241,7 +241,7 @@ static void gather_partition_info(void)
        struct device_node *rootdn;
 
        const char *ppartition_name;
-       const unsigned int *p_number_ptr;
+       const __be32 *p_number_ptr;
 
        /* Retrieve information about this partition */
        rootdn = of_find_node_by_path("/");
@@ -255,7 +255,7 @@ static void gather_partition_info(void)
                                sizeof(partition_name));
        p_number_ptr = of_get_property(rootdn, "ibm,partition-no", NULL);
        if (p_number_ptr)
-               partition_number = *p_number_ptr;
+               partition_number = of_read_number(p_number_ptr, 1);
        of_node_put(rootdn);
 }
 
@@ -270,10 +270,11 @@ static void set_adapter_info(struct ibmvscsi_host_data *hostdata)
        strncpy(hostdata->madapter_info.partition_name, partition_name,
                        sizeof(hostdata->madapter_info.partition_name));
 
-       hostdata->madapter_info.partition_number = partition_number;
+       hostdata->madapter_info.partition_number =
+                                       cpu_to_be32(partition_number);
 
-       hostdata->madapter_info.mad_version = 1;
-       hostdata->madapter_info.os_type = 2;
+       hostdata->madapter_info.mad_version = cpu_to_be32(1);
+       hostdata->madapter_info.os_type = cpu_to_be32(2);
 }
 
 /**
@@ -464,9 +465,9 @@ static int initialize_event_pool(struct event_pool *pool,
                memset(&evt->crq, 0x00, sizeof(evt->crq));
                atomic_set(&evt->free, 1);
                evt->crq.valid = 0x80;
-               evt->crq.IU_length = sizeof(*evt->xfer_iu);
-               evt->crq.IU_data_ptr = pool->iu_token + 
-                       sizeof(*evt->xfer_iu) * i;
+               evt->crq.IU_length = cpu_to_be16(sizeof(*evt->xfer_iu));
+               evt->crq.IU_data_ptr = cpu_to_be64(pool->iu_token +
+                       sizeof(*evt->xfer_iu) * i);
                evt->xfer_iu = pool->iu_storage + i;
                evt->hostdata = hostdata;
                evt->ext_list = NULL;
@@ -588,7 +589,7 @@ static void init_event_struct(struct srp_event_struct *evt_struct,
        evt_struct->cmnd_done = NULL;
        evt_struct->sync_srp = NULL;
        evt_struct->crq.format = format;
-       evt_struct->crq.timeout = timeout;
+       evt_struct->crq.timeout = cpu_to_be16(timeout);
        evt_struct->done = done;
 }
 
@@ -659,8 +660,8 @@ static int map_sg_list(struct scsi_cmnd *cmd, int nseg,
 
        scsi_for_each_sg(cmd, sg, nseg, i) {
                struct srp_direct_buf *descr = md + i;
-               descr->va = sg_dma_address(sg);
-               descr->len = sg_dma_len(sg);
+               descr->va = cpu_to_be64(sg_dma_address(sg));
+               descr->len = cpu_to_be32(sg_dma_len(sg));
                descr->key = 0;
                total_length += sg_dma_len(sg);
        }
@@ -703,13 +704,14 @@ static int map_sg_data(struct scsi_cmnd *cmd,
        }
 
        indirect->table_desc.va = 0;
-       indirect->table_desc.len = sg_mapped * sizeof(struct srp_direct_buf);
+       indirect->table_desc.len = cpu_to_be32(sg_mapped *
+                                              sizeof(struct srp_direct_buf));
        indirect->table_desc.key = 0;
 
        if (sg_mapped <= MAX_INDIRECT_BUFS) {
                total_length = map_sg_list(cmd, sg_mapped,
                                           &indirect->desc_list[0]);
-               indirect->len = total_length;
+               indirect->len = cpu_to_be32(total_length);
                return 1;
        }
 
@@ -731,9 +733,10 @@ static int map_sg_data(struct scsi_cmnd *cmd,
 
        total_length = map_sg_list(cmd, sg_mapped, evt_struct->ext_list);
 
-       indirect->len = total_length;
-       indirect->table_desc.va = evt_struct->ext_list_token;
-       indirect->table_desc.len = sg_mapped * sizeof(indirect->desc_list[0]);
+       indirect->len = cpu_to_be32(total_length);
+       indirect->table_desc.va = cpu_to_be64(evt_struct->ext_list_token);
+       indirect->table_desc.len = cpu_to_be32(sg_mapped *
+                                              sizeof(indirect->desc_list[0]));
        memcpy(indirect->desc_list, evt_struct->ext_list,
               MAX_INDIRECT_BUFS * sizeof(struct srp_direct_buf));
        return 1;
@@ -849,7 +852,7 @@ static int ibmvscsi_send_srp_event(struct srp_event_struct *evt_struct,
                                   struct ibmvscsi_host_data *hostdata,
                                   unsigned long timeout)
 {
-       u64 *crq_as_u64 = (u64 *) &evt_struct->crq;
+       __be64 *crq_as_u64 = (__be64 *)&evt_struct->crq;
        int request_status = 0;
        int rc;
        int srp_req = 0;
@@ -920,8 +923,9 @@ static int ibmvscsi_send_srp_event(struct srp_event_struct *evt_struct,
                add_timer(&evt_struct->timer);
        }
 
-       if ((rc =
-            ibmvscsi_send_crq(hostdata, crq_as_u64[0], crq_as_u64[1])) != 0) {
+       rc = ibmvscsi_send_crq(hostdata, be64_to_cpu(crq_as_u64[0]),
+                              be64_to_cpu(crq_as_u64[1]));
+       if (rc != 0) {
                list_del(&evt_struct->list);
                del_timer(&evt_struct->timer);
 
@@ -987,15 +991,16 @@ static void handle_cmd_rsp(struct srp_event_struct *evt_struct)
                if (((cmnd->result >> 1) & 0x1f) == CHECK_CONDITION)
                        memcpy(cmnd->sense_buffer,
                               rsp->data,
-                              rsp->sense_data_len);
+                              be32_to_cpu(rsp->sense_data_len));
                unmap_cmd_data(&evt_struct->iu.srp.cmd, 
                               evt_struct, 
                               evt_struct->hostdata->dev);
 
                if (rsp->flags & SRP_RSP_FLAG_DOOVER)
-                       scsi_set_resid(cmnd, rsp->data_out_res_cnt);
+                       scsi_set_resid(cmnd,
+                                      be32_to_cpu(rsp->data_out_res_cnt));
                else if (rsp->flags & SRP_RSP_FLAG_DIOVER)
-                       scsi_set_resid(cmnd, rsp->data_in_res_cnt);
+                       scsi_set_resid(cmnd, be32_to_cpu(rsp->data_in_res_cnt));
        }
 
        if (evt_struct->cmnd_done)
@@ -1037,7 +1042,7 @@ static int ibmvscsi_queuecommand_lck(struct scsi_cmnd *cmnd,
        memset(srp_cmd, 0x00, SRP_MAX_IU_LEN);
        srp_cmd->opcode = SRP_CMD;
        memcpy(srp_cmd->cdb, cmnd->cmnd, sizeof(srp_cmd->cdb));
-       srp_cmd->lun = ((u64) lun) << 48;
+       srp_cmd->lun = cpu_to_be64(((u64)lun) << 48);
 
        if (!map_data_for_srp_cmd(cmnd, evt_struct, srp_cmd, hostdata->dev)) {
                if (!firmware_has_feature(FW_FEATURE_CMO))
@@ -1062,9 +1067,10 @@ static int ibmvscsi_queuecommand_lck(struct scsi_cmnd *cmnd,
        if ((in_fmt == SRP_DATA_DESC_INDIRECT ||
             out_fmt == SRP_DATA_DESC_INDIRECT) &&
            indirect->table_desc.va == 0) {
-               indirect->table_desc.va = evt_struct->crq.IU_data_ptr +
+               indirect->table_desc.va =
+                       cpu_to_be64(be64_to_cpu(evt_struct->crq.IU_data_ptr) +
                        offsetof(struct srp_cmd, add_data) +
-                       offsetof(struct srp_indirect_buf, desc_list);
+                       offsetof(struct srp_indirect_buf, desc_list));
        }
 
        return ibmvscsi_send_srp_event(evt_struct, hostdata, 0);
@@ -1158,7 +1164,7 @@ static void login_rsp(struct srp_event_struct *evt_struct)
         * request_limit could have been set to -1 by this client.
         */
        atomic_set(&hostdata->request_limit,
-                  evt_struct->xfer_iu->srp.login_rsp.req_lim_delta);
+                  be32_to_cpu(evt_struct->xfer_iu->srp.login_rsp.req_lim_delta));
 
        /* If we had any pending I/Os, kick them */
        scsi_unblock_requests(hostdata->host);
@@ -1184,8 +1190,9 @@ static int send_srp_login(struct ibmvscsi_host_data *hostdata)
        login = &evt_struct->iu.srp.login_req;
        memset(login, 0, sizeof(*login));
        login->opcode = SRP_LOGIN_REQ;
-       login->req_it_iu_len = sizeof(union srp_iu);
-       login->req_buf_fmt = SRP_BUF_FORMAT_DIRECT | SRP_BUF_FORMAT_INDIRECT;
+       login->req_it_iu_len = cpu_to_be32(sizeof(union srp_iu));
+       login->req_buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT |
+                                        SRP_BUF_FORMAT_INDIRECT);
 
        spin_lock_irqsave(hostdata->host->host_lock, flags);
        /* Start out with a request limit of 0, since this is negotiated in
@@ -1214,12 +1221,13 @@ static void capabilities_rsp(struct srp_event_struct *evt_struct)
                dev_err(hostdata->dev, "error 0x%X getting capabilities info\n",
                        evt_struct->xfer_iu->mad.capabilities.common.status);
        } else {
-               if (hostdata->caps.migration.common.server_support != SERVER_SUPPORTS_CAP)
+               if (hostdata->caps.migration.common.server_support !=
+                   cpu_to_be16(SERVER_SUPPORTS_CAP))
                        dev_info(hostdata->dev, "Partition migration not supported\n");
 
                if (client_reserve) {
                        if (hostdata->caps.reserve.common.server_support ==
-                           SERVER_SUPPORTS_CAP)
+                           cpu_to_be16(SERVER_SUPPORTS_CAP))
                                dev_info(hostdata->dev, "Client reserve enabled\n");
                        else
                                dev_info(hostdata->dev, "Client reserve not supported\n");
@@ -1251,9 +1259,9 @@ static void send_mad_capabilities(struct ibmvscsi_host_data *hostdata)
        req = &evt_struct->iu.mad.capabilities;
        memset(req, 0, sizeof(*req));
 
-       hostdata->caps.flags = CAP_LIST_SUPPORTED;
+       hostdata->caps.flags = cpu_to_be32(CAP_LIST_SUPPORTED);
        if (hostdata->client_migrated)
-               hostdata->caps.flags |= CLIENT_MIGRATED;
+               hostdata->caps.flags |= cpu_to_be32(CLIENT_MIGRATED);
 
        strncpy(hostdata->caps.name, dev_name(&hostdata->host->shost_gendev),
                sizeof(hostdata->caps.name));
@@ -1264,22 +1272,31 @@ static void send_mad_capabilities(struct ibmvscsi_host_data *hostdata)
        strncpy(hostdata->caps.loc, location, sizeof(hostdata->caps.loc));
        hostdata->caps.loc[sizeof(hostdata->caps.loc) - 1] = '\0';
 
-       req->common.type = VIOSRP_CAPABILITIES_TYPE;
-       req->buffer = hostdata->caps_addr;
+       req->common.type = cpu_to_be32(VIOSRP_CAPABILITIES_TYPE);
+       req->buffer = cpu_to_be64(hostdata->caps_addr);
 
-       hostdata->caps.migration.common.cap_type = MIGRATION_CAPABILITIES;
-       hostdata->caps.migration.common.length = sizeof(hostdata->caps.migration);
-       hostdata->caps.migration.common.server_support = SERVER_SUPPORTS_CAP;
-       hostdata->caps.migration.ecl = 1;
+       hostdata->caps.migration.common.cap_type =
+                               cpu_to_be32(MIGRATION_CAPABILITIES);
+       hostdata->caps.migration.common.length =
+                               cpu_to_be16(sizeof(hostdata->caps.migration));
+       hostdata->caps.migration.common.server_support =
+                               cpu_to_be16(SERVER_SUPPORTS_CAP);
+       hostdata->caps.migration.ecl = cpu_to_be32(1);
 
        if (client_reserve) {
-               hostdata->caps.reserve.common.cap_type = RESERVATION_CAPABILITIES;
-               hostdata->caps.reserve.common.length = sizeof(hostdata->caps.reserve);
-               hostdata->caps.reserve.common.server_support = SERVER_SUPPORTS_CAP;
-               hostdata->caps.reserve.type = CLIENT_RESERVE_SCSI_2;
-               req->common.length = sizeof(hostdata->caps);
+               hostdata->caps.reserve.common.cap_type =
+                                       cpu_to_be32(RESERVATION_CAPABILITIES);
+               hostdata->caps.reserve.common.length =
+                               cpu_to_be16(sizeof(hostdata->caps.reserve));
+               hostdata->caps.reserve.common.server_support =
+                               cpu_to_be16(SERVER_SUPPORTS_CAP);
+               hostdata->caps.reserve.type =
+                               cpu_to_be32(CLIENT_RESERVE_SCSI_2);
+               req->common.length =
+                               cpu_to_be16(sizeof(hostdata->caps));
        } else
-               req->common.length = sizeof(hostdata->caps) - sizeof(hostdata->caps.reserve);
+               req->common.length = cpu_to_be16(sizeof(hostdata->caps) -
+                                               sizeof(hostdata->caps.reserve));
 
        spin_lock_irqsave(hostdata->host->host_lock, flags);
        if (ibmvscsi_send_srp_event(evt_struct, hostdata, info_timeout * 2))
@@ -1297,7 +1314,7 @@ static void send_mad_capabilities(struct ibmvscsi_host_data *hostdata)
 static void fast_fail_rsp(struct srp_event_struct *evt_struct)
 {
        struct ibmvscsi_host_data *hostdata = evt_struct->hostdata;
-       u8 status = evt_struct->xfer_iu->mad.fast_fail.common.status;
+       u16 status = be16_to_cpu(evt_struct->xfer_iu->mad.fast_fail.common.status);
 
        if (status == VIOSRP_MAD_NOT_SUPPORTED)
                dev_err(hostdata->dev, "fast_fail not supported in server\n");
@@ -1334,8 +1351,8 @@ static int enable_fast_fail(struct ibmvscsi_host_data *hostdata)
 
        fast_fail_mad = &evt_struct->iu.mad.fast_fail;
        memset(fast_fail_mad, 0, sizeof(*fast_fail_mad));
-       fast_fail_mad->common.type = VIOSRP_ENABLE_FAST_FAIL;
-       fast_fail_mad->common.length = sizeof(*fast_fail_mad);
+       fast_fail_mad->common.type = cpu_to_be32(VIOSRP_ENABLE_FAST_FAIL);
+       fast_fail_mad->common.length = cpu_to_be16(sizeof(*fast_fail_mad));
 
        spin_lock_irqsave(hostdata->host->host_lock, flags);
        rc = ibmvscsi_send_srp_event(evt_struct, hostdata, info_timeout * 2);
@@ -1362,15 +1379,15 @@ static void adapter_info_rsp(struct srp_event_struct *evt_struct)
                         "host partition %s (%d), OS %d, max io %u\n",
                         hostdata->madapter_info.srp_version,
                         hostdata->madapter_info.partition_name,
-                        hostdata->madapter_info.partition_number,
-                        hostdata->madapter_info.os_type,
-                        hostdata->madapter_info.port_max_txu[0]);
+                        be32_to_cpu(hostdata->madapter_info.partition_number),
+                        be32_to_cpu(hostdata->madapter_info.os_type),
+                        be32_to_cpu(hostdata->madapter_info.port_max_txu[0]));
                
                if (hostdata->madapter_info.port_max_txu[0]) 
                        hostdata->host->max_sectors = 
-                               hostdata->madapter_info.port_max_txu[0] >> 9;
+                               be32_to_cpu(hostdata->madapter_info.port_max_txu[0]) >> 9;
                
-               if (hostdata->madapter_info.os_type == 3 &&
+               if (be32_to_cpu(hostdata->madapter_info.os_type) == 3 &&
                    strcmp(hostdata->madapter_info.srp_version, "1.6a") <= 0) {
                        dev_err(hostdata->dev, "host (Ver. %s) doesn't support large transfers\n",
                                hostdata->madapter_info.srp_version);
@@ -1379,7 +1396,7 @@ static void adapter_info_rsp(struct srp_event_struct *evt_struct)
                        hostdata->host->sg_tablesize = MAX_INDIRECT_BUFS;
                }
 
-               if (hostdata->madapter_info.os_type == 3) {
+               if (be32_to_cpu(hostdata->madapter_info.os_type) == 3) {
                        enable_fast_fail(hostdata);
                        return;
                }
@@ -1414,9 +1431,9 @@ static void send_mad_adapter_info(struct ibmvscsi_host_data *hostdata)
        req = &evt_struct->iu.mad.adapter_info;
        memset(req, 0x00, sizeof(*req));
        
-       req->common.type = VIOSRP_ADAPTER_INFO_TYPE;
-       req->common.length = sizeof(hostdata->madapter_info);
-       req->buffer = hostdata->adapter_info_addr;
+       req->common.type = cpu_to_be32(VIOSRP_ADAPTER_INFO_TYPE);
+       req->common.length = cpu_to_be16(sizeof(hostdata->madapter_info));
+       req->buffer = cpu_to_be64(hostdata->adapter_info_addr);
 
        spin_lock_irqsave(hostdata->host->host_lock, flags);
        if (ibmvscsi_send_srp_event(evt_struct, hostdata, info_timeout * 2))
@@ -1501,7 +1518,7 @@ static int ibmvscsi_eh_abort_handler(struct scsi_cmnd *cmd)
                /* Set up an abort SRP command */
                memset(tsk_mgmt, 0x00, sizeof(*tsk_mgmt));
                tsk_mgmt->opcode = SRP_TSK_MGMT;
-               tsk_mgmt->lun = ((u64) lun) << 48;
+               tsk_mgmt->lun = cpu_to_be64(((u64) lun) << 48);
                tsk_mgmt->tsk_mgmt_func = SRP_TSK_ABORT_TASK;
                tsk_mgmt->task_tag = (u64) found_evt;
 
@@ -1624,7 +1641,7 @@ static int ibmvscsi_eh_device_reset_handler(struct scsi_cmnd *cmd)
                /* Set up a lun reset SRP command */
                memset(tsk_mgmt, 0x00, sizeof(*tsk_mgmt));
                tsk_mgmt->opcode = SRP_TSK_MGMT;
-               tsk_mgmt->lun = ((u64) lun) << 48;
+               tsk_mgmt->lun = cpu_to_be64(((u64) lun) << 48);
                tsk_mgmt->tsk_mgmt_func = SRP_TSK_LUN_RESET;
 
                evt->sync_srp = &srp_rsp;
@@ -1735,8 +1752,9 @@ static void ibmvscsi_handle_crq(struct viosrp_crq *crq,
 {
        long rc;
        unsigned long flags;
+       /* The hypervisor copies our tag value here so no byteswapping */
        struct srp_event_struct *evt_struct =
-           (struct srp_event_struct *)crq->IU_data_ptr;
+                       (__force struct srp_event_struct *)crq->IU_data_ptr;
        switch (crq->valid) {
        case 0xC0:              /* initialization */
                switch (crq->format) {
@@ -1792,18 +1810,18 @@ static void ibmvscsi_handle_crq(struct viosrp_crq *crq,
         */
        if (!valid_event_struct(&hostdata->pool, evt_struct)) {
                dev_err(hostdata->dev, "returned correlation_token 0x%p is invalid!\n",
-                      (void *)crq->IU_data_ptr);
+                      evt_struct);
                return;
        }
 
        if (atomic_read(&evt_struct->free)) {
                dev_err(hostdata->dev, "received duplicate correlation_token 0x%p!\n",
-                       (void *)crq->IU_data_ptr);
+                       evt_struct);
                return;
        }
 
        if (crq->format == VIOSRP_SRP_FORMAT)
-               atomic_add(evt_struct->xfer_iu->srp.rsp.req_lim_delta,
+               atomic_add(be32_to_cpu(evt_struct->xfer_iu->srp.rsp.req_lim_delta),
                           &hostdata->request_limit);
 
        del_timer(&evt_struct->timer);
@@ -1856,13 +1874,11 @@ static int ibmvscsi_do_host_config(struct ibmvscsi_host_data *hostdata,
 
        /* Set up a lun reset SRP command */
        memset(host_config, 0x00, sizeof(*host_config));
-       host_config->common.type = VIOSRP_HOST_CONFIG_TYPE;
-       host_config->common.length = length;
-       host_config->buffer = addr = dma_map_single(hostdata->dev, buffer,
-                                                   length,
-                                                   DMA_BIDIRECTIONAL);
+       host_config->common.type = cpu_to_be32(VIOSRP_HOST_CONFIG_TYPE);
+       host_config->common.length = cpu_to_be16(length);
+       addr = dma_map_single(hostdata->dev, buffer, length, DMA_BIDIRECTIONAL);
 
-       if (dma_mapping_error(hostdata->dev, host_config->buffer)) {
+       if (dma_mapping_error(hostdata->dev, addr)) {
                if (!firmware_has_feature(FW_FEATURE_CMO))
                        dev_err(hostdata->dev,
                                "dma_mapping error getting host config\n");
@@ -1870,6 +1886,8 @@ static int ibmvscsi_do_host_config(struct ibmvscsi_host_data *hostdata,
                return -1;
        }
 
+       host_config->buffer = cpu_to_be64(addr);
+
        init_completion(&evt_struct->comp);
        spin_lock_irqsave(hostdata->host->host_lock, flags);
        rc = ibmvscsi_send_srp_event(evt_struct, hostdata, info_timeout * 2);
index 2cd735d..1162430 100644 (file)
@@ -75,9 +75,9 @@ struct viosrp_crq {
        u8 format;              /* SCSI vs out-of-band */
        u8 reserved;
        u8 status;              /* non-scsi failure? (e.g. DMA failure) */
-       u16 timeout;            /* in seconds */
-       u16 IU_length;          /* in bytes */
-       u64 IU_data_ptr;        /* the TCE for transferring data */
+       __be16 timeout;         /* in seconds */
+       __be16 IU_length;               /* in bytes */
+       __be64 IU_data_ptr;     /* the TCE for transferring data */
 };
 
 /* MADs are Management requests above and beyond the IUs defined in the SRP
@@ -124,10 +124,10 @@ enum viosrp_capability_flag {
  * Common MAD header
  */
 struct mad_common {
-       u32 type;
-       u16 status;
-       u16 length;
-       u64 tag;
+       __be32 type;
+       __be16 status;
+       __be16 length;
+       __be64 tag;
 };
 
 /*
@@ -139,23 +139,23 @@ struct mad_common {
  */
 struct viosrp_empty_iu {
        struct mad_common common;
-       u64 buffer;
-       u32 port;
+       __be64 buffer;
+       __be32 port;
 };
 
 struct viosrp_error_log {
        struct mad_common common;
-       u64 buffer;
+       __be64 buffer;
 };
 
 struct viosrp_adapter_info {
        struct mad_common common;
-       u64 buffer;
+       __be64 buffer;
 };
 
 struct viosrp_host_config {
        struct mad_common common;
-       u64 buffer;
+       __be64 buffer;
 };
 
 struct viosrp_fast_fail {
@@ -164,27 +164,27 @@ struct viosrp_fast_fail {
 
 struct viosrp_capabilities {
        struct mad_common common;
-       u64 buffer;
+       __be64 buffer;
 };
 
 struct mad_capability_common {
-       u32 cap_type;
-       u16 length;
-       u16 server_support;
+       __be32 cap_type;
+       __be16 length;
+       __be16 server_support;
 };
 
 struct mad_reserve_cap {
        struct mad_capability_common common;
-       u32 type;
+       __be32 type;
 };
 
 struct mad_migration_cap {
        struct mad_capability_common common;
-       u32 ecl;
+       __be32 ecl;
 };
 
 struct capabilities{
-       u32 flags;
+       __be32 flags;
        char name[SRP_MAX_LOC_LEN];
        char loc[SRP_MAX_LOC_LEN];
        struct mad_migration_cap migration;
@@ -208,10 +208,10 @@ union viosrp_iu {
 struct mad_adapter_info_data {
        char srp_version[8];
        char partition_name[96];
-       u32 partition_number;
-       u32 mad_version;
-       u32 os_type;
-       u32 port_max_txu[8];    /* per-port maximum transfer */
+       __be32 partition_number;
+       __be32 mad_version;
+       __be32 os_type;
+       __be32 port_max_txu[8]; /* per-port maximum transfer */
 };
 
 #endif
index df43bfe..4e1b75c 100644 (file)
@@ -708,6 +708,7 @@ struct lpfc_hba {
        uint32_t cfg_multi_ring_type;
        uint32_t cfg_poll;
        uint32_t cfg_poll_tmo;
+       uint32_t cfg_task_mgmt_tmo;
        uint32_t cfg_use_msi;
        uint32_t cfg_fcp_imax;
        uint32_t cfg_fcp_cpu_map;
index 16498e0..00656fc 100644 (file)
@@ -1865,8 +1865,10 @@ lpfc_##attr##_set(struct lpfc_vport *vport, uint val) \
 { \
        if (val >= minval && val <= maxval) {\
                lpfc_printf_vlog(vport, KERN_ERR, LOG_INIT, \
-                       "3053 lpfc_" #attr " changed from %d to %d\n", \
-                       vport->cfg_##attr, val); \
+                       "3053 lpfc_" #attr \
+                       " changed from %d (x%x) to %d (x%x)\n", \
+                       vport->cfg_##attr, vport->cfg_##attr, \
+                       val, val); \
                vport->cfg_##attr = val;\
                return 0;\
        }\
@@ -4011,8 +4013,11 @@ LPFC_ATTR_R(ack0, 0, 0, 1, "Enable ACK0 support");
 # For [0], FCP commands are issued to Work Queues ina round robin fashion.
 # For [1], FCP commands are issued to a Work Queue associated with the
 #          current CPU.
+# It would be set to 1 by the driver if it's able to set up cpu affinity
+# for FCP I/Os through Work Queue associated with the current CPU. Otherwise,
+# roundrobin scheduling of FCP I/Os through WQs will be used.
 */
-LPFC_ATTR_RW(fcp_io_sched, 0, 0, 1, "Determine scheduling algrithmn for "
+LPFC_ATTR_RW(fcp_io_sched, 0, 0, 1, "Determine scheduling algorithm for "
                "issuing commands [0] - Round Robin, [1] - Current CPU");
 
 /*
@@ -4110,6 +4115,12 @@ LPFC_ATTR_RW(poll_tmo, 10, 1, 255,
             "Milliseconds driver will wait between polling FCP ring");
 
 /*
+# lpfc_task_mgmt_tmo: Maximum time to wait for task management commands
+# to complete in seconds. Value range is [5,180], default value is 60.
+*/
+LPFC_ATTR_RW(task_mgmt_tmo, 60, 5, 180,
+            "Maximum time to wait for task management commands to complete");
+/*
 # lpfc_use_msi: Use MSI (Message Signaled Interrupts) in systems that
 #              support this feature
 #       0  = MSI disabled
@@ -4295,6 +4306,7 @@ struct device_attribute *lpfc_hba_attrs[] = {
        &dev_attr_issue_reset,
        &dev_attr_lpfc_poll,
        &dev_attr_lpfc_poll_tmo,
+       &dev_attr_lpfc_task_mgmt_tmo,
        &dev_attr_lpfc_use_msi,
        &dev_attr_lpfc_fcp_imax,
        &dev_attr_lpfc_fcp_cpu_map,
@@ -5274,6 +5286,7 @@ lpfc_get_cfgparam(struct lpfc_hba *phba)
        lpfc_topology_init(phba, lpfc_topology);
        lpfc_link_speed_init(phba, lpfc_link_speed);
        lpfc_poll_tmo_init(phba, lpfc_poll_tmo);
+       lpfc_task_mgmt_tmo_init(phba, lpfc_task_mgmt_tmo);
        lpfc_enable_npiv_init(phba, lpfc_enable_npiv);
        lpfc_fcf_failover_policy_init(phba, lpfc_fcf_failover_policy);
        lpfc_enable_rrq_init(phba, lpfc_enable_rrq);
index 79c13c3..b92aec9 100644 (file)
@@ -317,6 +317,11 @@ lpfc_bsg_send_mgmt_cmd_cmp(struct lpfc_hba *phba,
        }
        spin_unlock_irqrestore(&phba->ct_ev_lock, flags);
 
+       /* Close the timeout handler abort window */
+       spin_lock_irqsave(&phba->hbalock, flags);
+       cmdiocbq->iocb_flag &= ~LPFC_IO_CMD_OUTSTANDING;
+       spin_unlock_irqrestore(&phba->hbalock, flags);
+
        iocb = &dd_data->context_un.iocb;
        ndlp = iocb->ndlp;
        rmp = iocb->rmp;
@@ -387,6 +392,7 @@ lpfc_bsg_send_mgmt_cmd(struct fc_bsg_job *job)
        int request_nseg;
        int reply_nseg;
        struct bsg_job_data *dd_data;
+       unsigned long flags;
        uint32_t creg_val;
        int rc = 0;
        int iocb_stat;
@@ -501,14 +507,24 @@ lpfc_bsg_send_mgmt_cmd(struct fc_bsg_job *job)
        }
 
        iocb_stat = lpfc_sli_issue_iocb(phba, LPFC_ELS_RING, cmdiocbq, 0);
-       if (iocb_stat == IOCB_SUCCESS)
+
+       if (iocb_stat == IOCB_SUCCESS) {
+               spin_lock_irqsave(&phba->hbalock, flags);
+               /* make sure the I/O had not been completed yet */
+               if (cmdiocbq->iocb_flag & LPFC_IO_LIBDFC) {
+                       /* open up abort window to timeout handler */
+                       cmdiocbq->iocb_flag |= LPFC_IO_CMD_OUTSTANDING;
+               }
+               spin_unlock_irqrestore(&phba->hbalock, flags);
                return 0; /* done for now */
-       else if (iocb_stat == IOCB_BUSY)
+       } else if (iocb_stat == IOCB_BUSY) {
                rc = -EAGAIN;
-       else
+       } else {
                rc = -EIO;
+       }
 
        /* iocb failed so cleanup */
+       job->dd_data = NULL;
 
 free_rmp:
        lpfc_free_bsg_buffers(phba, rmp);
@@ -577,6 +593,11 @@ lpfc_bsg_rport_els_cmp(struct lpfc_hba *phba,
        }
        spin_unlock_irqrestore(&phba->ct_ev_lock, flags);
 
+       /* Close the timeout handler abort window */
+       spin_lock_irqsave(&phba->hbalock, flags);
+       cmdiocbq->iocb_flag &= ~LPFC_IO_CMD_OUTSTANDING;
+       spin_unlock_irqrestore(&phba->hbalock, flags);
+
        rsp = &rspiocbq->iocb;
        pcmd = (struct lpfc_dmabuf *)cmdiocbq->context2;
        prsp = (struct lpfc_dmabuf *)pcmd->list.next;
@@ -639,6 +660,7 @@ lpfc_bsg_rport_els(struct fc_bsg_job *job)
        struct lpfc_iocbq *cmdiocbq;
        uint16_t rpi = 0;
        struct bsg_job_data *dd_data;
+       unsigned long flags;
        uint32_t creg_val;
        int rc = 0;
 
@@ -721,15 +743,25 @@ lpfc_bsg_rport_els(struct fc_bsg_job *job)
 
        rc = lpfc_sli_issue_iocb(phba, LPFC_ELS_RING, cmdiocbq, 0);
 
-       if (rc == IOCB_SUCCESS)
+       if (rc == IOCB_SUCCESS) {
+               spin_lock_irqsave(&phba->hbalock, flags);
+               /* make sure the I/O had not been completed/released */
+               if (cmdiocbq->iocb_flag & LPFC_IO_LIBDFC) {
+                       /* open up abort window to timeout handler */
+                       cmdiocbq->iocb_flag |= LPFC_IO_CMD_OUTSTANDING;
+               }
+               spin_unlock_irqrestore(&phba->hbalock, flags);
                return 0; /* done for now */
-       else if (rc == IOCB_BUSY)
+       } else if (rc == IOCB_BUSY) {
                rc = -EAGAIN;
-       else
+       } else {
                rc = -EIO;
+       }
 
-linkdown_err:
+       /* iocb failed so cleanup */
+       job->dd_data = NULL;
 
+linkdown_err:
        cmdiocbq->context1 = ndlp;
        lpfc_els_free_iocb(phba, cmdiocbq);
 
@@ -1249,7 +1281,7 @@ lpfc_bsg_hba_get_event(struct fc_bsg_job *job)
        struct lpfc_hba *phba = vport->phba;
        struct get_ct_event *event_req;
        struct get_ct_event_reply *event_reply;
-       struct lpfc_bsg_event *evt;
+       struct lpfc_bsg_event *evt, *evt_next;
        struct event_data *evt_dat = NULL;
        unsigned long flags;
        uint32_t rc = 0;
@@ -1269,7 +1301,7 @@ lpfc_bsg_hba_get_event(struct fc_bsg_job *job)
        event_reply = (struct get_ct_event_reply *)
                job->reply->reply_data.vendor_reply.vendor_rsp;
        spin_lock_irqsave(&phba->ct_ev_lock, flags);
-       list_for_each_entry(evt, &phba->ct_ev_waiters, node) {
+       list_for_each_entry_safe(evt, evt_next, &phba->ct_ev_waiters, node) {
                if (evt->reg_id == event_req->ev_reg_id) {
                        if (list_empty(&evt->events_to_get))
                                break;
@@ -1370,6 +1402,11 @@ lpfc_issue_ct_rsp_cmp(struct lpfc_hba *phba,
        }
        spin_unlock_irqrestore(&phba->ct_ev_lock, flags);
 
+       /* Close the timeout handler abort window */
+       spin_lock_irqsave(&phba->hbalock, flags);
+       cmdiocbq->iocb_flag &= ~LPFC_IO_CMD_OUTSTANDING;
+       spin_unlock_irqrestore(&phba->hbalock, flags);
+
        ndlp = dd_data->context_un.iocb.ndlp;
        cmp = cmdiocbq->context2;
        bmp = cmdiocbq->context3;
@@ -1433,6 +1470,7 @@ lpfc_issue_ct_rsp(struct lpfc_hba *phba, struct fc_bsg_job *job, uint32_t tag,
        int rc = 0;
        struct lpfc_nodelist *ndlp = NULL;
        struct bsg_job_data *dd_data;
+       unsigned long flags;
        uint32_t creg_val;
 
        /* allocate our bsg tracking structure */
@@ -1542,8 +1580,19 @@ lpfc_issue_ct_rsp(struct lpfc_hba *phba, struct fc_bsg_job *job, uint32_t tag,
 
        rc = lpfc_sli_issue_iocb(phba, LPFC_ELS_RING, ctiocb, 0);
 
-       if (rc == IOCB_SUCCESS)
+       if (rc == IOCB_SUCCESS) {
+               spin_lock_irqsave(&phba->hbalock, flags);
+               /* make sure the I/O had not been completed/released */
+               if (ctiocb->iocb_flag & LPFC_IO_LIBDFC) {
+                       /* open up abort window to timeout handler */
+                       ctiocb->iocb_flag |= LPFC_IO_CMD_OUTSTANDING;
+               }
+               spin_unlock_irqrestore(&phba->hbalock, flags);
                return 0; /* done for now */
+       }
+
+       /* iocb failed so cleanup */
+       job->dd_data = NULL;
 
 issue_ct_rsp_exit:
        lpfc_sli_release_iocbq(phba, ctiocb);
@@ -5284,9 +5333,15 @@ lpfc_bsg_timeout(struct fc_bsg_job *job)
                 * remove it from the txq queue and call cancel iocbs.
                 * Otherwise, call abort iotag
                 */
-
                cmdiocb = dd_data->context_un.iocb.cmdiocbq;
-               spin_lock_irq(&phba->hbalock);
+               spin_unlock_irqrestore(&phba->ct_ev_lock, flags);
+
+               spin_lock_irqsave(&phba->hbalock, flags);
+               /* make sure the I/O abort window is still open */
+               if (!(cmdiocb->iocb_flag & LPFC_IO_CMD_OUTSTANDING)) {
+                       spin_unlock_irqrestore(&phba->hbalock, flags);
+                       return -EAGAIN;
+               }
                list_for_each_entry_safe(check_iocb, next_iocb, &pring->txq,
                                         list) {
                        if (check_iocb == cmdiocb) {
@@ -5296,8 +5351,7 @@ lpfc_bsg_timeout(struct fc_bsg_job *job)
                }
                if (list_empty(&completions))
                        lpfc_sli_issue_abort_iotag(phba, pring, cmdiocb);
-               spin_unlock_irq(&phba->hbalock);
-               spin_unlock_irqrestore(&phba->ct_ev_lock, flags);
+               spin_unlock_irqrestore(&phba->hbalock, flags);
                if (!list_empty(&completions)) {
                        lpfc_sli_cancel_iocbs(phba, &completions,
                                              IOSTAT_LOCAL_REJECT,
@@ -5321,9 +5375,10 @@ lpfc_bsg_timeout(struct fc_bsg_job *job)
                 * remove it from the txq queue and call cancel iocbs.
                 * Otherwise, call abort iotag.
                 */
-
                cmdiocb = dd_data->context_un.menlo.cmdiocbq;
-               spin_lock_irq(&phba->hbalock);
+               spin_unlock_irqrestore(&phba->ct_ev_lock, flags);
+
+               spin_lock_irqsave(&phba->hbalock, flags);
                list_for_each_entry_safe(check_iocb, next_iocb, &pring->txq,
                                         list) {
                        if (check_iocb == cmdiocb) {
@@ -5333,8 +5388,7 @@ lpfc_bsg_timeout(struct fc_bsg_job *job)
                }
                if (list_empty(&completions))
                        lpfc_sli_issue_abort_iotag(phba, pring, cmdiocb);
-               spin_unlock_irq(&phba->hbalock);
-               spin_unlock_irqrestore(&phba->ct_ev_lock, flags);
+               spin_unlock_irqrestore(&phba->hbalock, flags);
                if (!list_empty(&completions)) {
                        lpfc_sli_cancel_iocbs(phba, &completions,
                                              IOSTAT_LOCAL_REJECT,
index 60d6ca2..7801601 100644 (file)
@@ -4437,6 +4437,7 @@ lpfc_nlp_logo_unreg(struct lpfc_hba *phba, LPFC_MBOXQ_t *pmb)
        if (!ndlp)
                return;
        lpfc_issue_els_logo(vport, ndlp, 0);
+       mempool_free(pmb, phba->mbox_mem_pool);
 }
 
 /*
@@ -4456,7 +4457,15 @@ lpfc_unreg_rpi(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp)
        int rc;
        uint16_t rpi;
 
-       if (ndlp->nlp_flag & NLP_RPI_REGISTERED) {
+       if (ndlp->nlp_flag & NLP_RPI_REGISTERED ||
+           ndlp->nlp_flag & NLP_REG_LOGIN_SEND) {
+               if (ndlp->nlp_flag & NLP_REG_LOGIN_SEND)
+                       lpfc_printf_vlog(vport, KERN_INFO, LOG_SLI,
+                                        "3366 RPI x%x needs to be "
+                                        "unregistered nlp_flag x%x "
+                                        "did x%x\n",
+                                        ndlp->nlp_rpi, ndlp->nlp_flag,
+                                        ndlp->nlp_DID);
                mbox = mempool_alloc(phba->mbox_mem_pool, GFP_KERNEL);
                if (mbox) {
                        /* SLI4 ports require the physical rpi value. */
index 501147c..647f5bf 100644 (file)
@@ -3031,10 +3031,10 @@ lpfc_sli4_xri_sgl_update(struct lpfc_hba *phba)
                        phba->sli4_hba.scsi_xri_max);
 
        spin_lock_irq(&phba->scsi_buf_list_get_lock);
-       spin_lock_irq(&phba->scsi_buf_list_put_lock);
+       spin_lock(&phba->scsi_buf_list_put_lock);
        list_splice_init(&phba->lpfc_scsi_buf_list_get, &scsi_sgl_list);
        list_splice(&phba->lpfc_scsi_buf_list_put, &scsi_sgl_list);
-       spin_unlock_irq(&phba->scsi_buf_list_put_lock);
+       spin_unlock(&phba->scsi_buf_list_put_lock);
        spin_unlock_irq(&phba->scsi_buf_list_get_lock);
 
        if (phba->sli4_hba.scsi_xri_cnt > phba->sli4_hba.scsi_xri_max) {
@@ -3070,10 +3070,10 @@ lpfc_sli4_xri_sgl_update(struct lpfc_hba *phba)
                psb->cur_iocbq.sli4_xritag = phba->sli4_hba.xri_ids[lxri];
        }
        spin_lock_irq(&phba->scsi_buf_list_get_lock);
-       spin_lock_irq(&phba->scsi_buf_list_put_lock);
+       spin_lock(&phba->scsi_buf_list_put_lock);
        list_splice_init(&scsi_sgl_list, &phba->lpfc_scsi_buf_list_get);
        INIT_LIST_HEAD(&phba->lpfc_scsi_buf_list_put);
-       spin_unlock_irq(&phba->scsi_buf_list_put_lock);
+       spin_unlock(&phba->scsi_buf_list_put_lock);
        spin_unlock_irq(&phba->scsi_buf_list_get_lock);
 
        return 0;
@@ -4859,6 +4859,9 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
        struct lpfc_mqe *mqe;
        int longs;
 
+       /* Get all the module params for configuring this host */
+       lpfc_get_cfgparam(phba);
+
        /* Before proceed, wait for POST done and device ready */
        rc = lpfc_sli4_post_status_check(phba);
        if (rc)
@@ -4902,15 +4905,6 @@ lpfc_sli4_driver_resource_setup(struct lpfc_hba *phba)
                sizeof(struct lpfc_mbox_ext_buf_ctx));
        INIT_LIST_HEAD(&phba->mbox_ext_buf_ctx.ext_dmabuf_list);
 
-       /*
-        * We need to do a READ_CONFIG mailbox command here before
-        * calling lpfc_get_cfgparam. For VFs this will report the
-        * MAX_XRI, MAX_VPI, MAX_RPI, MAX_IOCB, and MAX_VFI settings.
-        * All of the resources allocated
-        * for this Port are tied to these values.
-        */
-       /* Get all the module params for configuring this host */
-       lpfc_get_cfgparam(phba);
        phba->max_vpi = LPFC_MAX_VPI;
 
        /* This will be set to correct value after the read_config mbox */
@@ -7141,19 +7135,6 @@ lpfc_sli4_queue_destroy(struct lpfc_hba *phba)
                phba->sli4_hba.fcp_wq = NULL;
        }
 
-       if (phba->pci_bar0_memmap_p) {
-               iounmap(phba->pci_bar0_memmap_p);
-               phba->pci_bar0_memmap_p = NULL;
-       }
-       if (phba->pci_bar2_memmap_p) {
-               iounmap(phba->pci_bar2_memmap_p);
-               phba->pci_bar2_memmap_p = NULL;
-       }
-       if (phba->pci_bar4_memmap_p) {
-               iounmap(phba->pci_bar4_memmap_p);
-               phba->pci_bar4_memmap_p = NULL;
-       }
-
        /* Release FCP CQ mapping array */
        if (phba->sli4_hba.fcp_cq_map != NULL) {
                kfree(phba->sli4_hba.fcp_cq_map);
@@ -7942,9 +7923,9 @@ lpfc_sli4_pci_mem_setup(struct lpfc_hba *phba)
         * particular PCI BARs regions is dependent on the type of
         * SLI4 device.
         */
-       if (pci_resource_start(pdev, 0)) {
-               phba->pci_bar0_map = pci_resource_start(pdev, 0);
-               bar0map_len = pci_resource_len(pdev, 0);
+       if (pci_resource_start(pdev, PCI_64BIT_BAR0)) {
+               phba->pci_bar0_map = pci_resource_start(pdev, PCI_64BIT_BAR0);
+               bar0map_len = pci_resource_len(pdev, PCI_64BIT_BAR0);
 
                /*
                 * Map SLI4 PCI Config Space Register base to a kernel virtual
@@ -7958,6 +7939,7 @@ lpfc_sli4_pci_mem_setup(struct lpfc_hba *phba)
                                   "registers.\n");
                        goto out;
                }
+               phba->pci_bar0_memmap_p = phba->sli4_hba.conf_regs_memmap_p;
                /* Set up BAR0 PCI config space register memory map */
                lpfc_sli4_bar0_register_memmap(phba, if_type);
        } else {
@@ -7980,13 +7962,13 @@ lpfc_sli4_pci_mem_setup(struct lpfc_hba *phba)
        }
 
        if ((if_type == LPFC_SLI_INTF_IF_TYPE_0) &&
-           (pci_resource_start(pdev, 2))) {
+           (pci_resource_start(pdev, PCI_64BIT_BAR2))) {
                /*
                 * Map SLI4 if type 0 HBA Control Register base to a kernel
                 * virtual address and setup the registers.
                 */
-               phba->pci_bar1_map = pci_resource_start(pdev, 2);
-               bar1map_len = pci_resource_len(pdev, 2);
+               phba->pci_bar1_map = pci_resource_start(pdev, PCI_64BIT_BAR2);
+               bar1map_len = pci_resource_len(pdev, PCI_64BIT_BAR2);
                phba->sli4_hba.ctrl_regs_memmap_p =
                                ioremap(phba->pci_bar1_map, bar1map_len);
                if (!phba->sli4_hba.ctrl_regs_memmap_p) {
@@ -7994,17 +7976,18 @@ lpfc_sli4_pci_mem_setup(struct lpfc_hba *phba)
                           "ioremap failed for SLI4 HBA control registers.\n");
                        goto out_iounmap_conf;
                }
+               phba->pci_bar2_memmap_p = phba->sli4_hba.ctrl_regs_memmap_p;
                lpfc_sli4_bar1_register_memmap(phba);
        }
 
        if ((if_type == LPFC_SLI_INTF_IF_TYPE_0) &&
-           (pci_resource_start(pdev, 4))) {
+           (pci_resource_start(pdev, PCI_64BIT_BAR4))) {
                /*
                 * Map SLI4 if type 0 HBA Doorbell Register base to a kernel
                 * virtual address and setup the registers.
                 */
-               phba->pci_bar2_map = pci_resource_start(pdev, 4);
-               bar2map_len = pci_resource_len(pdev, 4);
+               phba->pci_bar2_map = pci_resource_start(pdev, PCI_64BIT_BAR4);
+               bar2map_len = pci_resource_len(pdev, PCI_64BIT_BAR4);
                phba->sli4_hba.drbl_regs_memmap_p =
                                ioremap(phba->pci_bar2_map, bar2map_len);
                if (!phba->sli4_hba.drbl_regs_memmap_p) {
@@ -8012,6 +7995,7 @@ lpfc_sli4_pci_mem_setup(struct lpfc_hba *phba)
                           "ioremap failed for SLI4 HBA doorbell registers.\n");
                        goto out_iounmap_ctrl;
                }
+               phba->pci_bar4_memmap_p = phba->sli4_hba.drbl_regs_memmap_p;
                error = lpfc_sli4_bar2_register_memmap(phba, LPFC_VF0);
                if (error)
                        goto out_iounmap_all;
@@ -8405,7 +8389,8 @@ static int
 lpfc_sli4_set_affinity(struct lpfc_hba *phba, int vectors)
 {
        int i, idx, saved_chann, used_chann, cpu, phys_id;
-       int max_phys_id, num_io_channel, first_cpu;
+       int max_phys_id, min_phys_id;
+       int num_io_channel, first_cpu, chan;
        struct lpfc_vector_map_info *cpup;
 #ifdef CONFIG_X86
        struct cpuinfo_x86 *cpuinfo;
@@ -8423,6 +8408,7 @@ lpfc_sli4_set_affinity(struct lpfc_hba *phba, int vectors)
                phba->sli4_hba.num_present_cpu));
 
        max_phys_id = 0;
+       min_phys_id = 0xff;
        phys_id = 0;
        num_io_channel = 0;
        first_cpu = LPFC_VECTOR_MAP_EMPTY;
@@ -8446,9 +8432,12 @@ lpfc_sli4_set_affinity(struct lpfc_hba *phba, int vectors)
 
                if (cpup->phys_id > max_phys_id)
                        max_phys_id = cpup->phys_id;
+               if (cpup->phys_id < min_phys_id)
+                       min_phys_id = cpup->phys_id;
                cpup++;
        }
 
+       phys_id = min_phys_id;
        /* Now associate the HBA vectors with specific CPUs */
        for (idx = 0; idx < vectors; idx++) {
                cpup = phba->sli4_hba.cpu_map;
@@ -8459,13 +8448,25 @@ lpfc_sli4_set_affinity(struct lpfc_hba *phba, int vectors)
                        for (i = 1; i < max_phys_id; i++) {
                                phys_id++;
                                if (phys_id > max_phys_id)
-                                       phys_id = 0;
+                                       phys_id = min_phys_id;
                                cpu = lpfc_find_next_cpu(phba, phys_id);
                                if (cpu == LPFC_VECTOR_MAP_EMPTY)
                                        continue;
                                goto found;
                        }
 
+                       /* Use round robin for scheduling */
+                       phba->cfg_fcp_io_sched = LPFC_FCP_SCHED_ROUND_ROBIN;
+                       chan = 0;
+                       cpup = phba->sli4_hba.cpu_map;
+                       for (i = 0; i < phba->sli4_hba.num_present_cpu; i++) {
+                               cpup->channel_id = chan;
+                               cpup++;
+                               chan++;
+                               if (chan >= phba->cfg_fcp_io_channel)
+                                       chan = 0;
+                       }
+
                        lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
                                        "3329 Cannot set affinity:"
                                        "Error mapping vector %d (%d)\n",
@@ -8503,7 +8504,7 @@ found:
                /* Spread vector mapping across multple physical CPU nodes */
                phys_id++;
                if (phys_id > max_phys_id)
-                       phys_id = 0;
+                       phys_id = min_phys_id;
        }
 
        /*
@@ -8513,7 +8514,7 @@ found:
         * Base the remaining IO channel assigned, to IO channels already
         * assigned to other CPUs on the same phys_id.
         */
-       for (i = 0; i <= max_phys_id; i++) {
+       for (i = min_phys_id; i <= max_phys_id; i++) {
                /*
                 * If there are no io channels already mapped to
                 * this phys_id, just round robin thru the io_channels.
@@ -8595,10 +8596,11 @@ out:
        if (num_io_channel != phba->sli4_hba.num_present_cpu)
                lpfc_printf_log(phba, KERN_ERR, LOG_INIT,
                                "3333 Set affinity mismatch:"
-                               "%d chann != %d cpus: %d vactors\n",
+                               "%d chann != %d cpus: %d vectors\n",
                                num_io_channel, phba->sli4_hba.num_present_cpu,
                                vectors);
 
+       /* Enable using cpu affinity for scheduling */
        phba->cfg_fcp_io_sched = LPFC_FCP_SCHED_BY_CPU;
        return 1;
 }
@@ -8689,9 +8691,12 @@ enable_msix_vectors:
 
 cfg_fail_out:
        /* free the irq already requested */
-       for (--index; index >= 0; index--)
+       for (--index; index >= 0; index--) {
+               irq_set_affinity_hint(phba->sli4_hba.msix_entries[index].
+                                         vector, NULL);
                free_irq(phba->sli4_hba.msix_entries[index].vector,
                         &phba->sli4_hba.fcp_eq_hdl[index]);
+       }
 
 msi_fail_out:
        /* Unconfigure MSI-X capability structure */
@@ -8712,9 +8717,12 @@ lpfc_sli4_disable_msix(struct lpfc_hba *phba)
        int index;
 
        /* Free up MSI-X multi-message vectors */
-       for (index = 0; index < phba->cfg_fcp_io_channel; index++)
+       for (index = 0; index < phba->cfg_fcp_io_channel; index++) {
+               irq_set_affinity_hint(phba->sli4_hba.msix_entries[index].
+                                         vector, NULL);
                free_irq(phba->sli4_hba.msix_entries[index].vector,
                         &phba->sli4_hba.fcp_eq_hdl[index]);
+       }
 
        /* Disable MSI-X */
        pci_disable_msix(phba->pcidev);
index 1242b6c..c913e8c 100644 (file)
@@ -926,10 +926,10 @@ lpfc_sli4_repost_scsi_sgl_list(struct lpfc_hba *phba)
 
        /* get all SCSI buffers need to repost to a local list */
        spin_lock_irq(&phba->scsi_buf_list_get_lock);
-       spin_lock_irq(&phba->scsi_buf_list_put_lock);
+       spin_lock(&phba->scsi_buf_list_put_lock);
        list_splice_init(&phba->lpfc_scsi_buf_list_get, &post_sblist);
        list_splice(&phba->lpfc_scsi_buf_list_put, &post_sblist);
-       spin_unlock_irq(&phba->scsi_buf_list_put_lock);
+       spin_unlock(&phba->scsi_buf_list_put_lock);
        spin_unlock_irq(&phba->scsi_buf_list_get_lock);
 
        /* post the list of scsi buffer sgls to port if available */
@@ -1000,9 +1000,12 @@ lpfc_new_scsi_buf_s4(struct lpfc_vport *vport, int num_to_alloc)
                }
                memset(psb->data, 0, phba->cfg_sg_dma_buf_size);
 
-               /* Page alignment is CRITICAL, double check to be sure */
-               if (((unsigned long)(psb->data) &
-                   (unsigned long)(SLI4_PAGE_SIZE - 1)) != 0) {
+               /*
+                * 4K Page alignment is CRITICAL to BlockGuard, double check
+                * to be sure.
+                */
+               if (phba->cfg_enable_bg  && (((unsigned long)(psb->data) &
+                   (unsigned long)(SLI4_PAGE_SIZE - 1)) != 0)) {
                        pci_pool_free(phba->lpfc_scsi_dma_buf_pool,
                                      psb->data, psb->dma_handle);
                        kfree(psb);
@@ -1134,22 +1137,21 @@ lpfc_get_scsi_buf_s3(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp)
 {
        struct  lpfc_scsi_buf * lpfc_cmd = NULL;
        struct list_head *scsi_buf_list_get = &phba->lpfc_scsi_buf_list_get;
-       unsigned long gflag = 0;
-       unsigned long pflag = 0;
+       unsigned long iflag = 0;
 
-       spin_lock_irqsave(&phba->scsi_buf_list_get_lock, gflag);
+       spin_lock_irqsave(&phba->scsi_buf_list_get_lock, iflag);
        list_remove_head(scsi_buf_list_get, lpfc_cmd, struct lpfc_scsi_buf,
                         list);
        if (!lpfc_cmd) {
-               spin_lock_irqsave(&phba->scsi_buf_list_put_lock, pflag);
+               spin_lock(&phba->scsi_buf_list_put_lock);
                list_splice(&phba->lpfc_scsi_buf_list_put,
                            &phba->lpfc_scsi_buf_list_get);
                INIT_LIST_HEAD(&phba->lpfc_scsi_buf_list_put);
                list_remove_head(scsi_buf_list_get, lpfc_cmd,
                                 struct lpfc_scsi_buf, list);
-               spin_unlock_irqrestore(&phba->scsi_buf_list_put_lock, pflag);
+               spin_unlock(&phba->scsi_buf_list_put_lock);
        }
-       spin_unlock_irqrestore(&phba->scsi_buf_list_get_lock, gflag);
+       spin_unlock_irqrestore(&phba->scsi_buf_list_get_lock, iflag);
        return  lpfc_cmd;
 }
 /**
@@ -1167,11 +1169,10 @@ static struct lpfc_scsi_buf*
 lpfc_get_scsi_buf_s4(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp)
 {
        struct lpfc_scsi_buf *lpfc_cmd, *lpfc_cmd_next;
-       unsigned long gflag = 0;
-       unsigned long pflag = 0;
+       unsigned long iflag = 0;
        int found = 0;
 
-       spin_lock_irqsave(&phba->scsi_buf_list_get_lock, gflag);
+       spin_lock_irqsave(&phba->scsi_buf_list_get_lock, iflag);
        list_for_each_entry_safe(lpfc_cmd, lpfc_cmd_next,
                                 &phba->lpfc_scsi_buf_list_get, list) {
                if (lpfc_test_rrq_active(phba, ndlp,
@@ -1182,11 +1183,11 @@ lpfc_get_scsi_buf_s4(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp)
                break;
        }
        if (!found) {
-               spin_lock_irqsave(&phba->scsi_buf_list_put_lock, pflag);
+               spin_lock(&phba->scsi_buf_list_put_lock);
                list_splice(&phba->lpfc_scsi_buf_list_put,
                            &phba->lpfc_scsi_buf_list_get);
                INIT_LIST_HEAD(&phba->lpfc_scsi_buf_list_put);
-               spin_unlock_irqrestore(&phba->scsi_buf_list_put_lock, pflag);
+               spin_unlock(&phba->scsi_buf_list_put_lock);
                list_for_each_entry_safe(lpfc_cmd, lpfc_cmd_next,
                                         &phba->lpfc_scsi_buf_list_get, list) {
                        if (lpfc_test_rrq_active(
@@ -1197,7 +1198,7 @@ lpfc_get_scsi_buf_s4(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp)
                        break;
                }
        }
-       spin_unlock_irqrestore(&phba->scsi_buf_list_get_lock, gflag);
+       spin_unlock_irqrestore(&phba->scsi_buf_list_get_lock, iflag);
        if (!found)
                return NULL;
        return  lpfc_cmd;
@@ -3966,11 +3967,11 @@ lpfc_handle_fcp_err(struct lpfc_vport *vport, struct lpfc_scsi_buf *lpfc_cmd,
 
        /*
         * Check SLI validation that all the transfer was actually done
-        * (fcpi_parm should be zero).
+        * (fcpi_parm should be zero). Apply check only to reads.
         */
-       } else if (fcpi_parm) {
+       } else if (fcpi_parm && (cmnd->sc_data_direction == DMA_FROM_DEVICE)) {
                lpfc_printf_vlog(vport, KERN_WARNING, LOG_FCP | LOG_FCP_ERROR,
-                                "9029 FCP Data Transfer Check Error: "
+                                "9029 FCP Read Check Error Data: "
                                 "x%x x%x x%x x%x x%x\n",
                                 be32_to_cpu(fcpcmd->fcpDl),
                                 be32_to_cpu(fcprsp->rspResId),
@@ -4342,6 +4343,7 @@ lpfc_scsi_prep_cmnd(struct lpfc_vport *vport, struct lpfc_scsi_buf *lpfc_cmd,
        char tag[2];
        uint8_t *ptr;
        bool sli4;
+       uint32_t fcpdl;
 
        if (!pnode || !NLP_CHK_NODE_ACT(pnode))
                return;
@@ -4389,8 +4391,12 @@ lpfc_scsi_prep_cmnd(struct lpfc_vport *vport, struct lpfc_scsi_buf *lpfc_cmd,
                        iocb_cmd->ulpPU = PARM_READ_CHECK;
                        if (vport->cfg_first_burst_size &&
                            (pnode->nlp_flag & NLP_FIRSTBURST)) {
-                               piocbq->iocb.un.fcpi.fcpi_XRdy =
-                                       vport->cfg_first_burst_size;
+                               fcpdl = scsi_bufflen(scsi_cmnd);
+                               if (fcpdl < vport->cfg_first_burst_size)
+                                       piocbq->iocb.un.fcpi.fcpi_XRdy = fcpdl;
+                               else
+                                       piocbq->iocb.un.fcpi.fcpi_XRdy =
+                                               vport->cfg_first_burst_size;
                        }
                        fcp_cmnd->fcpCntl3 = WRITE_DATA;
                        phba->fc4OutputRequests++;
@@ -4878,6 +4884,9 @@ lpfc_abort_handler(struct scsi_cmnd *cmnd)
                goto out_unlock;
        }
 
+       /* Indicate the IO is being aborted by the driver. */
+       iocb->iocb_flag |= LPFC_DRIVER_ABORTED;
+
        /*
         * The scsi command can not be in txq and it is in flight because the
         * pCmd is still pointig at the SCSI command we have to abort. There
@@ -5006,7 +5015,7 @@ lpfc_send_taskmgmt(struct lpfc_vport *vport, struct lpfc_rport_data *rdata,
        lpfc_cmd = lpfc_get_scsi_buf(phba, rdata->pnode);
        if (lpfc_cmd == NULL)
                return FAILED;
-       lpfc_cmd->timeout = 60;
+       lpfc_cmd->timeout = phba->cfg_task_mgmt_tmo;
        lpfc_cmd->rdata = rdata;
 
        status = lpfc_scsi_prep_task_mgmt_cmd(vport, lpfc_cmd, lun_id,
index 0392e11..612f489 100644 (file)
@@ -9831,6 +9831,13 @@ lpfc_sli_abort_iocb(struct lpfc_vport *vport, struct lpfc_sli_ring *pring,
                                               abort_cmd) != 0)
                        continue;
 
+               /*
+                * If the iocbq is already being aborted, don't take a second
+                * action, but do count it.
+                */
+               if (iocbq->iocb_flag & LPFC_DRIVER_ABORTED)
+                       continue;
+
                /* issue ABTS for this IOCB based on iotag */
                abtsiocb = lpfc_sli_get_iocbq(phba);
                if (abtsiocb == NULL) {
@@ -9838,6 +9845,9 @@ lpfc_sli_abort_iocb(struct lpfc_vport *vport, struct lpfc_sli_ring *pring,
                        continue;
                }
 
+               /* indicate the IO is being aborted by the driver. */
+               iocbq->iocb_flag |= LPFC_DRIVER_ABORTED;
+
                cmd = &iocbq->iocb;
                abtsiocb->iocb.un.acxri.abortType = ABORT_TYPE_ABTS;
                abtsiocb->iocb.un.acxri.abortContextTag = cmd->ulpContext;
@@ -9847,7 +9857,7 @@ lpfc_sli_abort_iocb(struct lpfc_vport *vport, struct lpfc_sli_ring *pring,
                        abtsiocb->iocb.un.acxri.abortIoTag = cmd->ulpIoTag;
                abtsiocb->iocb.ulpLe = 1;
                abtsiocb->iocb.ulpClass = cmd->ulpClass;
-               abtsiocb->vport = phba->pport;
+               abtsiocb->vport = vport;
 
                /* ABTS WQE must go to the same WQ as the WQE to be aborted */
                abtsiocb->fcp_wqidx = iocbq->fcp_wqidx;
@@ -12233,7 +12243,6 @@ static void __iomem *
 lpfc_dual_chute_pci_bar_map(struct lpfc_hba *phba, uint16_t pci_barset)
 {
        struct pci_dev *pdev;
-       unsigned long bar_map, bar_map_len;
 
        if (!phba->pcidev)
                return NULL;
@@ -12242,25 +12251,10 @@ lpfc_dual_chute_pci_bar_map(struct lpfc_hba *phba, uint16_t pci_barset)
 
        switch (pci_barset) {
        case WQ_PCI_BAR_0_AND_1:
-               if (!phba->pci_bar0_memmap_p) {
-                       bar_map = pci_resource_start(pdev, PCI_64BIT_BAR0);
-                       bar_map_len = pci_resource_len(pdev, PCI_64BIT_BAR0);
-                       phba->pci_bar0_memmap_p = ioremap(bar_map, bar_map_len);
-               }
                return phba->pci_bar0_memmap_p;
        case WQ_PCI_BAR_2_AND_3:
-               if (!phba->pci_bar2_memmap_p) {
-                       bar_map = pci_resource_start(pdev, PCI_64BIT_BAR2);
-                       bar_map_len = pci_resource_len(pdev, PCI_64BIT_BAR2);
-                       phba->pci_bar2_memmap_p = ioremap(bar_map, bar_map_len);
-               }
                return phba->pci_bar2_memmap_p;
        case WQ_PCI_BAR_4_AND_5:
-               if (!phba->pci_bar4_memmap_p) {
-                       bar_map = pci_resource_start(pdev, PCI_64BIT_BAR4);
-                       bar_map_len = pci_resource_len(pdev, PCI_64BIT_BAR4);
-                       phba->pci_bar4_memmap_p = ioremap(bar_map, bar_map_len);
-               }
                return phba->pci_bar4_memmap_p;
        default:
                break;
@@ -15808,7 +15802,7 @@ lpfc_sli4_fcf_rr_index_set(struct lpfc_hba *phba, uint16_t fcf_index)
 void
 lpfc_sli4_fcf_rr_index_clear(struct lpfc_hba *phba, uint16_t fcf_index)
 {
-       struct lpfc_fcf_pri *fcf_pri;
+       struct lpfc_fcf_pri *fcf_pri, *fcf_pri_next;
        if (fcf_index >= LPFC_SLI4_FCF_TBL_INDX_MAX) {
                lpfc_printf_log(phba, KERN_ERR, LOG_FIP,
                                "2762 FCF (x%x) reached driver's book "
@@ -15818,7 +15812,8 @@ lpfc_sli4_fcf_rr_index_clear(struct lpfc_hba *phba, uint16_t fcf_index)
        }
        /* Clear the eligible FCF record index bmask */
        spin_lock_irq(&phba->hbalock);
-       list_for_each_entry(fcf_pri, &phba->fcf.fcf_pri_list, list) {
+       list_for_each_entry_safe(fcf_pri, fcf_pri_next, &phba->fcf.fcf_pri_list,
+                                list) {
                if (fcf_pri->fcf_rec.fcf_index == fcf_index) {
                        list_del_init(&fcf_pri->list);
                        break;
index 9761799..6b0f247 100644 (file)
@@ -58,7 +58,7 @@ struct lpfc_iocbq {
 
        IOCB_t iocb;            /* IOCB cmd */
        uint8_t retry;          /* retry counter for IOCB cmd - if needed */
-       uint16_t iocb_flag;
+       uint32_t iocb_flag;
 #define LPFC_IO_LIBDFC         1       /* libdfc iocb */
 #define LPFC_IO_WAKE           2       /* Synchronous I/O completed */
 #define LPFC_IO_WAKE_TMO       LPFC_IO_WAKE /* Synchronous I/O timed out */
@@ -73,11 +73,11 @@ struct lpfc_iocbq {
 #define LPFC_IO_DIF_PASS       0x400   /* T10 DIF IO pass-thru prot */
 #define LPFC_IO_DIF_STRIP      0x800   /* T10 DIF IO strip prot */
 #define LPFC_IO_DIF_INSERT     0x1000  /* T10 DIF IO insert prot */
+#define LPFC_IO_CMD_OUTSTANDING        0x2000 /* timeout handler abort window */
 
 #define LPFC_FIP_ELS_ID_MASK   0xc000  /* ELS_ID range 0-3, non-shifted mask */
 #define LPFC_FIP_ELS_ID_SHIFT  14
 
-       uint8_t rsvd2;
        uint32_t drvrTimeout;   /* driver timeout in seconds */
        uint32_t fcp_wqidx;     /* index to FCP work queue */
        struct lpfc_vport *vport;/* virtual port pointer */
index 5bcc382..85120b7 100644 (file)
@@ -523,7 +523,7 @@ struct lpfc_sli4_hba {
        struct lpfc_queue *hdr_rq; /* Slow-path Header Receive queue */
        struct lpfc_queue *dat_rq; /* Slow-path Data Receive queue */
 
-       uint8_t fw_func_mode;   /* FW function protocol mode */
+       uint32_t fw_func_mode;  /* FW function protocol mode */
        uint32_t ulp0_mode;     /* ULP0 protocol mode */
        uint32_t ulp1_mode;     /* ULP1 protocol mode */
 
index 21859d2..f58f183 100644 (file)
@@ -18,7 +18,7 @@
  * included with this package.                                     *
  *******************************************************************/
 
-#define LPFC_DRIVER_VERSION "8.3.41"
+#define LPFC_DRIVER_VERSION "8.3.42"
 #define LPFC_DRIVER_NAME               "lpfc"
 
 /* Used for SLI 2/3 */
index 04a42a5..0c73ba4 100644 (file)
@@ -33,9 +33,9 @@
 /*
  * MegaRAID SAS Driver meta data
  */
-#define MEGASAS_VERSION                                "06.600.18.00-rc1"
-#define MEGASAS_RELDATE                                "May. 15, 2013"
-#define MEGASAS_EXT_VERSION                    "Wed. May. 15 17:00:00 PDT 2013"
+#define MEGASAS_VERSION                                "06.700.06.00-rc1"
+#define MEGASAS_RELDATE                                "Aug. 31, 2013"
+#define MEGASAS_EXT_VERSION                    "Sat. Aug. 31 17:00:00 PDT 2013"
 
 /*
  * Device IDs
 
 #define MR_DCMD_CTRL_GET_INFO                  0x01010000
 #define MR_DCMD_LD_GET_LIST                    0x03010000
+#define MR_DCMD_LD_LIST_QUERY                  0x03010100
 
 #define MR_DCMD_CTRL_CACHE_FLUSH               0x01101000
 #define MR_FLUSH_CTRL_CACHE                    0x01
@@ -345,6 +346,15 @@ enum MR_PD_QUERY_TYPE {
        MR_PD_QUERY_TYPE_EXPOSED_TO_HOST    = 5,
 };
 
+enum MR_LD_QUERY_TYPE {
+       MR_LD_QUERY_TYPE_ALL             = 0,
+       MR_LD_QUERY_TYPE_EXPOSED_TO_HOST = 1,
+       MR_LD_QUERY_TYPE_USED_TGT_IDS    = 2,
+       MR_LD_QUERY_TYPE_CLUSTER_ACCESS  = 3,
+       MR_LD_QUERY_TYPE_CLUSTER_LOCALE  = 4,
+};
+
+
 #define MR_EVT_CFG_CLEARED                              0x0004
 #define MR_EVT_LD_STATE_CHANGE                          0x0051
 #define MR_EVT_PD_INSERTED                              0x005b
@@ -435,6 +445,14 @@ struct MR_LD_LIST {
        } ldList[MAX_LOGICAL_DRIVES];
 } __packed;
 
+struct MR_LD_TARGETID_LIST {
+       u32     size;
+       u32     count;
+       u8      pad[3];
+       u8      targetId[MAX_LOGICAL_DRIVES];
+};
+
+
 /*
  * SAS controller properties
  */
@@ -474,21 +492,39 @@ struct megasas_ctrl_prop {
        * a bit in the following structure.
        */
        struct {
-               u32     copyBackDisabled            : 1;
-               u32     SMARTerEnabled              : 1;
-               u32     prCorrectUnconfiguredAreas  : 1;
-               u32     useFdeOnly                  : 1;
-               u32     disableNCQ                  : 1;
-               u32     SSDSMARTerEnabled           : 1;
-               u32     SSDPatrolReadEnabled        : 1;
-               u32     enableSpinDownUnconfigured  : 1;
-               u32     autoEnhancedImport          : 1;
-               u32     enableSecretKeyControl      : 1;
-               u32     disableOnlineCtrlReset      : 1;
-               u32     allowBootWithPinnedCache    : 1;
-               u32     disableSpinDownHS           : 1;
-               u32     enableJBOD                  : 1;
-               u32     reserved                    :18;
+#if   defined(__BIG_ENDIAN_BITFIELD)
+               u32     reserved:18;
+               u32     enableJBOD:1;
+               u32     disableSpinDownHS:1;
+               u32     allowBootWithPinnedCache:1;
+               u32     disableOnlineCtrlReset:1;
+               u32     enableSecretKeyControl:1;
+               u32     autoEnhancedImport:1;
+               u32     enableSpinDownUnconfigured:1;
+               u32     SSDPatrolReadEnabled:1;
+               u32     SSDSMARTerEnabled:1;
+               u32     disableNCQ:1;
+               u32     useFdeOnly:1;
+               u32     prCorrectUnconfiguredAreas:1;
+               u32     SMARTerEnabled:1;
+               u32     copyBackDisabled:1;
+#else
+               u32     copyBackDisabled:1;
+               u32     SMARTerEnabled:1;
+               u32     prCorrectUnconfiguredAreas:1;
+               u32     useFdeOnly:1;
+               u32     disableNCQ:1;
+               u32     SSDSMARTerEnabled:1;
+               u32     SSDPatrolReadEnabled:1;
+               u32     enableSpinDownUnconfigured:1;
+               u32     autoEnhancedImport:1;
+               u32     enableSecretKeyControl:1;
+               u32     disableOnlineCtrlReset:1;
+               u32     allowBootWithPinnedCache:1;
+               u32     disableSpinDownHS:1;
+               u32     enableJBOD:1;
+               u32     reserved:18;
+#endif
        } OnOffProperties;
        u8 autoSnapVDSpace;
        u8 viewSpace;
@@ -802,6 +838,30 @@ struct megasas_ctrl_info {
        u16 cacheMemorySize;                    /*7A2h */
 
        struct {                                /*7A4h */
+#if   defined(__BIG_ENDIAN_BITFIELD)
+               u32     reserved:11;
+               u32     supportUnevenSpans:1;
+               u32     dedicatedHotSparesLimited:1;
+               u32     headlessMode:1;
+               u32     supportEmulatedDrives:1;
+               u32     supportResetNow:1;
+               u32     realTimeScheduler:1;
+               u32     supportSSDPatrolRead:1;
+               u32     supportPerfTuning:1;
+               u32     disableOnlinePFKChange:1;
+               u32     supportJBOD:1;
+               u32     supportBootTimePFKChange:1;
+               u32     supportSetLinkSpeed:1;
+               u32     supportEmergencySpares:1;
+               u32     supportSuspendResumeBGops:1;
+               u32     blockSSDWriteCacheChange:1;
+               u32     supportShieldState:1;
+               u32     supportLdBBMInfo:1;
+               u32     supportLdPIType3:1;
+               u32     supportLdPIType2:1;
+               u32     supportLdPIType1:1;
+               u32     supportPIcontroller:1;
+#else
                u32     supportPIcontroller:1;
                u32     supportLdPIType1:1;
                u32     supportLdPIType2:1;
@@ -827,6 +887,7 @@ struct megasas_ctrl_info {
 
                u32     supportUnevenSpans:1;
                u32     reserved:11;
+#endif
        } adapterOperations2;
 
        u8  driverVersion[32];                  /*7A8h */
@@ -863,7 +924,7 @@ struct megasas_ctrl_info {
  * ===============================
  */
 #define MEGASAS_MAX_PD_CHANNELS                        2
-#define MEGASAS_MAX_LD_CHANNELS                        2
+#define MEGASAS_MAX_LD_CHANNELS                        1
 #define MEGASAS_MAX_CHANNELS                   (MEGASAS_MAX_PD_CHANNELS + \
                                                MEGASAS_MAX_LD_CHANNELS)
 #define MEGASAS_MAX_DEV_PER_CHANNEL            128
@@ -1051,9 +1112,15 @@ union megasas_sgl_frame {
 
 typedef union _MFI_CAPABILITIES {
        struct {
+#if   defined(__BIG_ENDIAN_BITFIELD)
+               u32     reserved:30;
+               u32     support_additional_msix:1;
+               u32     support_fp_remote_lun:1;
+#else
                u32     support_fp_remote_lun:1;
                u32     support_additional_msix:1;
                u32     reserved:30;
+#endif
        } mfi_capabilities;
        u32     reg;
 } MFI_CAPABILITIES;
@@ -1656,4 +1723,16 @@ struct megasas_mgmt_info {
        int max_index;
 };
 
+u8
+MR_BuildRaidContext(struct megasas_instance *instance,
+                   struct IO_REQUEST_INFO *io_info,
+                   struct RAID_CONTEXT *pRAID_Context,
+                   struct MR_FW_RAID_MAP_ALL *map, u8 **raidLUN);
+u16 MR_TargetIdToLdGet(u32 ldTgtId, struct MR_FW_RAID_MAP_ALL *map);
+struct MR_LD_RAID *MR_LdRaidGet(u32 ld, struct MR_FW_RAID_MAP_ALL *map);
+u16 MR_ArPdGet(u32 ar, u32 arm, struct MR_FW_RAID_MAP_ALL *map);
+u16 MR_LdSpanArrayGet(u32 ld, u32 span, struct MR_FW_RAID_MAP_ALL *map);
+u16 MR_PdDevHandleGet(u32 pd, struct MR_FW_RAID_MAP_ALL *map);
+u16 MR_GetLDTgtId(u32 ld, struct MR_FW_RAID_MAP_ALL *map);
+
 #endif                         /*LSI_MEGARAID_SAS_H */
index 1f0ca68..3020921 100644 (file)
@@ -18,7 +18,7 @@
  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  *
  *  FILE: megaraid_sas_base.c
- *  Version : 06.600.18.00-rc1
+ *  Version : 06.700.06.00-rc1
  *
  *  Authors: LSI Corporation
  *           Sreenivas Bagalkote
@@ -92,6 +92,8 @@ MODULE_DESCRIPTION("LSI MegaRAID SAS Driver");
 
 int megasas_transition_to_ready(struct megasas_instance *instance, int ocr);
 static int megasas_get_pd_list(struct megasas_instance *instance);
+static int megasas_ld_list_query(struct megasas_instance *instance,
+                                u8 query_type);
 static int megasas_issue_init_mfi(struct megasas_instance *instance);
 static int megasas_register_aen(struct megasas_instance *instance,
                                u32 seq_num, u32 class_locale_word);
@@ -374,13 +376,11 @@ static int
 megasas_check_reset_xscale(struct megasas_instance *instance,
                struct megasas_register_set __iomem *regs)
 {
-       u32 consumer;
-       consumer = *instance->consumer;
 
        if ((instance->adprecovery != MEGASAS_HBA_OPERATIONAL) &&
-               (*instance->consumer == MEGASAS_ADPRESET_INPROG_SIGN)) {
+           (le32_to_cpu(*instance->consumer) ==
+               MEGASAS_ADPRESET_INPROG_SIGN))
                return 1;
-       }
        return 0;
 }
 
@@ -629,9 +629,10 @@ megasas_fire_cmd_skinny(struct megasas_instance *instance,
 {
        unsigned long flags;
        spin_lock_irqsave(&instance->hba_lock, flags);
-       writel(0, &(regs)->inbound_high_queue_port);
-       writel((frame_phys_addr | (frame_count<<1))|1,
-               &(regs)->inbound_low_queue_port);
+       writel(upper_32_bits(frame_phys_addr),
+              &(regs)->inbound_high_queue_port);
+       writel((lower_32_bits(frame_phys_addr) | (frame_count<<1))|1,
+              &(regs)->inbound_low_queue_port);
        spin_unlock_irqrestore(&instance->hba_lock, flags);
 }
 
@@ -879,8 +880,8 @@ megasas_issue_polled(struct megasas_instance *instance, struct megasas_cmd *cmd)
 
        struct megasas_header *frame_hdr = &cmd->frame->hdr;
 
-       frame_hdr->cmd_status = 0xFF;
-       frame_hdr->flags |= MFI_FRAME_DONT_POST_IN_REPLY_QUEUE;
+       frame_hdr->cmd_status = MFI_CMD_STATUS_POLL_MODE;
+       frame_hdr->flags |= cpu_to_le16(MFI_FRAME_DONT_POST_IN_REPLY_QUEUE);
 
        /*
         * Issue the frame using inbound queue port
@@ -944,10 +945,12 @@ megasas_issue_blocked_abort_cmd(struct megasas_instance *instance,
         */
        abort_fr->cmd = MFI_CMD_ABORT;
        abort_fr->cmd_status = 0xFF;
-       abort_fr->flags = 0;
-       abort_fr->abort_context = cmd_to_abort->index;
-       abort_fr->abort_mfi_phys_addr_lo = cmd_to_abort->frame_phys_addr;
-       abort_fr->abort_mfi_phys_addr_hi = 0;
+       abort_fr->flags = cpu_to_le16(0);
+       abort_fr->abort_context = cpu_to_le32(cmd_to_abort->index);
+       abort_fr->abort_mfi_phys_addr_lo =
+               cpu_to_le32(lower_32_bits(cmd_to_abort->frame_phys_addr));
+       abort_fr->abort_mfi_phys_addr_hi =
+               cpu_to_le32(upper_32_bits(cmd_to_abort->frame_phys_addr));
 
        cmd->sync_cmd = 1;
        cmd->cmd_status = 0xFF;
@@ -986,8 +989,8 @@ megasas_make_sgl32(struct megasas_instance *instance, struct scsi_cmnd *scp,
 
        if (sge_count) {
                scsi_for_each_sg(scp, os_sgl, sge_count, i) {
-                       mfi_sgl->sge32[i].length = sg_dma_len(os_sgl);
-                       mfi_sgl->sge32[i].phys_addr = sg_dma_address(os_sgl);
+                       mfi_sgl->sge32[i].length = cpu_to_le32(sg_dma_len(os_sgl));
+                       mfi_sgl->sge32[i].phys_addr = cpu_to_le32(sg_dma_address(os_sgl));
                }
        }
        return sge_count;
@@ -1015,8 +1018,8 @@ megasas_make_sgl64(struct megasas_instance *instance, struct scsi_cmnd *scp,
 
        if (sge_count) {
                scsi_for_each_sg(scp, os_sgl, sge_count, i) {
-                       mfi_sgl->sge64[i].length = sg_dma_len(os_sgl);
-                       mfi_sgl->sge64[i].phys_addr = sg_dma_address(os_sgl);
+                       mfi_sgl->sge64[i].length = cpu_to_le32(sg_dma_len(os_sgl));
+                       mfi_sgl->sge64[i].phys_addr = cpu_to_le64(sg_dma_address(os_sgl));
                }
        }
        return sge_count;
@@ -1043,10 +1046,11 @@ megasas_make_sgl_skinny(struct megasas_instance *instance,
 
        if (sge_count) {
                scsi_for_each_sg(scp, os_sgl, sge_count, i) {
-                       mfi_sgl->sge_skinny[i].length = sg_dma_len(os_sgl);
+                       mfi_sgl->sge_skinny[i].length =
+                               cpu_to_le32(sg_dma_len(os_sgl));
                        mfi_sgl->sge_skinny[i].phys_addr =
-                                               sg_dma_address(os_sgl);
-                       mfi_sgl->sge_skinny[i].flag = 0;
+                               cpu_to_le64(sg_dma_address(os_sgl));
+                       mfi_sgl->sge_skinny[i].flag = cpu_to_le32(0);
                }
        }
        return sge_count;
@@ -1155,8 +1159,8 @@ megasas_build_dcdb(struct megasas_instance *instance, struct scsi_cmnd *scp,
        pthru->cdb_len = scp->cmd_len;
        pthru->timeout = 0;
        pthru->pad_0 = 0;
-       pthru->flags = flags;
-       pthru->data_xfer_len = scsi_bufflen(scp);
+       pthru->flags = cpu_to_le16(flags);
+       pthru->data_xfer_len = cpu_to_le32(scsi_bufflen(scp));
 
        memcpy(pthru->cdb, scp->cmnd, scp->cmd_len);
 
@@ -1168,18 +1172,18 @@ megasas_build_dcdb(struct megasas_instance *instance, struct scsi_cmnd *scp,
                if ((scp->request->timeout / HZ) > 0xFFFF)
                        pthru->timeout = 0xFFFF;
                else
-                       pthru->timeout = scp->request->timeout / HZ;
+                       pthru->timeout = cpu_to_le16(scp->request->timeout / HZ);
        }
 
        /*
         * Construct SGL
         */
        if (instance->flag_ieee == 1) {
-               pthru->flags |= MFI_FRAME_SGL64;
+               pthru->flags |= cpu_to_le16(MFI_FRAME_SGL64);
                pthru->sge_count = megasas_make_sgl_skinny(instance, scp,
                                                      &pthru->sgl);
        } else if (IS_DMA64) {
-               pthru->flags |= MFI_FRAME_SGL64;
+               pthru->flags |= cpu_to_le16(MFI_FRAME_SGL64);
                pthru->sge_count = megasas_make_sgl64(instance, scp,
                                                      &pthru->sgl);
        } else
@@ -1196,8 +1200,10 @@ megasas_build_dcdb(struct megasas_instance *instance, struct scsi_cmnd *scp,
         * Sense info specific
         */
        pthru->sense_len = SCSI_SENSE_BUFFERSIZE;
-       pthru->sense_buf_phys_addr_hi = 0;
-       pthru->sense_buf_phys_addr_lo = cmd->sense_phys_addr;
+       pthru->sense_buf_phys_addr_hi =
+               cpu_to_le32(upper_32_bits(cmd->sense_phys_addr));
+       pthru->sense_buf_phys_addr_lo =
+               cpu_to_le32(lower_32_bits(cmd->sense_phys_addr));
 
        /*
         * Compute the total number of frames this command consumes. FW uses
@@ -1248,7 +1254,7 @@ megasas_build_ldio(struct megasas_instance *instance, struct scsi_cmnd *scp,
        ldio->timeout = 0;
        ldio->reserved_0 = 0;
        ldio->pad_0 = 0;
-       ldio->flags = flags;
+       ldio->flags = cpu_to_le16(flags);
        ldio->start_lba_hi = 0;
        ldio->access_byte = (scp->cmd_len != 6) ? scp->cmnd[1] : 0;
 
@@ -1256,52 +1262,59 @@ megasas_build_ldio(struct megasas_instance *instance, struct scsi_cmnd *scp,
         * 6-byte READ(0x08) or WRITE(0x0A) cdb
         */
        if (scp->cmd_len == 6) {
-               ldio->lba_count = (u32) scp->cmnd[4];
-               ldio->start_lba_lo = ((u32) scp->cmnd[1] << 16) |
-                   ((u32) scp->cmnd[2] << 8) | (u32) scp->cmnd[3];
+               ldio->lba_count = cpu_to_le32((u32) scp->cmnd[4]);
+               ldio->start_lba_lo = cpu_to_le32(((u32) scp->cmnd[1] << 16) |
+                                                ((u32) scp->cmnd[2] << 8) |
+                                                (u32) scp->cmnd[3]);
 
-               ldio->start_lba_lo &= 0x1FFFFF;
+               ldio->start_lba_lo &= cpu_to_le32(0x1FFFFF);
        }
 
        /*
         * 10-byte READ(0x28) or WRITE(0x2A) cdb
         */
        else if (scp->cmd_len == 10) {
-               ldio->lba_count = (u32) scp->cmnd[8] |
-                   ((u32) scp->cmnd[7] << 8);
-               ldio->start_lba_lo = ((u32) scp->cmnd[2] << 24) |
-                   ((u32) scp->cmnd[3] << 16) |
-                   ((u32) scp->cmnd[4] << 8) | (u32) scp->cmnd[5];
+               ldio->lba_count = cpu_to_le32((u32) scp->cmnd[8] |
+                                             ((u32) scp->cmnd[7] << 8));
+               ldio->start_lba_lo = cpu_to_le32(((u32) scp->cmnd[2] << 24) |
+                                                ((u32) scp->cmnd[3] << 16) |
+                                                ((u32) scp->cmnd[4] << 8) |
+                                                (u32) scp->cmnd[5]);
        }
 
        /*
         * 12-byte READ(0xA8) or WRITE(0xAA) cdb
         */
        else if (scp->cmd_len == 12) {
-               ldio->lba_count = ((u32) scp->cmnd[6] << 24) |
-                   ((u32) scp->cmnd[7] << 16) |
-                   ((u32) scp->cmnd[8] << 8) | (u32) scp->cmnd[9];
+               ldio->lba_count = cpu_to_le32(((u32) scp->cmnd[6] << 24) |
+                                             ((u32) scp->cmnd[7] << 16) |
+                                             ((u32) scp->cmnd[8] << 8) |
+                                             (u32) scp->cmnd[9]);
 
-               ldio->start_lba_lo = ((u32) scp->cmnd[2] << 24) |
-                   ((u32) scp->cmnd[3] << 16) |
-                   ((u32) scp->cmnd[4] << 8) | (u32) scp->cmnd[5];
+               ldio->start_lba_lo = cpu_to_le32(((u32) scp->cmnd[2] << 24) |
+                                                ((u32) scp->cmnd[3] << 16) |
+                                                ((u32) scp->cmnd[4] << 8) |
+                                                (u32) scp->cmnd[5]);
        }
 
        /*
         * 16-byte READ(0x88) or WRITE(0x8A) cdb
         */
        else if (scp->cmd_len == 16) {
-               ldio->lba_count = ((u32) scp->cmnd[10] << 24) |
-                   ((u32) scp->cmnd[11] << 16) |
-                   ((u32) scp->cmnd[12] << 8) | (u32) scp->cmnd[13];
+               ldio->lba_count = cpu_to_le32(((u32) scp->cmnd[10] << 24) |
+                                             ((u32) scp->cmnd[11] << 16) |
+                                             ((u32) scp->cmnd[12] << 8) |
+                                             (u32) scp->cmnd[13]);
 
-               ldio->start_lba_lo = ((u32) scp->cmnd[6] << 24) |
-                   ((u32) scp->cmnd[7] << 16) |
-                   ((u32) scp->cmnd[8] << 8) | (u32) scp->cmnd[9];
+               ldio->start_lba_lo = cpu_to_le32(((u32) scp->cmnd[6] << 24) |
+                                                ((u32) scp->cmnd[7] << 16) |
+                                                ((u32) scp->cmnd[8] << 8) |
+                                                (u32) scp->cmnd[9]);
 
-               ldio->start_lba_hi = ((u32) scp->cmnd[2] << 24) |
-                   ((u32) scp->cmnd[3] << 16) |
-                   ((u32) scp->cmnd[4] << 8) | (u32) scp->cmnd[5];
+               ldio->start_lba_hi = cpu_to_le32(((u32) scp->cmnd[2] << 24) |
+                                                ((u32) scp->cmnd[3] << 16) |
+                                                ((u32) scp->cmnd[4] << 8) |
+                                                (u32) scp->cmnd[5]);
 
        }
 
@@ -1309,11 +1322,11 @@ megasas_build_ldio(struct megasas_instance *instance, struct scsi_cmnd *scp,
         * Construct SGL
         */
        if (instance->flag_ieee) {
-               ldio->flags |= MFI_FRAME_SGL64;
+               ldio->flags |= cpu_to_le16(MFI_FRAME_SGL64);
                ldio->sge_count = megasas_make_sgl_skinny(instance, scp,
                                              &ldio->sgl);
        } else if (IS_DMA64) {
-               ldio->flags |= MFI_FRAME_SGL64;
+               ldio->flags |= cpu_to_le16(MFI_FRAME_SGL64);
                ldio->sge_count = megasas_make_sgl64(instance, scp, &ldio->sgl);
        } else
                ldio->sge_count = megasas_make_sgl32(instance, scp, &ldio->sgl);
@@ -1329,7 +1342,7 @@ megasas_build_ldio(struct megasas_instance *instance, struct scsi_cmnd *scp,
         */
        ldio->sense_len = SCSI_SENSE_BUFFERSIZE;
        ldio->sense_buf_phys_addr_hi = 0;
-       ldio->sense_buf_phys_addr_lo = cmd->sense_phys_addr;
+       ldio->sense_buf_phys_addr_lo = cpu_to_le32(cmd->sense_phys_addr);
 
        /*
         * Compute the total number of frames this command consumes. FW uses
@@ -1400,20 +1413,32 @@ megasas_dump_pending_frames(struct megasas_instance *instance)
                        ldio = (struct megasas_io_frame *)cmd->frame;
                        mfi_sgl = &ldio->sgl;
                        sgcount = ldio->sge_count;
-                       printk(KERN_ERR "megasas[%d]: frame count : 0x%x, Cmd : 0x%x, Tgt id : 0x%x, lba lo : 0x%x, lba_hi : 0x%x, sense_buf addr : 0x%x,sge count : 0x%x\n",instance->host->host_no, cmd->frame_count,ldio->cmd,ldio->target_id, ldio->start_lba_lo,ldio->start_lba_hi,ldio->sense_buf_phys_addr_lo,sgcount);
+                       printk(KERN_ERR "megasas[%d]: frame count : 0x%x, Cmd : 0x%x, Tgt id : 0x%x,"
+                       " lba lo : 0x%x, lba_hi : 0x%x, sense_buf addr : 0x%x,sge count : 0x%x\n",
+                       instance->host->host_no, cmd->frame_count, ldio->cmd, ldio->target_id,
+                       le32_to_cpu(ldio->start_lba_lo), le32_to_cpu(ldio->start_lba_hi),
+                       le32_to_cpu(ldio->sense_buf_phys_addr_lo), sgcount);
                }
                else {
                        pthru = (struct megasas_pthru_frame *) cmd->frame;
                        mfi_sgl = &pthru->sgl;
                        sgcount = pthru->sge_count;
-                       printk(KERN_ERR "megasas[%d]: frame count : 0x%x, Cmd : 0x%x, Tgt id : 0x%x, lun : 0x%x, cdb_len : 0x%x, data xfer len : 0x%x, sense_buf addr : 0x%x,sge count : 0x%x\n",instance->host->host_no,cmd->frame_count,pthru->cmd,pthru->target_id,pthru->lun,pthru->cdb_len , pthru->data_xfer_len,pthru->sense_buf_phys_addr_lo,sgcount);
+                       printk(KERN_ERR "megasas[%d]: frame count : 0x%x, Cmd : 0x%x, Tgt id : 0x%x, "
+                       "lun : 0x%x, cdb_len : 0x%x, data xfer len : 0x%x, sense_buf addr : 0x%x,sge count : 0x%x\n",
+                       instance->host->host_no, cmd->frame_count, pthru->cmd, pthru->target_id,
+                       pthru->lun, pthru->cdb_len, le32_to_cpu(pthru->data_xfer_len),
+                       le32_to_cpu(pthru->sense_buf_phys_addr_lo), sgcount);
                }
        if(megasas_dbg_lvl & MEGASAS_DBG_LVL){
                for (n = 0; n < sgcount; n++){
                        if (IS_DMA64)
-                               printk(KERN_ERR "megasas: sgl len : 0x%x, sgl addr : 0x%08lx ",mfi_sgl->sge64[n].length , (unsigned long)mfi_sgl->sge64[n].phys_addr) ;
+                               printk(KERN_ERR "megasas: sgl len : 0x%x, sgl addr : 0x%llx ",
+                                       le32_to_cpu(mfi_sgl->sge64[n].length),
+                                       le64_to_cpu(mfi_sgl->sge64[n].phys_addr));
                        else
-                               printk(KERN_ERR "megasas: sgl len : 0x%x, sgl addr : 0x%x ",mfi_sgl->sge32[n].length , mfi_sgl->sge32[n].phys_addr) ;
+                               printk(KERN_ERR "megasas: sgl len : 0x%x, sgl addr : 0x%x ",
+                                       le32_to_cpu(mfi_sgl->sge32[n].length),
+                                       le32_to_cpu(mfi_sgl->sge32[n].phys_addr));
                        }
                }
                printk(KERN_ERR "\n");
@@ -1674,11 +1699,11 @@ static void megasas_complete_cmd_dpc(unsigned long instance_addr)
 
        spin_lock_irqsave(&instance->completion_lock, flags);
 
-       producer = *instance->producer;
-       consumer = *instance->consumer;
+       producer = le32_to_cpu(*instance->producer);
+       consumer = le32_to_cpu(*instance->consumer);
 
        while (consumer != producer) {
-               context = instance->reply_queue[consumer];
+               context = le32_to_cpu(instance->reply_queue[consumer]);
                if (context >= instance->max_fw_cmds) {
                        printk(KERN_ERR "Unexpected context value %x\n",
                                context);
@@ -1695,7 +1720,7 @@ static void megasas_complete_cmd_dpc(unsigned long instance_addr)
                }
        }
 
-       *instance->consumer = producer;
+       *instance->consumer = cpu_to_le32(producer);
 
        spin_unlock_irqrestore(&instance->completion_lock, flags);
 
@@ -1716,7 +1741,7 @@ void megasas_do_ocr(struct megasas_instance *instance)
        if ((instance->pdev->device == PCI_DEVICE_ID_LSI_SAS1064R) ||
        (instance->pdev->device == PCI_DEVICE_ID_DELL_PERC5) ||
        (instance->pdev->device == PCI_DEVICE_ID_LSI_VERDE_ZCR)) {
-               *instance->consumer     = MEGASAS_ADPRESET_INPROG_SIGN;
+               *instance->consumer = cpu_to_le32(MEGASAS_ADPRESET_INPROG_SIGN);
        }
        instance->instancet->disable_intr(instance);
        instance->adprecovery   = MEGASAS_ADPRESET_SM_INFAULT;
@@ -2186,6 +2211,7 @@ megasas_complete_cmd(struct megasas_instance *instance, struct megasas_cmd *cmd,
        struct megasas_header *hdr = &cmd->frame->hdr;
        unsigned long flags;
        struct fusion_context *fusion = instance->ctrl_context;
+       u32 opcode;
 
        /* flag for the retry reset */
        cmd->retry_for_fw_reset = 0;
@@ -2287,9 +2313,10 @@ megasas_complete_cmd(struct megasas_instance *instance, struct megasas_cmd *cmd,
        case MFI_CMD_SMP:
        case MFI_CMD_STP:
        case MFI_CMD_DCMD:
+               opcode = le32_to_cpu(cmd->frame->dcmd.opcode);
                /* Check for LD map update */
-               if ((cmd->frame->dcmd.opcode == MR_DCMD_LD_MAP_GET_INFO) &&
-                   (cmd->frame->dcmd.mbox.b[1] == 1)) {
+               if ((opcode == MR_DCMD_LD_MAP_GET_INFO)
+                       && (cmd->frame->dcmd.mbox.b[1] == 1)) {
                        fusion->fast_path_io = 0;
                        spin_lock_irqsave(instance->host->host_lock, flags);
                        if (cmd->frame->hdr.cmd_status != 0) {
@@ -2323,8 +2350,8 @@ megasas_complete_cmd(struct megasas_instance *instance, struct megasas_cmd *cmd,
                                               flags);
                        break;
                }
-               if (cmd->frame->dcmd.opcode == MR_DCMD_CTRL_EVENT_GET_INFO ||
-                       cmd->frame->dcmd.opcode == MR_DCMD_CTRL_EVENT_GET) {
+               if (opcode == MR_DCMD_CTRL_EVENT_GET_INFO ||
+                   opcode == MR_DCMD_CTRL_EVENT_GET) {
                        spin_lock_irqsave(&poll_aen_lock, flags);
                        megasas_poll_wait_aen = 0;
                        spin_unlock_irqrestore(&poll_aen_lock, flags);
@@ -2333,7 +2360,7 @@ megasas_complete_cmd(struct megasas_instance *instance, struct megasas_cmd *cmd,
                /*
                 * See if got an event notification
                 */
-               if (cmd->frame->dcmd.opcode == MR_DCMD_CTRL_EVENT_WAIT)
+               if (opcode == MR_DCMD_CTRL_EVENT_WAIT)
                        megasas_service_aen(instance, cmd);
                else
                        megasas_complete_int_cmd(instance, cmd);
@@ -2606,7 +2633,7 @@ megasas_deplete_reply_queue(struct megasas_instance *instance,
                                        PCI_DEVICE_ID_LSI_VERDE_ZCR)) {
 
                                *instance->consumer =
-                                       MEGASAS_ADPRESET_INPROG_SIGN;
+                                       cpu_to_le32(MEGASAS_ADPRESET_INPROG_SIGN);
                        }
 
 
@@ -2983,7 +3010,7 @@ static int megasas_create_frame_pool(struct megasas_instance *instance)
                }
 
                memset(cmd->frame, 0, total_sz);
-               cmd->frame->io.context = cmd->index;
+               cmd->frame->io.context = cpu_to_le32(cmd->index);
                cmd->frame->io.pad_0 = 0;
                if ((instance->pdev->device != PCI_DEVICE_ID_LSI_FUSION) &&
                    (instance->pdev->device != PCI_DEVICE_ID_LSI_INVADER) &&
@@ -3143,13 +3170,13 @@ megasas_get_pd_list(struct megasas_instance *instance)
        dcmd->cmd = MFI_CMD_DCMD;
        dcmd->cmd_status = 0xFF;
        dcmd->sge_count = 1;
-       dcmd->flags = MFI_FRAME_DIR_READ;
+       dcmd->flags = cpu_to_le16(MFI_FRAME_DIR_READ);
        dcmd->timeout = 0;
        dcmd->pad_0 = 0;
-       dcmd->data_xfer_len = MEGASAS_MAX_PD * sizeof(struct MR_PD_LIST);
-       dcmd->opcode = MR_DCMD_PD_LIST_QUERY;
-       dcmd->sgl.sge32[0].phys_addr = ci_h;
-       dcmd->sgl.sge32[0].length = MEGASAS_MAX_PD * sizeof(struct MR_PD_LIST);
+       dcmd->data_xfer_len = cpu_to_le32(MEGASAS_MAX_PD * sizeof(struct MR_PD_LIST));
+       dcmd->opcode = cpu_to_le32(MR_DCMD_PD_LIST_QUERY);
+       dcmd->sgl.sge32[0].phys_addr = cpu_to_le32(ci_h);
+       dcmd->sgl.sge32[0].length = cpu_to_le32(MEGASAS_MAX_PD * sizeof(struct MR_PD_LIST));
 
        if (!megasas_issue_polled(instance, cmd)) {
                ret = 0;
@@ -3164,16 +3191,16 @@ megasas_get_pd_list(struct megasas_instance *instance)
        pd_addr = ci->addr;
 
        if ( ret == 0 &&
-               (ci->count <
+            (le32_to_cpu(ci->count) <
                  (MEGASAS_MAX_PD_CHANNELS * MEGASAS_MAX_DEV_PER_CHANNEL))) {
 
                memset(instance->pd_list, 0,
                        MEGASAS_MAX_PD * sizeof(struct megasas_pd_list));
 
-               for (pd_index = 0; pd_index < ci->count; pd_index++) {
+               for (pd_index = 0; pd_index < le32_to_cpu(ci->count); pd_index++) {
 
                        instance->pd_list[pd_addr->deviceId].tid        =
-                                                       pd_addr->deviceId;
+                               le16_to_cpu(pd_addr->deviceId);
                        instance->pd_list[pd_addr->deviceId].driveType  =
                                                        pd_addr->scsiDevType;
                        instance->pd_list[pd_addr->deviceId].driveState =
@@ -3207,6 +3234,7 @@ megasas_get_ld_list(struct megasas_instance *instance)
        struct megasas_dcmd_frame *dcmd;
        struct MR_LD_LIST *ci;
        dma_addr_t ci_h = 0;
+       u32 ld_count;
 
        cmd = megasas_get_cmd(instance);
 
@@ -3233,12 +3261,12 @@ megasas_get_ld_list(struct megasas_instance *instance)
        dcmd->cmd = MFI_CMD_DCMD;
        dcmd->cmd_status = 0xFF;
        dcmd->sge_count = 1;
-       dcmd->flags = MFI_FRAME_DIR_READ;
+       dcmd->flags = cpu_to_le16(MFI_FRAME_DIR_READ);
        dcmd->timeout = 0;
-       dcmd->data_xfer_len = sizeof(struct MR_LD_LIST);
-       dcmd->opcode = MR_DCMD_LD_GET_LIST;
-       dcmd->sgl.sge32[0].phys_addr = ci_h;
-       dcmd->sgl.sge32[0].length = sizeof(struct MR_LD_LIST);
+       dcmd->data_xfer_len = cpu_to_le32(sizeof(struct MR_LD_LIST));
+       dcmd->opcode = cpu_to_le32(MR_DCMD_LD_GET_LIST);
+       dcmd->sgl.sge32[0].phys_addr = cpu_to_le32(ci_h);
+       dcmd->sgl.sge32[0].length = cpu_to_le32(sizeof(struct MR_LD_LIST));
        dcmd->pad_0  = 0;
 
        if (!megasas_issue_polled(instance, cmd)) {
@@ -3247,12 +3275,14 @@ megasas_get_ld_list(struct megasas_instance *instance)
                ret = -1;
        }
 
+       ld_count = le32_to_cpu(ci->ldCount);
+
        /* the following function will get the instance PD LIST */
 
-       if ((ret == 0) && (ci->ldCount <= MAX_LOGICAL_DRIVES)) {
+       if ((ret == 0) && (ld_count <= MAX_LOGICAL_DRIVES)) {
                memset(instance->ld_ids, 0xff, MEGASAS_MAX_LD_IDS);
 
-               for (ld_index = 0; ld_index < ci->ldCount; ld_index++) {
+               for (ld_index = 0; ld_index < ld_count; ld_index++) {
                        if (ci->ldList[ld_index].state != 0) {
                                ids = ci->ldList[ld_index].ref.targetId;
                                instance->ld_ids[ids] =
@@ -3270,6 +3300,87 @@ megasas_get_ld_list(struct megasas_instance *instance)
        return ret;
 }
 
+/**
+ * megasas_ld_list_query -     Returns FW's ld_list structure
+ * @instance:                          Adapter soft state
+ * @query_type:                                ld list query type
+ *
+ * Issues an internal command (DCMD) to query the FW for the list of
+ * logical drive target IDs.  This information is mainly used to find
+ * out which LDs are exposed to the host.
+ */
+static int
+megasas_ld_list_query(struct megasas_instance *instance, u8 query_type)
+{
+       int ret = 0, ld_index = 0, ids = 0;
+       struct megasas_cmd *cmd;
+       struct megasas_dcmd_frame *dcmd;
+       struct MR_LD_TARGETID_LIST *ci;
+       dma_addr_t ci_h = 0;
+       u32 tgtid_count;
+
+       cmd = megasas_get_cmd(instance);
+
+       if (!cmd) {
+               printk(KERN_WARNING
+                      "megasas:(megasas_ld_list_query): Failed to get cmd\n");
+               return -ENOMEM;
+       }
+
+       dcmd = &cmd->frame->dcmd;
+
+       ci = pci_alloc_consistent(instance->pdev,
+                                 sizeof(struct MR_LD_TARGETID_LIST), &ci_h);
+
+       if (!ci) {
+               printk(KERN_WARNING
+                      "megasas: Failed to alloc mem for ld_list_query\n");
+               megasas_return_cmd(instance, cmd);
+               return -ENOMEM;
+       }
+
+       memset(ci, 0, sizeof(*ci));
+       memset(dcmd->mbox.b, 0, MFI_MBOX_SIZE);
+
+       dcmd->mbox.b[0] = query_type;
+
+       dcmd->cmd = MFI_CMD_DCMD;
+       dcmd->cmd_status = 0xFF;
+       dcmd->sge_count = 1;
+       dcmd->flags = cpu_to_le16(MFI_FRAME_DIR_READ);
+       dcmd->timeout = 0;
+       dcmd->data_xfer_len = cpu_to_le32(sizeof(struct MR_LD_TARGETID_LIST));
+       dcmd->opcode = cpu_to_le32(MR_DCMD_LD_LIST_QUERY);
+       dcmd->sgl.sge32[0].phys_addr = cpu_to_le32(ci_h);
+       dcmd->sgl.sge32[0].length = cpu_to_le32(sizeof(struct MR_LD_TARGETID_LIST));
+       dcmd->pad_0  = 0;
+
+       if (!megasas_issue_polled(instance, cmd) && !dcmd->cmd_status) {
+               ret = 0;
+       } else {
+               /* On failure, call older LD list DCMD */
+               ret = 1;
+       }
+
+       tgtid_count = le32_to_cpu(ci->count);
+
+       if ((ret == 0) && (tgtid_count <= (MAX_LOGICAL_DRIVES))) {
+               memset(instance->ld_ids, 0xff, MEGASAS_MAX_LD_IDS);
+               for (ld_index = 0; ld_index < tgtid_count; ld_index++) {
+                       ids = ci->targetId[ld_index];
+                       instance->ld_ids[ids] = ci->targetId[ld_index];
+               }
+
+       }
+
+       pci_free_consistent(instance->pdev, sizeof(struct MR_LD_TARGETID_LIST),
+                           ci, ci_h);
+
+       megasas_return_cmd(instance, cmd);
+
+       return ret;
+}
+
 /**
  * megasas_get_controller_info -       Returns FW's controller structure
  * @instance:                          Adapter soft state
@@ -3313,13 +3424,13 @@ megasas_get_ctrl_info(struct megasas_instance *instance,
        dcmd->cmd = MFI_CMD_DCMD;
        dcmd->cmd_status = 0xFF;
        dcmd->sge_count = 1;
-       dcmd->flags = MFI_FRAME_DIR_READ;
+       dcmd->flags = cpu_to_le16(MFI_FRAME_DIR_READ);
        dcmd->timeout = 0;
        dcmd->pad_0 = 0;
-       dcmd->data_xfer_len = sizeof(struct megasas_ctrl_info);
-       dcmd->opcode = MR_DCMD_CTRL_GET_INFO;
-       dcmd->sgl.sge32[0].phys_addr = ci_h;
-       dcmd->sgl.sge32[0].length = sizeof(struct megasas_ctrl_info);
+       dcmd->data_xfer_len = cpu_to_le32(sizeof(struct megasas_ctrl_info));
+       dcmd->opcode = cpu_to_le32(MR_DCMD_CTRL_GET_INFO);
+       dcmd->sgl.sge32[0].phys_addr = cpu_to_le32(ci_h);
+       dcmd->sgl.sge32[0].length = cpu_to_le32(sizeof(struct megasas_ctrl_info));
 
        if (!megasas_issue_polled(instance, cmd)) {
                ret = 0;
@@ -3375,17 +3486,20 @@ megasas_issue_init_mfi(struct megasas_instance *instance)
        memset(initq_info, 0, sizeof(struct megasas_init_queue_info));
        init_frame->context = context;
 
-       initq_info->reply_queue_entries = instance->max_fw_cmds + 1;
-       initq_info->reply_queue_start_phys_addr_lo = instance->reply_queue_h;
+       initq_info->reply_queue_entries = cpu_to_le32(instance->max_fw_cmds + 1);
+       initq_info->reply_queue_start_phys_addr_lo = cpu_to_le32(instance->reply_queue_h);
 
-       initq_info->producer_index_phys_addr_lo = instance->producer_h;
-       initq_info->consumer_index_phys_addr_lo = instance->consumer_h;
+       initq_info->producer_index_phys_addr_lo = cpu_to_le32(instance->producer_h);
+       initq_info->consumer_index_phys_addr_lo = cpu_to_le32(instance->consumer_h);
 
        init_frame->cmd = MFI_CMD_INIT;
        init_frame->cmd_status = 0xFF;
-       init_frame->queue_info_new_phys_addr_lo = initq_info_h;
+       init_frame->queue_info_new_phys_addr_lo =
+               cpu_to_le32(lower_32_bits(initq_info_h));
+       init_frame->queue_info_new_phys_addr_hi =
+               cpu_to_le32(upper_32_bits(initq_info_h));
 
-       init_frame->data_xfer_len = sizeof(struct megasas_init_queue_info);
+       init_frame->data_xfer_len = cpu_to_le32(sizeof(struct megasas_init_queue_info));
 
        /*
         * disable the intr before firing the init frame to FW
@@ -3648,7 +3762,9 @@ static int megasas_init_fw(struct megasas_instance *instance)
        megasas_get_pd_list(instance);
 
        memset(instance->ld_ids, 0xff, MEGASAS_MAX_LD_IDS);
-       megasas_get_ld_list(instance);
+       if (megasas_ld_list_query(instance,
+                                 MR_LD_QUERY_TYPE_EXPOSED_TO_HOST))
+               megasas_get_ld_list(instance);
 
        ctrl_info = kmalloc(sizeof(struct megasas_ctrl_info), GFP_KERNEL);
 
@@ -3665,8 +3781,8 @@ static int megasas_init_fw(struct megasas_instance *instance)
        if (ctrl_info && !megasas_get_ctrl_info(instance, ctrl_info)) {
 
                max_sectors_1 = (1 << ctrl_info->stripe_sz_ops.min) *
-                   ctrl_info->max_strips_per_io;
-               max_sectors_2 = ctrl_info->max_request_size;
+                       le16_to_cpu(ctrl_info->max_strips_per_io);
+               max_sectors_2 = le32_to_cpu(ctrl_info->max_request_size);
 
                tmp_sectors = min_t(u32, max_sectors_1 , max_sectors_2);
 
@@ -3675,14 +3791,18 @@ static int megasas_init_fw(struct megasas_instance *instance)
                        instance->is_imr = 0;
                        dev_info(&instance->pdev->dev, "Controller type: MR,"
                                "Memory size is: %dMB\n",
-                               ctrl_info->memory_size);
+                               le16_to_cpu(ctrl_info->memory_size));
                } else {
                        instance->is_imr = 1;
                        dev_info(&instance->pdev->dev,
                                "Controller type: iMR\n");
                }
+               /* convert OnOffProperties to CPU byte order in place */
+               le32_to_cpus((u32 *)&ctrl_info->properties.OnOffProperties);
                instance->disableOnlineCtrlReset =
                ctrl_info->properties.OnOffProperties.disableOnlineCtrlReset;
+               /* convert adapterOperations2 to CPU byte order in place */
+               le32_to_cpus((u32 *)&ctrl_info->adapterOperations2);
                instance->UnevenSpanSupport =
                        ctrl_info->adapterOperations2.supportUnevenSpans;
                if (instance->UnevenSpanSupport) {
@@ -3696,7 +3816,6 @@ static int megasas_init_fw(struct megasas_instance *instance)
 
                }
        }
-
        instance->max_sectors_per_req = instance->max_num_sge *
                                                PAGE_SIZE / 512;
        if (tmp_sectors && (instance->max_sectors_per_req > tmp_sectors))
@@ -3802,20 +3921,24 @@ megasas_get_seq_num(struct megasas_instance *instance,
        dcmd->cmd = MFI_CMD_DCMD;
        dcmd->cmd_status = 0x0;
        dcmd->sge_count = 1;
-       dcmd->flags = MFI_FRAME_DIR_READ;
+       dcmd->flags = cpu_to_le16(MFI_FRAME_DIR_READ);
        dcmd->timeout = 0;
        dcmd->pad_0 = 0;
-       dcmd->data_xfer_len = sizeof(struct megasas_evt_log_info);
-       dcmd->opcode = MR_DCMD_CTRL_EVENT_GET_INFO;
-       dcmd->sgl.sge32[0].phys_addr = el_info_h;
-       dcmd->sgl.sge32[0].length = sizeof(struct megasas_evt_log_info);
+       dcmd->data_xfer_len = cpu_to_le32(sizeof(struct megasas_evt_log_info));
+       dcmd->opcode = cpu_to_le32(MR_DCMD_CTRL_EVENT_GET_INFO);
+       dcmd->sgl.sge32[0].phys_addr = cpu_to_le32(el_info_h);
+       dcmd->sgl.sge32[0].length = cpu_to_le32(sizeof(struct megasas_evt_log_info));
 
        megasas_issue_blocked_cmd(instance, cmd);
 
        /*
         * Copy the data back into callers buffer
         */
-       memcpy(eli, el_info, sizeof(struct megasas_evt_log_info));
+       eli->newest_seq_num = le32_to_cpu(el_info->newest_seq_num);
+       eli->oldest_seq_num = le32_to_cpu(el_info->oldest_seq_num);
+       eli->clear_seq_num = le32_to_cpu(el_info->clear_seq_num);
+       eli->shutdown_seq_num = le32_to_cpu(el_info->shutdown_seq_num);
+       eli->boot_seq_num = le32_to_cpu(el_info->boot_seq_num);
 
        pci_free_consistent(instance->pdev, sizeof(struct megasas_evt_log_info),
                            el_info, el_info_h);
@@ -3862,6 +3985,7 @@ megasas_register_aen(struct megasas_instance *instance, u32 seq_num,
        if (instance->aen_cmd) {
 
                prev_aen.word = instance->aen_cmd->frame->dcmd.mbox.w[1];
+               prev_aen.members.locale = le16_to_cpu(prev_aen.members.locale);
 
                /*
                 * A class whose enum value is smaller is inclusive of all
@@ -3874,7 +3998,7 @@ megasas_register_aen(struct megasas_instance *instance, u32 seq_num,
                 * values
                 */
                if ((prev_aen.members.class <= curr_aen.members.class) &&
-                   !((prev_aen.members.locale & curr_aen.members.locale) ^
+                   !((le16_to_cpu(prev_aen.members.locale) & curr_aen.members.locale) ^
                      curr_aen.members.locale)) {
                        /*
                         * Previously issued event registration includes
@@ -3882,7 +4006,7 @@ megasas_register_aen(struct megasas_instance *instance, u32 seq_num,
                         */
                        return 0;
                } else {
-                       curr_aen.members.locale |= prev_aen.members.locale;
+                       curr_aen.members.locale |= le16_to_cpu(prev_aen.members.locale);
 
                        if (prev_aen.members.class < curr_aen.members.class)
                                curr_aen.members.class = prev_aen.members.class;
@@ -3917,16 +4041,16 @@ megasas_register_aen(struct megasas_instance *instance, u32 seq_num,
        dcmd->cmd = MFI_CMD_DCMD;
        dcmd->cmd_status = 0x0;
        dcmd->sge_count = 1;
-       dcmd->flags = MFI_FRAME_DIR_READ;
+       dcmd->flags = cpu_to_le16(MFI_FRAME_DIR_READ);
        dcmd->timeout = 0;
        dcmd->pad_0 = 0;
+       dcmd->data_xfer_len = cpu_to_le32(sizeof(struct megasas_evt_detail));
+       dcmd->opcode = cpu_to_le32(MR_DCMD_CTRL_EVENT_WAIT);
+       dcmd->mbox.w[0] = cpu_to_le32(seq_num);
        instance->last_seq_num = seq_num;
-       dcmd->data_xfer_len = sizeof(struct megasas_evt_detail);
-       dcmd->opcode = MR_DCMD_CTRL_EVENT_WAIT;
-       dcmd->mbox.w[0] = seq_num;
-       dcmd->mbox.w[1] = curr_aen.word;
-       dcmd->sgl.sge32[0].phys_addr = (u32) instance->evt_detail_h;
-       dcmd->sgl.sge32[0].length = sizeof(struct megasas_evt_detail);
+       dcmd->mbox.w[1] = cpu_to_le32(curr_aen.word);
+       dcmd->sgl.sge32[0].phys_addr = cpu_to_le32(instance->evt_detail_h);
+       dcmd->sgl.sge32[0].length = cpu_to_le32(sizeof(struct megasas_evt_detail));
 
        if (instance->aen_cmd != NULL) {
                megasas_return_cmd(instance, cmd);
@@ -3972,8 +4096,9 @@ static int megasas_start_aen(struct megasas_instance *instance)
        class_locale.members.locale = MR_EVT_LOCALE_ALL;
        class_locale.members.class = MR_EVT_CLASS_DEBUG;
 
-       return megasas_register_aen(instance, eli.newest_seq_num + 1,
-                                   class_locale.word);
+       return megasas_register_aen(instance,
+                       le32_to_cpu(eli.newest_seq_num) + 1,
+                       class_locale.word);
 }
 
 /**
@@ -4068,6 +4193,7 @@ megasas_set_dma_mask(struct pci_dev *pdev)
                if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) != 0)
                        goto fail_set_dma_mask;
        }
+
        return 0;
 
 fail_set_dma_mask:
@@ -4386,11 +4512,11 @@ static void megasas_flush_cache(struct megasas_instance *instance)
        dcmd->cmd = MFI_CMD_DCMD;
        dcmd->cmd_status = 0x0;
        dcmd->sge_count = 0;
-       dcmd->flags = MFI_FRAME_DIR_NONE;
+       dcmd->flags = cpu_to_le16(MFI_FRAME_DIR_NONE);
        dcmd->timeout = 0;
        dcmd->pad_0 = 0;
        dcmd->data_xfer_len = 0;
-       dcmd->opcode = MR_DCMD_CTRL_CACHE_FLUSH;
+       dcmd->opcode = cpu_to_le32(MR_DCMD_CTRL_CACHE_FLUSH);
        dcmd->mbox.b[0] = MR_FLUSH_CTRL_CACHE | MR_FLUSH_DISK_CACHE;
 
        megasas_issue_blocked_cmd(instance, cmd);
@@ -4431,11 +4557,11 @@ static void megasas_shutdown_controller(struct megasas_instance *instance,
        dcmd->cmd = MFI_CMD_DCMD;
        dcmd->cmd_status = 0x0;
        dcmd->sge_count = 0;
-       dcmd->flags = MFI_FRAME_DIR_NONE;
+       dcmd->flags = cpu_to_le16(MFI_FRAME_DIR_NONE);
        dcmd->timeout = 0;
        dcmd->pad_0 = 0;
        dcmd->data_xfer_len = 0;
-       dcmd->opcode = opcode;
+       dcmd->opcode = cpu_to_le32(opcode);
 
        megasas_issue_blocked_cmd(instance, cmd);
 
@@ -4850,10 +4976,11 @@ megasas_mgmt_fw_ioctl(struct megasas_instance *instance,
         * alone separately
         */
        memcpy(cmd->frame, ioc->frame.raw, 2 * MEGAMFI_FRAME_SIZE);
-       cmd->frame->hdr.context = cmd->index;
+       cmd->frame->hdr.context = cpu_to_le32(cmd->index);
        cmd->frame->hdr.pad_0 = 0;
-       cmd->frame->hdr.flags &= ~(MFI_FRAME_IEEE | MFI_FRAME_SGL64 |
-                                  MFI_FRAME_SENSE64);
+       cmd->frame->hdr.flags &= cpu_to_le16(~(MFI_FRAME_IEEE |
+                                              MFI_FRAME_SGL64 |
+                                              MFI_FRAME_SENSE64));
 
        /*
         * The management interface between applications and the fw uses
@@ -4887,8 +5014,8 @@ megasas_mgmt_fw_ioctl(struct megasas_instance *instance,
                 * We don't change the dma_coherent_mask, so
                 * pci_alloc_consistent only returns 32bit addresses
                 */
-               kern_sge32[i].phys_addr = (u32) buf_handle;
-               kern_sge32[i].length = ioc->sgl[i].iov_len;
+               kern_sge32[i].phys_addr = cpu_to_le32(buf_handle);
+               kern_sge32[i].length = cpu_to_le32(ioc->sgl[i].iov_len);
 
                /*
                 * We created a kernel buffer corresponding to the
@@ -4911,7 +5038,7 @@ megasas_mgmt_fw_ioctl(struct megasas_instance *instance,
 
                sense_ptr =
                (unsigned long *) ((unsigned long)cmd->frame + ioc->sense_off);
-               *sense_ptr = sense_handle;
+               *sense_ptr = cpu_to_le32(sense_handle);
        }
 
        /*
@@ -4971,9 +5098,9 @@ megasas_mgmt_fw_ioctl(struct megasas_instance *instance,
        for (i = 0; i < ioc->sge_count; i++) {
                if (kbuff_arr[i])
                        dma_free_coherent(&instance->pdev->dev,
-                                         kern_sge32[i].length,
+                                         le32_to_cpu(kern_sge32[i].length),
                                          kbuff_arr[i],
-                                         kern_sge32[i].phys_addr);
+                                         le32_to_cpu(kern_sge32[i].phys_addr));
        }
 
        megasas_return_cmd(instance, cmd);
@@ -5327,7 +5454,7 @@ megasas_aen_polling(struct work_struct *work)
        host = instance->host;
        if (instance->evt_detail) {
 
-               switch (instance->evt_detail->code) {
+               switch (le32_to_cpu(instance->evt_detail->code)) {
                case MR_EVT_PD_INSERTED:
                        if (megasas_get_pd_list(instance) == 0) {
                        for (i = 0; i < MEGASAS_MAX_PD_CHANNELS; i++) {
@@ -5389,7 +5516,9 @@ megasas_aen_polling(struct work_struct *work)
                case MR_EVT_LD_OFFLINE:
                case MR_EVT_CFG_CLEARED:
                case MR_EVT_LD_DELETED:
-                       megasas_get_ld_list(instance);
+                       if (megasas_ld_list_query(instance,
+                                       MR_LD_QUERY_TYPE_EXPOSED_TO_HOST))
+                               megasas_get_ld_list(instance);
                        for (i = 0; i < MEGASAS_MAX_LD_CHANNELS; i++) {
                                for (j = 0;
                                j < MEGASAS_MAX_DEV_PER_CHANNEL;
@@ -5399,7 +5528,7 @@ megasas_aen_polling(struct work_struct *work)
                                (i * MEGASAS_MAX_DEV_PER_CHANNEL) + j;
 
                                sdev1 = scsi_device_lookup(host,
-                                       i + MEGASAS_MAX_LD_CHANNELS,
+                                       MEGASAS_MAX_PD_CHANNELS + i,
                                        j,
                                        0);
 
@@ -5418,7 +5547,9 @@ megasas_aen_polling(struct work_struct *work)
                        doscan = 0;
                        break;
                case MR_EVT_LD_CREATED:
-                       megasas_get_ld_list(instance);
+                       if (megasas_ld_list_query(instance,
+                                       MR_LD_QUERY_TYPE_EXPOSED_TO_HOST))
+                               megasas_get_ld_list(instance);
                        for (i = 0; i < MEGASAS_MAX_LD_CHANNELS; i++) {
                                for (j = 0;
                                        j < MEGASAS_MAX_DEV_PER_CHANNEL;
@@ -5427,14 +5558,14 @@ megasas_aen_polling(struct work_struct *work)
                                        (i * MEGASAS_MAX_DEV_PER_CHANNEL) + j;
 
                                        sdev1 = scsi_device_lookup(host,
-                                               i+MEGASAS_MAX_LD_CHANNELS,
+                                               MEGASAS_MAX_PD_CHANNELS + i,
                                                j, 0);
 
                                        if (instance->ld_ids[ld_index] !=
                                                                0xff) {
                                                if (!sdev1) {
                                                        scsi_add_device(host,
-                                                               i + 2,
+                                               MEGASAS_MAX_PD_CHANNELS + i,
                                                                j, 0);
                                                }
                                        }
@@ -5483,18 +5614,20 @@ megasas_aen_polling(struct work_struct *work)
                        }
                }
 
-               megasas_get_ld_list(instance);
+               if (megasas_ld_list_query(instance,
+                                         MR_LD_QUERY_TYPE_EXPOSED_TO_HOST))
+                       megasas_get_ld_list(instance);
                for (i = 0; i < MEGASAS_MAX_LD_CHANNELS; i++) {
                        for (j = 0; j < MEGASAS_MAX_DEV_PER_CHANNEL; j++) {
                                ld_index =
                                (i * MEGASAS_MAX_DEV_PER_CHANNEL) + j;
 
                                sdev1 = scsi_device_lookup(host,
-                                       i+MEGASAS_MAX_LD_CHANNELS, j, 0);
+                                       MEGASAS_MAX_PD_CHANNELS + i, j, 0);
                                if (instance->ld_ids[ld_index] != 0xff) {
                                        if (!sdev1) {
                                                scsi_add_device(host,
-                                                               i+2,
+                                               MEGASAS_MAX_PD_CHANNELS + i,
                                                                j, 0);
                                        } else {
                                                scsi_device_put(sdev1);
@@ -5514,7 +5647,7 @@ megasas_aen_polling(struct work_struct *work)
                return ;
        }
 
-       seq_num = instance->evt_detail->seq_num + 1;
+       seq_num = le32_to_cpu(instance->evt_detail->seq_num) + 1;
 
        /* Register AEN with FW for latest sequence number plus 1 */
        class_locale.members.reserved = 0;
index 4f401f7..e24b6eb 100644 (file)
@@ -126,17 +126,17 @@ static u8 MR_LdDataArmGet(u32 ld, u32 armIdx, struct MR_FW_RAID_MAP_ALL *map)
        return map->raidMap.ldSpanMap[ld].dataArmMap[armIdx];
 }
 
-static u16 MR_ArPdGet(u32 ar, u32 arm, struct MR_FW_RAID_MAP_ALL *map)
+u16 MR_ArPdGet(u32 ar, u32 arm, struct MR_FW_RAID_MAP_ALL *map)
 {
-       return map->raidMap.arMapInfo[ar].pd[arm];
+       return le16_to_cpu(map->raidMap.arMapInfo[ar].pd[arm]);
 }
 
-static u16 MR_LdSpanArrayGet(u32 ld, u32 span, struct MR_FW_RAID_MAP_ALL *map)
+u16 MR_LdSpanArrayGet(u32 ld, u32 span, struct MR_FW_RAID_MAP_ALL *map)
 {
-       return map->raidMap.ldSpanMap[ld].spanBlock[span].span.arrayRef;
+       return le16_to_cpu(map->raidMap.ldSpanMap[ld].spanBlock[span].span.arrayRef);
 }
 
-static u16 MR_PdDevHandleGet(u32 pd, struct MR_FW_RAID_MAP_ALL *map)
+u16 MR_PdDevHandleGet(u32 pd, struct MR_FW_RAID_MAP_ALL *map)
 {
        return map->raidMap.devHndlInfo[pd].curDevHdl;
 }
@@ -148,7 +148,7 @@ u16 MR_GetLDTgtId(u32 ld, struct MR_FW_RAID_MAP_ALL *map)
 
 u16 MR_TargetIdToLdGet(u32 ldTgtId, struct MR_FW_RAID_MAP_ALL *map)
 {
-       return map->raidMap.ldTgtIdToLd[ldTgtId];
+       return le16_to_cpu(map->raidMap.ldTgtIdToLd[ldTgtId]);
 }
 
 static struct MR_LD_SPAN *MR_LdSpanPtrGet(u32 ld, u32 span,
@@ -167,18 +167,22 @@ u8 MR_ValidateMapInfo(struct megasas_instance *instance)
        struct LD_LOAD_BALANCE_INFO *lbInfo = fusion->load_balance_info;
        PLD_SPAN_INFO ldSpanInfo = fusion->log_to_span;
        struct MR_FW_RAID_MAP *pFwRaidMap = &map->raidMap;
+       struct MR_LD_RAID         *raid;
+       int ldCount, num_lds;
+       u16 ld;
+
 
-       if (pFwRaidMap->totalSize !=
+       if (le32_to_cpu(pFwRaidMap->totalSize) !=
            (sizeof(struct MR_FW_RAID_MAP) -sizeof(struct MR_LD_SPAN_MAP) +
-            (sizeof(struct MR_LD_SPAN_MAP) *pFwRaidMap->ldCount))) {
+            (sizeof(struct MR_LD_SPAN_MAP) * le32_to_cpu(pFwRaidMap->ldCount)))) {
                printk(KERN_ERR "megasas: map info structure size 0x%x is not matching with ld count\n",
                       (unsigned int)((sizeof(struct MR_FW_RAID_MAP) -
                                       sizeof(struct MR_LD_SPAN_MAP)) +
                                      (sizeof(struct MR_LD_SPAN_MAP) *
-                                      pFwRaidMap->ldCount)));
+                                       le32_to_cpu(pFwRaidMap->ldCount))));
                printk(KERN_ERR "megasas: span map %x, pFwRaidMap->totalSize "
                       ": %x\n", (unsigned int)sizeof(struct MR_LD_SPAN_MAP),
-                      pFwRaidMap->totalSize);
+                       le32_to_cpu(pFwRaidMap->totalSize));
                return 0;
        }
 
@@ -187,6 +191,15 @@ u8 MR_ValidateMapInfo(struct megasas_instance *instance)
 
        mr_update_load_balance_params(map, lbInfo);
 
+       num_lds = le32_to_cpu(map->raidMap.ldCount);
+
+       /*Convert Raid capability values to CPU arch */
+       for (ldCount = 0; ldCount < num_lds; ldCount++) {
+               ld = MR_TargetIdToLdGet(ldCount, map);
+               raid = MR_LdRaidGet(ld, map);
+               le32_to_cpus((u32 *)&raid->capability);
+       }
+
        return 1;
 }
 
@@ -200,23 +213,20 @@ u32 MR_GetSpanBlock(u32 ld, u64 row, u64 *span_blk,
 
        for (span = 0; span < raid->spanDepth; span++, pSpanBlock++) {
 
-               for (j = 0; j < pSpanBlock->block_span_info.noElements; j++) {
+               for (j = 0; j < le32_to_cpu(pSpanBlock->block_span_info.noElements); j++) {
                        quad = &pSpanBlock->block_span_info.quad[j];
 
-                       if (quad->diff == 0)
+                       if (le32_to_cpu(quad->diff) == 0)
                                return SPAN_INVALID;
-                       if (quad->logStart <= row  &&  row <= quad->logEnd  &&
-                           (mega_mod64(row-quad->logStart, quad->diff)) == 0) {
+                       if (le64_to_cpu(quad->logStart) <= row && row <=
+                               le64_to_cpu(quad->logEnd) && (mega_mod64(row - le64_to_cpu(quad->logStart),
+                               le32_to_cpu(quad->diff))) == 0) {
                                if (span_blk != NULL) {
                                        u64  blk, debugBlk;
-                                       blk =
-                                               mega_div64_32(
-                                                       (row-quad->logStart),
-                                                       quad->diff);
+                                       blk =  mega_div64_32((row-le64_to_cpu(quad->logStart)), le32_to_cpu(quad->diff));
                                        debugBlk = blk;
 
-                                       blk = (blk + quad->offsetInSpan) <<
-                                               raid->stripeShift;
+                                       blk = (blk + le64_to_cpu(quad->offsetInSpan)) << raid->stripeShift;
                                        *span_blk = blk;
                                }
                                return span;
@@ -257,8 +267,8 @@ static int getSpanInfo(struct MR_FW_RAID_MAP_ALL *map, PLD_SPAN_INFO ldSpanInfo)
                for (span = 0; span < raid->spanDepth; span++)
                        dev_dbg(&instance->pdev->dev, "Span=%x,"
                        " number of quads=%x\n", span,
-                       map->raidMap.ldSpanMap[ld].spanBlock[span].
-                       block_span_info.noElements);
+                       le32_to_cpu(map->raidMap.ldSpanMap[ld].spanBlock[span].
+                       block_span_info.noElements));
                for (element = 0; element < MAX_QUAD_DEPTH; element++) {
                        span_set = &(ldSpanInfo[ld].span_set[element]);
                        if (span_set->span_row_data_width == 0)
@@ -286,22 +296,22 @@ static int getSpanInfo(struct MR_FW_RAID_MAP_ALL *map, PLD_SPAN_INFO ldSpanInfo)
                                (long unsigned int)span_set->data_strip_end);
 
                        for (span = 0; span < raid->spanDepth; span++) {
-                               if (map->raidMap.ldSpanMap[ld].spanBlock[span].
-                                       block_span_info.noElements >=
+                               if (le32_to_cpu(map->raidMap.ldSpanMap[ld].spanBlock[span].
+                                       block_span_info.noElements) >=
                                        element + 1) {
                                        quad = &map->raidMap.ldSpanMap[ld].
                                                spanBlock[span].block_span_info.
                                                quad[element];
                                dev_dbg(&instance->pdev->dev, "Span=%x,"
                                        "Quad=%x, diff=%x\n", span,
-                                       element, quad->diff);
+                                       element, le32_to_cpu(quad->diff));
                                dev_dbg(&instance->pdev->dev,
                                        "offset_in_span=0x%08lx\n",
-                                       (long unsigned int)quad->offsetInSpan);
+                                       (long unsigned int)le64_to_cpu(quad->offsetInSpan));
                                dev_dbg(&instance->pdev->dev,
                                        "logical start=0x%08lx, end=0x%08lx\n",
-                                       (long unsigned int)quad->logStart,
-                                       (long unsigned int)quad->logEnd);
+                                       (long unsigned int)le64_to_cpu(quad->logStart),
+                                       (long unsigned int)le64_to_cpu(quad->logEnd));
                                }
                        }
                }
@@ -348,23 +358,23 @@ u32 mr_spanset_get_span_block(struct megasas_instance *instance,
                        continue;
 
                for (span = 0; span < raid->spanDepth; span++)
-                       if (map->raidMap.ldSpanMap[ld].spanBlock[span].
-                               block_span_info.noElements >= info+1) {
+                       if (le32_to_cpu(map->raidMap.ldSpanMap[ld].spanBlock[span].
+                               block_span_info.noElements) >= info+1) {
                                quad = &map->raidMap.ldSpanMap[ld].
                                        spanBlock[span].
                                        block_span_info.quad[info];
-                               if (quad->diff == 0)
+                               if (le32_to_cpu(quad->diff) == 0)
                                        return SPAN_INVALID;
-                               if (quad->logStart <= row  &&
-                                       row <= quad->logEnd  &&
-                                       (mega_mod64(row - quad->logStart,
-                                               quad->diff)) == 0) {
+                               if (le64_to_cpu(quad->logStart) <= row  &&
+                                       row <= le64_to_cpu(quad->logEnd)  &&
+                                       (mega_mod64(row - le64_to_cpu(quad->logStart),
+                                               le32_to_cpu(quad->diff))) == 0) {
                                        if (span_blk != NULL) {
                                                u64  blk;
                                                blk = mega_div64_32
-                                                   ((row - quad->logStart),
-                                                   quad->diff);
-                                               blk = (blk + quad->offsetInSpan)
+                                                   ((row - le64_to_cpu(quad->logStart)),
+                                                   le32_to_cpu(quad->diff));
+                                               blk = (blk + le64_to_cpu(quad->offsetInSpan))
                                                         << raid->stripeShift;
                                                *span_blk = blk;
                                        }
@@ -415,8 +425,8 @@ static u64  get_row_from_strip(struct megasas_instance *instance,
                span_set_Row = mega_div64_32(span_set_Strip,
                                span_set->span_row_data_width) * span_set->diff;
                for (span = 0, span_offset = 0; span < raid->spanDepth; span++)
-                       if (map->raidMap.ldSpanMap[ld].spanBlock[span].
-                               block_span_info.noElements >= info+1) {
+                       if (le32_to_cpu(map->raidMap.ldSpanMap[ld].spanBlock[span].
+                               block_span_info.noElements) >= info+1) {
                                if (strip_offset >=
                                        span_set->strip_offset[span])
                                        span_offset++;
@@ -480,18 +490,18 @@ static u64 get_strip_from_row(struct megasas_instance *instance,
                        continue;
 
                for (span = 0; span < raid->spanDepth; span++)
-                       if (map->raidMap.ldSpanMap[ld].spanBlock[span].
-                               block_span_info.noElements >= info+1) {
+                       if (le32_to_cpu(map->raidMap.ldSpanMap[ld].spanBlock[span].
+                               block_span_info.noElements) >= info+1) {
                                quad = &map->raidMap.ldSpanMap[ld].
                                        spanBlock[span].block_span_info.quad[info];
-                               if (quad->logStart <= row  &&
-                                       row <= quad->logEnd  &&
-                                       mega_mod64((row - quad->logStart),
-                                       quad->diff) == 0) {
+                               if (le64_to_cpu(quad->logStart) <= row  &&
+                                       row <= le64_to_cpu(quad->logEnd)  &&
+                                       mega_mod64((row - le64_to_cpu(quad->logStart)),
+                                       le32_to_cpu(quad->diff)) == 0) {
                                        strip = mega_div64_32
                                                (((row - span_set->data_row_start)
-                                                       - quad->logStart),
-                                                       quad->diff);
+                                                       - le64_to_cpu(quad->logStart)),
+                                                       le32_to_cpu(quad->diff));
                                        strip *= span_set->span_row_data_width;
                                        strip += span_set->data_strip_start;
                                        strip += span_set->strip_offset[span];
@@ -543,8 +553,8 @@ static u32 get_arm_from_strip(struct megasas_instance *instance,
                                span_set->span_row_data_width);
 
                for (span = 0, span_offset = 0; span < raid->spanDepth; span++)
-                       if (map->raidMap.ldSpanMap[ld].spanBlock[span].
-                               block_span_info.noElements >= info+1) {
+                       if (le32_to_cpu(map->raidMap.ldSpanMap[ld].spanBlock[span].
+                               block_span_info.noElements) >= info+1) {
                                if (strip_offset >=
                                        span_set->strip_offset[span])
                                        span_offset =
@@ -669,7 +679,7 @@ static u8 mr_spanset_get_phy_params(struct megasas_instance *instance, u32 ld,
                }
        }
 
-       *pdBlock += stripRef + MR_LdSpanPtrGet(ld, span, map)->startBlk;
+       *pdBlock += stripRef + le64_to_cpu(MR_LdSpanPtrGet(ld, span, map)->startBlk);
        pRAID_Context->spanArm = (span << RAID_CTX_SPANARM_SPAN_SHIFT) |
                                        physArm;
        return retval;
@@ -765,7 +775,7 @@ u8 MR_GetPhyParams(struct megasas_instance *instance, u32 ld, u64 stripRow,
                }
        }
 
-       *pdBlock += stripRef + MR_LdSpanPtrGet(ld, span, map)->startBlk;
+       *pdBlock += stripRef + le64_to_cpu(MR_LdSpanPtrGet(ld, span, map)->startBlk);
        pRAID_Context->spanArm = (span << RAID_CTX_SPANARM_SPAN_SHIFT) |
                physArm;
        return retval;
@@ -784,7 +794,7 @@ u8
 MR_BuildRaidContext(struct megasas_instance *instance,
                    struct IO_REQUEST_INFO *io_info,
                    struct RAID_CONTEXT *pRAID_Context,
-                   struct MR_FW_RAID_MAP_ALL *map)
+                   struct MR_FW_RAID_MAP_ALL *map, u8 **raidLUN)
 {
        struct MR_LD_RAID  *raid;
        u32         ld, stripSize, stripe_mask;
@@ -965,7 +975,7 @@ MR_BuildRaidContext(struct megasas_instance *instance,
                        regSize += stripSize;
        }
 
-       pRAID_Context->timeoutValue     = map->raidMap.fpPdIoTimeoutSec;
+       pRAID_Context->timeoutValue     = cpu_to_le16(map->raidMap.fpPdIoTimeoutSec);
        if ((instance->pdev->device == PCI_DEVICE_ID_LSI_INVADER) ||
                (instance->pdev->device == PCI_DEVICE_ID_LSI_FURY))
                pRAID_Context->regLockFlags = (isRead) ?
@@ -974,9 +984,12 @@ MR_BuildRaidContext(struct megasas_instance *instance,
                pRAID_Context->regLockFlags = (isRead) ?
                        REGION_TYPE_SHARED_READ : raid->regTypeReqOnWrite;
        pRAID_Context->VirtualDiskTgtId = raid->targetId;
-       pRAID_Context->regLockRowLBA    = regStart;
-       pRAID_Context->regLockLength    = regSize;
+       pRAID_Context->regLockRowLBA    = cpu_to_le64(regStart);
+       pRAID_Context->regLockLength    = cpu_to_le32(regSize);
        pRAID_Context->configSeqNum     = raid->seqNum;
+       /* save pointer to raid->LUN array */
+       *raidLUN = raid->LUN;
+
 
        /*Get Phy Params only if FP capable, or else leave it to MR firmware
          to do the calculation.*/
@@ -1047,8 +1060,8 @@ void mr_update_span_set(struct MR_FW_RAID_MAP_ALL *map,
                raid = MR_LdRaidGet(ld, map);
                for (element = 0; element < MAX_QUAD_DEPTH; element++) {
                        for (span = 0; span < raid->spanDepth; span++) {
-                               if (map->raidMap.ldSpanMap[ld].spanBlock[span].
-                                       block_span_info.noElements <
+                               if (le32_to_cpu(map->raidMap.ldSpanMap[ld].spanBlock[span].
+                                       block_span_info.noElements) <
                                        element + 1)
                                        continue;
                                span_set = &(ldSpanInfo[ld].span_set[element]);
@@ -1056,14 +1069,14 @@ void mr_update_span_set(struct MR_FW_RAID_MAP_ALL *map,
                                        spanBlock[span].block_span_info.
                                        quad[element];
 
-                               span_set->diff = quad->diff;
+                               span_set->diff = le32_to_cpu(quad->diff);
 
                                for (count = 0, span_row_width = 0;
                                        count < raid->spanDepth; count++) {
-                                       if (map->raidMap.ldSpanMap[ld].
+                                       if (le32_to_cpu(map->raidMap.ldSpanMap[ld].
                                                spanBlock[count].
                                                block_span_info.
-                                               noElements >= element + 1) {
+                                               noElements) >= element + 1) {
                                                span_set->strip_offset[count] =
                                                        span_row_width;
                                                span_row_width +=
@@ -1077,9 +1090,9 @@ void mr_update_span_set(struct MR_FW_RAID_MAP_ALL *map,
                                }
 
                                span_set->span_row_data_width = span_row_width;
-                               span_row = mega_div64_32(((quad->logEnd -
-                                       quad->logStart) + quad->diff),
-                                       quad->diff);
+                               span_row = mega_div64_32(((le64_to_cpu(quad->logEnd) -
+                                       le64_to_cpu(quad->logStart)) + le32_to_cpu(quad->diff)),
+                                       le32_to_cpu(quad->diff));
 
                                if (element == 0) {
                                        span_set->log_start_lba = 0;
@@ -1096,7 +1109,7 @@ void mr_update_span_set(struct MR_FW_RAID_MAP_ALL *map,
 
                                        span_set->data_row_start = 0;
                                        span_set->data_row_end =
-                                               (span_row * quad->diff) - 1;
+                                               (span_row * le32_to_cpu(quad->diff)) - 1;
                                } else {
                                        span_set_prev = &(ldSpanInfo[ld].
                                                        span_set[element - 1]);
@@ -1122,7 +1135,7 @@ void mr_update_span_set(struct MR_FW_RAID_MAP_ALL *map,
                                                span_set_prev->data_row_end + 1;
                                        span_set->data_row_end =
                                                span_set->data_row_start +
-                                               (span_row * quad->diff) - 1;
+                                               (span_row * le32_to_cpu(quad->diff)) - 1;
                                }
                                break;
                }
index 417d5f1..f655592 100644 (file)
@@ -72,17 +72,6 @@ megasas_clear_intr_fusion(struct megasas_register_set __iomem *regs);
 int
 megasas_issue_polled(struct megasas_instance *instance,
                     struct megasas_cmd *cmd);
-
-u8
-MR_BuildRaidContext(struct megasas_instance *instance,
-                   struct IO_REQUEST_INFO *io_info,
-                   struct RAID_CONTEXT *pRAID_Context,
-                   struct MR_FW_RAID_MAP_ALL *map);
-u16 MR_TargetIdToLdGet(u32 ldTgtId, struct MR_FW_RAID_MAP_ALL *map);
-struct MR_LD_RAID *MR_LdRaidGet(u32 ld, struct MR_FW_RAID_MAP_ALL *map);
-
-u16 MR_GetLDTgtId(u32 ld, struct MR_FW_RAID_MAP_ALL *map);
-
 void
 megasas_check_and_restore_queue_depth(struct megasas_instance *instance);
 
@@ -626,23 +615,20 @@ megasas_ioc_init_fusion(struct megasas_instance *instance)
 
        IOCInitMessage->Function = MPI2_FUNCTION_IOC_INIT;
        IOCInitMessage->WhoInit = MPI2_WHOINIT_HOST_DRIVER;
-       IOCInitMessage->MsgVersion = MPI2_VERSION;
-       IOCInitMessage->HeaderVersion = MPI2_HEADER_VERSION;
-       IOCInitMessage->SystemRequestFrameSize =
-               MEGA_MPI2_RAID_DEFAULT_IO_FRAME_SIZE / 4;
-
-       IOCInitMessage->ReplyDescriptorPostQueueDepth = fusion->reply_q_depth;
-       IOCInitMessage->ReplyDescriptorPostQueueAddress =
-               fusion->reply_frames_desc_phys;
-       IOCInitMessage->SystemRequestFrameBaseAddress =
-               fusion->io_request_frames_phys;
+       IOCInitMessage->MsgVersion = cpu_to_le16(MPI2_VERSION);
+       IOCInitMessage->HeaderVersion = cpu_to_le16(MPI2_HEADER_VERSION);
+       IOCInitMessage->SystemRequestFrameSize = cpu_to_le16(MEGA_MPI2_RAID_DEFAULT_IO_FRAME_SIZE / 4);
+
+       IOCInitMessage->ReplyDescriptorPostQueueDepth = cpu_to_le16(fusion->reply_q_depth);
+       IOCInitMessage->ReplyDescriptorPostQueueAddress = cpu_to_le64(fusion->reply_frames_desc_phys);
+       IOCInitMessage->SystemRequestFrameBaseAddress = cpu_to_le64(fusion->io_request_frames_phys);
        IOCInitMessage->HostMSIxVectors = instance->msix_vectors;
        init_frame = (struct megasas_init_frame *)cmd->frame;
        memset(init_frame, 0, MEGAMFI_FRAME_SIZE);
 
        frame_hdr = &cmd->frame->hdr;
        frame_hdr->cmd_status = 0xFF;
-       frame_hdr->flags |= MFI_FRAME_DONT_POST_IN_REPLY_QUEUE;
+       frame_hdr->flags |= cpu_to_le16(MFI_FRAME_DONT_POST_IN_REPLY_QUEUE);
 
        init_frame->cmd = MFI_CMD_INIT;
        init_frame->cmd_status = 0xFF;
@@ -652,17 +638,24 @@ megasas_ioc_init_fusion(struct megasas_instance *instance)
                (instance->pdev->device == PCI_DEVICE_ID_LSI_FURY))
                init_frame->driver_operations.
                        mfi_capabilities.support_additional_msix = 1;
+       /* driver supports HA / Remote LUN over Fast Path interface */
+       init_frame->driver_operations.mfi_capabilities.support_fp_remote_lun
+               = 1;
+       /* Convert capability to LE32 */
+       cpu_to_le32s((u32 *)&init_frame->driver_operations.mfi_capabilities);
 
-       init_frame->queue_info_new_phys_addr_lo = ioc_init_handle;
-       init_frame->data_xfer_len = sizeof(struct MPI2_IOC_INIT_REQUEST);
+       init_frame->queue_info_new_phys_addr_lo = cpu_to_le32((u32)ioc_init_handle);
+       init_frame->data_xfer_len = cpu_to_le32(sizeof(struct MPI2_IOC_INIT_REQUEST));
 
        req_desc =
          (union MEGASAS_REQUEST_DESCRIPTOR_UNION *)fusion->req_frames_desc;
 
-       req_desc->Words = cmd->frame_phys_addr;
+       req_desc->Words = 0;
        req_desc->MFAIo.RequestFlags =
                (MEGASAS_REQ_DESCRIPT_FLAGS_MFA <<
                 MEGASAS_REQ_DESCRIPT_FLAGS_TYPE_SHIFT);
+       cpu_to_le32s((u32 *)&req_desc->MFAIo);
+       req_desc->Words |= cpu_to_le64(cmd->frame_phys_addr);
 
        /*
         * disable the intr before firing the init frame
@@ -753,13 +746,13 @@ megasas_get_ld_map_info(struct megasas_instance *instance)
        dcmd->cmd = MFI_CMD_DCMD;
        dcmd->cmd_status = 0xFF;
        dcmd->sge_count = 1;
-       dcmd->flags = MFI_FRAME_DIR_READ;
+       dcmd->flags = cpu_to_le16(MFI_FRAME_DIR_READ);
        dcmd->timeout = 0;
        dcmd->pad_0 = 0;
-       dcmd->data_xfer_len = size_map_info;
-       dcmd->opcode = MR_DCMD_LD_MAP_GET_INFO;
-       dcmd->sgl.sge32[0].phys_addr = ci_h;
-       dcmd->sgl.sge32[0].length = size_map_info;
+       dcmd->data_xfer_len = cpu_to_le32(size_map_info);
+       dcmd->opcode = cpu_to_le32(MR_DCMD_LD_MAP_GET_INFO);
+       dcmd->sgl.sge32[0].phys_addr = cpu_to_le32(ci_h);
+       dcmd->sgl.sge32[0].length = cpu_to_le32(size_map_info);
 
        if (!megasas_issue_polled(instance, cmd))
                ret = 0;
@@ -828,7 +821,7 @@ megasas_sync_map_info(struct megasas_instance *instance)
 
        map = fusion->ld_map[instance->map_id & 1];
 
-       num_lds = map->raidMap.ldCount;
+       num_lds = le32_to_cpu(map->raidMap.ldCount);
 
        dcmd = &cmd->frame->dcmd;
 
@@ -856,15 +849,15 @@ megasas_sync_map_info(struct megasas_instance *instance)
        dcmd->cmd = MFI_CMD_DCMD;
        dcmd->cmd_status = 0xFF;
        dcmd->sge_count = 1;
-       dcmd->flags = MFI_FRAME_DIR_WRITE;
+       dcmd->flags = cpu_to_le16(MFI_FRAME_DIR_WRITE);
        dcmd->timeout = 0;
        dcmd->pad_0 = 0;
-       dcmd->data_xfer_len = size_map_info;
+       dcmd->data_xfer_len = cpu_to_le32(size_map_info);
        dcmd->mbox.b[0] = num_lds;
        dcmd->mbox.b[1] = MEGASAS_DCMD_MBOX_PEND_FLAG;
-       dcmd->opcode = MR_DCMD_LD_MAP_GET_INFO;
-       dcmd->sgl.sge32[0].phys_addr = ci_h;
-       dcmd->sgl.sge32[0].length = size_map_info;
+       dcmd->opcode = cpu_to_le32(MR_DCMD_LD_MAP_GET_INFO);
+       dcmd->sgl.sge32[0].phys_addr = cpu_to_le32(ci_h);
+       dcmd->sgl.sge32[0].length = cpu_to_le32(size_map_info);
 
        instance->map_update_cmd = cmd;
 
@@ -1067,9 +1060,8 @@ megasas_fire_cmd_fusion(struct megasas_instance *instance,
 
        spin_lock_irqsave(&instance->hba_lock, flags);
 
-       writel(req_desc_lo,
-              &(regs)->inbound_low_queue_port);
-       writel(req_desc_hi, &(regs)->inbound_high_queue_port);
+       writel(le32_to_cpu(req_desc_lo), &(regs)->inbound_low_queue_port);
+       writel(le32_to_cpu(req_desc_hi), &(regs)->inbound_high_queue_port);
        spin_unlock_irqrestore(&instance->hba_lock, flags);
 }
 
@@ -1157,8 +1149,8 @@ megasas_make_sgl_fusion(struct megasas_instance *instance,
                return sge_count;
 
        scsi_for_each_sg(scp, os_sgl, sge_count, i) {
-               sgl_ptr->Length = sg_dma_len(os_sgl);
-               sgl_ptr->Address = sg_dma_address(os_sgl);
+               sgl_ptr->Length = cpu_to_le32(sg_dma_len(os_sgl));
+               sgl_ptr->Address = cpu_to_le64(sg_dma_address(os_sgl));
                sgl_ptr->Flags = 0;
                if ((instance->pdev->device == PCI_DEVICE_ID_LSI_INVADER) ||
                        (instance->pdev->device == PCI_DEVICE_ID_LSI_FURY)) {
@@ -1177,9 +1169,9 @@ megasas_make_sgl_fusion(struct megasas_instance *instance,
                                PCI_DEVICE_ID_LSI_INVADER) ||
                                (instance->pdev->device ==
                                PCI_DEVICE_ID_LSI_FURY)) {
-                               if ((cmd->io_request->IoFlags &
-                               MPI25_SAS_DEVICE0_FLAGS_ENABLED_FAST_PATH) !=
-                               MPI25_SAS_DEVICE0_FLAGS_ENABLED_FAST_PATH)
+                               if ((le16_to_cpu(cmd->io_request->IoFlags) &
+                                       MPI25_SAS_DEVICE0_FLAGS_ENABLED_FAST_PATH) !=
+                                       MPI25_SAS_DEVICE0_FLAGS_ENABLED_FAST_PATH)
                                        cmd->io_request->ChainOffset =
                                                fusion->
                                                chain_offset_io_request;
@@ -1201,9 +1193,8 @@ megasas_make_sgl_fusion(struct megasas_instance *instance,
                                sg_chain->Flags =
                                        (IEEE_SGE_FLAGS_CHAIN_ELEMENT |
                                         MPI2_IEEE_SGE_FLAGS_IOCPLBNTA_ADDR);
-                       sg_chain->Length =  (sizeof(union MPI2_SGE_IO_UNION)
-                                            *(sge_count - sg_processed));
-                       sg_chain->Address = cmd->sg_frame_phys_addr;
+                       sg_chain->Length =  cpu_to_le32((sizeof(union MPI2_SGE_IO_UNION) * (sge_count - sg_processed)));
+                       sg_chain->Address = cpu_to_le64(cmd->sg_frame_phys_addr);
 
                        sgl_ptr =
                          (struct MPI25_IEEE_SGE_CHAIN64 *)cmd->sg_frame;
@@ -1261,7 +1252,7 @@ megasas_set_pd_lba(struct MPI2_RAID_SCSI_IO_REQUEST *io_request, u8 cdb_len,
                io_request->CDB.EEDP32.PrimaryReferenceTag =
                        cpu_to_be32(ref_tag);
                io_request->CDB.EEDP32.PrimaryApplicationTagMask = 0xffff;
-               io_request->IoFlags = 32; /* Specify 32-byte cdb */
+               io_request->IoFlags = cpu_to_le16(32); /* Specify 32-byte cdb */
 
                /* Transfer length */
                cdb[28] = (u8)((num_blocks >> 24) & 0xff);
@@ -1271,19 +1262,19 @@ megasas_set_pd_lba(struct MPI2_RAID_SCSI_IO_REQUEST *io_request, u8 cdb_len,
 
                /* set SCSI IO EEDPFlags */
                if (scp->sc_data_direction == PCI_DMA_FROMDEVICE) {
-                       io_request->EEDPFlags =
+                       io_request->EEDPFlags = cpu_to_le16(
                                MPI2_SCSIIO_EEDPFLAGS_INC_PRI_REFTAG  |
                                MPI2_SCSIIO_EEDPFLAGS_CHECK_REFTAG |
                                MPI2_SCSIIO_EEDPFLAGS_CHECK_REMOVE_OP |
                                MPI2_SCSIIO_EEDPFLAGS_CHECK_APPTAG |
-                               MPI2_SCSIIO_EEDPFLAGS_CHECK_GUARD;
+                               MPI2_SCSIIO_EEDPFLAGS_CHECK_GUARD);
                } else {
-                       io_request->EEDPFlags =
+                       io_request->EEDPFlags = cpu_to_le16(
                                MPI2_SCSIIO_EEDPFLAGS_INC_PRI_REFTAG |
-                               MPI2_SCSIIO_EEDPFLAGS_INSERT_OP;
+                               MPI2_SCSIIO_EEDPFLAGS_INSERT_OP);
                }
-               io_request->Control |= (0x4 << 26);
-               io_request->EEDPBlockSize = scp->device->sector_size;
+               io_request->Control |= cpu_to_le32((0x4 << 26));
+               io_request->EEDPBlockSize = cpu_to_le32(scp->device->sector_size);
        } else {
                /* Some drives don't support 16/12 byte CDB's, convert to 10 */
                if (((cdb_len == 12) || (cdb_len == 16)) &&
@@ -1311,7 +1302,7 @@ megasas_set_pd_lba(struct MPI2_RAID_SCSI_IO_REQUEST *io_request, u8 cdb_len,
                        cdb[8] = (u8)(num_blocks & 0xff);
                        cdb[7] = (u8)((num_blocks >> 8) & 0xff);
 
-                       io_request->IoFlags = 10; /* Specify 10-byte cdb */
+                       io_request->IoFlags = cpu_to_le16(10); /* Specify 10-byte cdb */
                        cdb_len = 10;
                } else if ((cdb_len < 16) && (start_blk > 0xffffffff)) {
                        /* Convert to 16 byte CDB for large LBA's */
@@ -1349,7 +1340,7 @@ megasas_set_pd_lba(struct MPI2_RAID_SCSI_IO_REQUEST *io_request, u8 cdb_len,
                        cdb[11] = (u8)((num_blocks >> 16) & 0xff);
                        cdb[10] = (u8)((num_blocks >> 24) & 0xff);
 
-                       io_request->IoFlags = 16; /* Specify 16-byte cdb */
+                       io_request->IoFlags = cpu_to_le16(16); /* Specify 16-byte cdb */
                        cdb_len = 16;
                }
 
@@ -1410,13 +1401,14 @@ megasas_build_ldio_fusion(struct megasas_instance *instance,
        struct IO_REQUEST_INFO io_info;
        struct fusion_context *fusion;
        struct MR_FW_RAID_MAP_ALL *local_map_ptr;
+       u8 *raidLUN;
 
        device_id = MEGASAS_DEV_INDEX(instance, scp);
 
        fusion = instance->ctrl_context;
 
        io_request = cmd->io_request;
-       io_request->RaidContext.VirtualDiskTgtId = device_id;
+       io_request->RaidContext.VirtualDiskTgtId = cpu_to_le16(device_id);
        io_request->RaidContext.status = 0;
        io_request->RaidContext.exStatus = 0;
 
@@ -1480,7 +1472,7 @@ megasas_build_ldio_fusion(struct megasas_instance *instance,
        io_info.ldStartBlock = ((u64)start_lba_hi << 32) | start_lba_lo;
        io_info.numBlocks = datalength;
        io_info.ldTgtId = device_id;
-       io_request->DataLength = scsi_bufflen(scp);
+       io_request->DataLength = cpu_to_le32(scsi_bufflen(scp));
 
        if (scp->sc_data_direction == PCI_DMA_FROMDEVICE)
                io_info.isRead = 1;
@@ -1494,7 +1486,7 @@ megasas_build_ldio_fusion(struct megasas_instance *instance,
        } else {
                if (MR_BuildRaidContext(instance, &io_info,
                                        &io_request->RaidContext,
-                                       local_map_ptr))
+                                       local_map_ptr, &raidLUN))
                        fp_possible = io_info.fpOkForIo;
        }
 
@@ -1520,8 +1512,7 @@ megasas_build_ldio_fusion(struct megasas_instance *instance,
                                        MEGASAS_REQ_DESCRIPT_FLAGS_TYPE_SHIFT);
                        io_request->RaidContext.Type = MPI2_TYPE_CUDA;
                        io_request->RaidContext.nseg = 0x1;
-                       io_request->IoFlags |=
-                         MPI25_SAS_DEVICE0_FLAGS_ENABLED_FAST_PATH;
+                       io_request->IoFlags |= cpu_to_le16(MPI25_SAS_DEVICE0_FLAGS_ENABLED_FAST_PATH);
                        io_request->RaidContext.regLockFlags |=
                          (MR_RL_FLAGS_GRANT_DESTINATION_CUDA |
                           MR_RL_FLAGS_SEQ_NUM_ENABLE);
@@ -1537,9 +1528,11 @@ megasas_build_ldio_fusion(struct megasas_instance *instance,
                        scp->SCp.Status &= ~MEGASAS_LOAD_BALANCE_FLAG;
                cmd->request_desc->SCSIIO.DevHandle = io_info.devHandle;
                io_request->DevHandle = io_info.devHandle;
+               /* populate the LUN field */
+               memcpy(io_request->LUN, raidLUN, 8);
        } else {
                io_request->RaidContext.timeoutValue =
-                       local_map_ptr->raidMap.fpPdIoTimeoutSec;
+                       cpu_to_le16(local_map_ptr->raidMap.fpPdIoTimeoutSec);
                cmd->request_desc->SCSIIO.RequestFlags =
                        (MEGASAS_REQ_DESCRIPT_FLAGS_LD_IO
                         << MEGASAS_REQ_DESCRIPT_FLAGS_TYPE_SHIFT);
@@ -1557,7 +1550,7 @@ megasas_build_ldio_fusion(struct megasas_instance *instance,
                        io_request->RaidContext.nseg = 0x1;
                }
                io_request->Function = MEGASAS_MPI2_FUNCTION_LD_IO_REQUEST;
-               io_request->DevHandle = device_id;
+               io_request->DevHandle = cpu_to_le16(device_id);
        } /* Not FP */
 }
 
@@ -1579,6 +1572,11 @@ megasas_build_dcdb_fusion(struct megasas_instance *instance,
        u16 pd_index = 0;
        struct MR_FW_RAID_MAP_ALL *local_map_ptr;
        struct fusion_context *fusion = instance->ctrl_context;
+       u8                          span, physArm;
+       u16                         devHandle;
+       u32                         ld, arRef, pd;
+       struct MR_LD_RAID                  *raid;
+       struct RAID_CONTEXT                *pRAID_Context;
 
        io_request = cmd->io_request;
        device_id = MEGASAS_DEV_INDEX(instance, scmd);
@@ -1586,6 +1584,9 @@ megasas_build_dcdb_fusion(struct megasas_instance *instance,
                +scmd->device->id;
        local_map_ptr = fusion->ld_map[(instance->map_id & 1)];
 
+       io_request->DataLength = cpu_to_le32(scsi_bufflen(scmd));
+
+
        /* Check if this is a system PD I/O */
        if (scmd->device->channel < MEGASAS_MAX_PD_CHANNELS &&
            instance->pd_list[pd_index].driveState == MR_PD_STATE_SYSTEM) {
@@ -1623,15 +1624,62 @@ megasas_build_dcdb_fusion(struct megasas_instance *instance,
                                        scmd->request->timeout / HZ;
                }
        } else {
+               if (scmd->device->channel < MEGASAS_MAX_PD_CHANNELS)
+                       goto NonFastPath;
+
+               ld = MR_TargetIdToLdGet(device_id, local_map_ptr);
+               if ((ld >= MAX_LOGICAL_DRIVES) || (!fusion->fast_path_io))
+                       goto NonFastPath;
+
+               raid = MR_LdRaidGet(ld, local_map_ptr);
+
+               /* check if this LD is FP capable */
+               if (!(raid->capability.fpNonRWCapable))
+                       /* not FP capable, send as non-FP */
+                       goto NonFastPath;
+
+               /* get RAID_Context pointer */
+               pRAID_Context = &io_request->RaidContext;
+
+               /* set RAID context values */
+               pRAID_Context->regLockFlags     = REGION_TYPE_SHARED_READ;
+               pRAID_Context->timeoutValue     = raid->fpIoTimeoutForLd;
+               pRAID_Context->VirtualDiskTgtId = cpu_to_le16(device_id);
+               pRAID_Context->regLockRowLBA    = 0;
+               pRAID_Context->regLockLength    = 0;
+               pRAID_Context->configSeqNum     = raid->seqNum;
+
+               /* get the DevHandle for the PD (since this is
+                  fpNonRWCapable, this is a single disk RAID0) */
+               span = physArm = 0;
+               arRef = MR_LdSpanArrayGet(ld, span, local_map_ptr);
+               pd = MR_ArPdGet(arRef, physArm, local_map_ptr);
+               devHandle = MR_PdDevHandleGet(pd, local_map_ptr);
+
+               /* build request descriptor */
+               cmd->request_desc->SCSIIO.RequestFlags =
+                       (MPI2_REQ_DESCRIPT_FLAGS_HIGH_PRIORITY <<
+                        MEGASAS_REQ_DESCRIPT_FLAGS_TYPE_SHIFT);
+               cmd->request_desc->SCSIIO.DevHandle = devHandle;
+
+               /* populate the LUN field */
+               memcpy(io_request->LUN, raid->LUN, 8);
+
+               /* build the raidScsiIO structure */
+               io_request->Function = MPI2_FUNCTION_SCSI_IO_REQUEST;
+               io_request->DevHandle = devHandle;
+
+               return;
+
+NonFastPath:
                io_request->Function  = MEGASAS_MPI2_FUNCTION_LD_IO_REQUEST;
-               io_request->DevHandle = device_id;
+               io_request->DevHandle = cpu_to_le16(device_id);
                cmd->request_desc->SCSIIO.RequestFlags =
                        (MPI2_REQ_DESCRIPT_FLAGS_SCSI_IO <<
                         MEGASAS_REQ_DESCRIPT_FLAGS_TYPE_SHIFT);
        }
-       io_request->RaidContext.VirtualDiskTgtId = device_id;
+       io_request->RaidContext.VirtualDiskTgtId = cpu_to_le16(device_id);
        io_request->LUN[1] = scmd->device->lun;
-       io_request->DataLength = scsi_bufflen(scmd);
 }
 
 /**
@@ -1670,7 +1718,7 @@ megasas_build_io_fusion(struct megasas_instance *instance,
         * Just the CDB length,rest of the Flags are zero
         * This will be modified for FP in build_ldio_fusion
         */
-       io_request->IoFlags = scp->cmd_len;
+       io_request->IoFlags = cpu_to_le16(scp->cmd_len);
 
        if (megasas_is_ldio(scp))
                megasas_build_ldio_fusion(instance, scp, cmd);
@@ -1695,17 +1743,17 @@ megasas_build_io_fusion(struct megasas_instance *instance,
 
        io_request->RaidContext.numSGE = sge_count;
 
-       io_request->SGLFlags = MPI2_SGE_FLAGS_64_BIT_ADDRESSING;
+       io_request->SGLFlags = cpu_to_le16(MPI2_SGE_FLAGS_64_BIT_ADDRESSING);
 
        if (scp->sc_data_direction == PCI_DMA_TODEVICE)
-               io_request->Control |= MPI2_SCSIIO_CONTROL_WRITE;
+               io_request->Control |= cpu_to_le32(MPI2_SCSIIO_CONTROL_WRITE);
        else if (scp->sc_data_direction == PCI_DMA_FROMDEVICE)
-               io_request->Control |= MPI2_SCSIIO_CONTROL_READ;
+               io_request->Control |= cpu_to_le32(MPI2_SCSIIO_CONTROL_READ);
 
        io_request->SGLOffset0 =
                offsetof(struct MPI2_RAID_SCSI_IO_REQUEST, SGL) / 4;
 
-       io_request->SenseBufferLowAddress = cmd->sense_phys_addr;
+       io_request->SenseBufferLowAddress = cpu_to_le32(cmd->sense_phys_addr);
        io_request->SenseBufferLength = SCSI_SENSE_BUFFERSIZE;
 
        cmd->scmd = scp;
@@ -1770,7 +1818,7 @@ megasas_build_and_issue_cmd_fusion(struct megasas_instance *instance,
        }
 
        req_desc = cmd->request_desc;
-       req_desc->SCSIIO.SMID = index;
+       req_desc->SCSIIO.SMID = cpu_to_le16(index);
 
        if (cmd->io_request->ChainOffset != 0 &&
            cmd->io_request->ChainOffset != 0xF)
@@ -1832,7 +1880,7 @@ complete_cmd_fusion(struct megasas_instance *instance, u32 MSIxIndex)
        num_completed = 0;
 
        while ((d_val.u.low != UINT_MAX) && (d_val.u.high != UINT_MAX)) {
-               smid = reply_desc->SMID;
+               smid = le16_to_cpu(reply_desc->SMID);
 
                cmd_fusion = fusion->cmd_list[smid - 1];
 
@@ -2050,12 +2098,12 @@ build_mpt_mfi_pass_thru(struct megasas_instance *instance,
                                       SGL) / 4;
        io_req->ChainOffset = fusion->chain_offset_mfi_pthru;
 
-       mpi25_ieee_chain->Address = mfi_cmd->frame_phys_addr;
+       mpi25_ieee_chain->Address = cpu_to_le64(mfi_cmd->frame_phys_addr);
 
        mpi25_ieee_chain->Flags = IEEE_SGE_FLAGS_CHAIN_ELEMENT |
                MPI2_IEEE_SGE_FLAGS_IOCPLBNTA_ADDR;
 
-       mpi25_ieee_chain->Length = MEGASAS_MAX_SZ_CHAIN_FRAME;
+       mpi25_ieee_chain->Length = cpu_to_le32(MEGASAS_MAX_SZ_CHAIN_FRAME);
 
        return 0;
 }
@@ -2088,7 +2136,7 @@ build_mpt_cmd(struct megasas_instance *instance, struct megasas_cmd *cmd)
        req_desc->SCSIIO.RequestFlags = (MPI2_REQ_DESCRIPT_FLAGS_SCSI_IO <<
                                         MEGASAS_REQ_DESCRIPT_FLAGS_TYPE_SHIFT);
 
-       req_desc->SCSIIO.SMID = index;
+       req_desc->SCSIIO.SMID = cpu_to_le16(index);
 
        return req_desc;
 }
index 4eb8401..35a5139 100644 (file)
@@ -93,8 +93,13 @@ enum MR_RAID_FLAGS_IO_SUB_TYPE {
  */
 
 struct RAID_CONTEXT {
+#if   defined(__BIG_ENDIAN_BITFIELD)
+       u8      nseg:4;
+       u8      Type:4;
+#else
        u8      Type:4;
        u8      nseg:4;
+#endif
        u8      resvd0;
        u16     timeoutValue;
        u8      regLockFlags;
@@ -298,8 +303,13 @@ struct MPI2_RAID_SCSI_IO_REQUEST {
  * MPT RAID MFA IO Descriptor.
  */
 struct MEGASAS_RAID_MFA_IO_REQUEST_DESCRIPTOR {
+#if   defined(__BIG_ENDIAN_BITFIELD)
+       u32     MessageAddress1:24; /* bits 31:8*/
+       u32     RequestFlags:8;
+#else
        u32     RequestFlags:8;
        u32     MessageAddress1:24; /* bits 31:8*/
+#endif
        u32     MessageAddress2;      /* bits 61:32 */
 };
 
@@ -518,6 +528,19 @@ struct MR_SPAN_BLOCK_INFO {
 
 struct MR_LD_RAID {
        struct {
+#if   defined(__BIG_ENDIAN_BITFIELD)
+               u32     reserved4:7;
+               u32     fpNonRWCapable:1;
+               u32     fpReadAcrossStripe:1;
+               u32     fpWriteAcrossStripe:1;
+               u32     fpReadCapable:1;
+               u32     fpWriteCapable:1;
+               u32     encryptionType:8;
+               u32     pdPiMode:4;
+               u32     ldPiMode:4;
+               u32     reserved5:3;
+               u32     fpCapable:1;
+#else
                u32     fpCapable:1;
                u32     reserved5:3;
                u32     ldPiMode:4;
@@ -527,7 +550,9 @@ struct MR_LD_RAID {
                u32     fpReadCapable:1;
                u32     fpWriteAcrossStripe:1;
                u32     fpReadAcrossStripe:1;
-               u32     reserved4:8;
+               u32     fpNonRWCapable:1;
+               u32     reserved4:7;
+#endif
        } capability;
        u32     reserved6;
        u64     size;
@@ -551,7 +576,9 @@ struct MR_LD_RAID {
                u32 reserved:31;
        } flags;
 
-       u8      reserved3[0x5C];
+       u8      LUN[8]; /* 0x24 8 byte LUN field used for SCSI IO's */
+       u8      fpIoTimeoutForLd;/*0x2C timeout value used by driver in FP IO*/
+       u8      reserved3[0x80-0x2D]; /* 0x2D */
 };
 
 struct MR_LD_SPAN_MAP {
index 4c1d2e7..efb0c4c 100644 (file)
@@ -1,5 +1,5 @@
 # mpt3sas makefile
-obj-m += mpt3sas.o
+obj-$(CONFIG_SCSI_MPT3SAS) += mpt3sas.o
 mpt3sas-y +=  mpt3sas_base.o     \
                mpt3sas_config.o \
                mpt3sas_scsih.o      \
index ff12d46..5964800 100644 (file)
@@ -10,7 +10,7 @@
  *
  *  Forward port and refactoring to modern qla2xxx and target/configfs
  *
- *  Copyright (C) 2010-2011 Nicholas A. Bellinger <nab@kernel.org>
+ *  Copyright (C) 2010-2013 Nicholas A. Bellinger <nab@kernel.org>
  *
  *  This program is free software; you can redistribute it and/or
  *  modify it under the terms of the GNU General Public License
index a6da313..f85b9e5 100644 (file)
@@ -2,12 +2,9 @@
  * This file contains tcm implementation using v4 configfs fabric infrastructure
  * for QLogic target mode HBAs
  *
- * ?? Copyright 2010-2011 RisingTide Systems LLC.
+ * (c) Copyright 2010-2013 Datera, Inc.
  *
- * Licensed to the Linux Foundation under the General Public License (GPL)
- * version 2.
- *
- * Author: Nicholas A. Bellinger <nab@risingtidesystems.com>
+ * Author: Nicholas A. Bellinger <nab@daterainc.com>
  *
  * tcm_qla2xxx_parse_wwn() and tcm_qla2xxx_format_wwn() contains code from
  * the TCM_FC / Open-FCoE.org fabric module.
@@ -360,6 +357,14 @@ static int tcm_qla2xxx_check_prod_write_protect(struct se_portal_group *se_tpg)
        return QLA_TPG_ATTRIB(tpg)->prod_mode_write_protect;
 }
 
+static int tcm_qla2xxx_check_demo_mode_login_only(struct se_portal_group *se_tpg)
+{
+       struct tcm_qla2xxx_tpg *tpg = container_of(se_tpg,
+                               struct tcm_qla2xxx_tpg, se_tpg);
+
+       return QLA_TPG_ATTRIB(tpg)->demo_mode_login_only;
+}
+
 static struct se_node_acl *tcm_qla2xxx_alloc_fabric_acl(
        struct se_portal_group *se_tpg)
 {
@@ -489,38 +494,13 @@ static u32 tcm_qla2xxx_sess_get_index(struct se_session *se_sess)
        return 0;
 }
 
-/*
- * The LIO target core uses DMA_TO_DEVICE to mean that data is going
- * to the target (eg handling a WRITE) and DMA_FROM_DEVICE to mean
- * that data is coming from the target (eg handling a READ).  However,
- * this is just the opposite of what we have to tell the DMA mapping
- * layer -- eg when handling a READ, the HBA will have to DMA the data
- * out of memory so it can send it to the initiator, which means we
- * need to use DMA_TO_DEVICE when we map the data.
- */
-static enum dma_data_direction tcm_qla2xxx_mapping_dir(struct se_cmd *se_cmd)
-{
-       if (se_cmd->se_cmd_flags & SCF_BIDI)
-               return DMA_BIDIRECTIONAL;
-
-       switch (se_cmd->data_direction) {
-       case DMA_TO_DEVICE:
-               return DMA_FROM_DEVICE;
-       case DMA_FROM_DEVICE:
-               return DMA_TO_DEVICE;
-       case DMA_NONE:
-       default:
-               return DMA_NONE;
-       }
-}
-
 static int tcm_qla2xxx_write_pending(struct se_cmd *se_cmd)
 {
        struct qla_tgt_cmd *cmd = container_of(se_cmd,
                                struct qla_tgt_cmd, se_cmd);
 
        cmd->bufflen = se_cmd->data_length;
-       cmd->dma_data_direction = tcm_qla2xxx_mapping_dir(se_cmd);
+       cmd->dma_data_direction = target_reverse_dma_direction(se_cmd);
 
        cmd->sg_cnt = se_cmd->t_data_nents;
        cmd->sg = se_cmd->t_data_sg;
@@ -656,7 +636,7 @@ static int tcm_qla2xxx_queue_data_in(struct se_cmd *se_cmd)
                                struct qla_tgt_cmd, se_cmd);
 
        cmd->bufflen = se_cmd->data_length;
-       cmd->dma_data_direction = tcm_qla2xxx_mapping_dir(se_cmd);
+       cmd->dma_data_direction = target_reverse_dma_direction(se_cmd);
        cmd->aborted = (se_cmd->transport_state & CMD_T_ABORTED);
 
        cmd->sg_cnt = se_cmd->t_data_nents;
@@ -680,7 +660,7 @@ static int tcm_qla2xxx_queue_status(struct se_cmd *se_cmd)
        cmd->sg = NULL;
        cmd->sg_cnt = 0;
        cmd->offset = 0;
-       cmd->dma_data_direction = tcm_qla2xxx_mapping_dir(se_cmd);
+       cmd->dma_data_direction = target_reverse_dma_direction(se_cmd);
        cmd->aborted = (se_cmd->transport_state & CMD_T_ABORTED);
 
        if (se_cmd->data_direction == DMA_FROM_DEVICE) {
@@ -939,11 +919,19 @@ DEF_QLA_TPG_ATTR_BOOL(prod_mode_write_protect);
 DEF_QLA_TPG_ATTRIB(prod_mode_write_protect);
 QLA_TPG_ATTR(prod_mode_write_protect, S_IRUGO | S_IWUSR);
 
+/*
+ * Define tcm_qla2xxx_tpg_attrib_s_demo_mode_login_only
+ */
+DEF_QLA_TPG_ATTR_BOOL(demo_mode_login_only);
+DEF_QLA_TPG_ATTRIB(demo_mode_login_only);
+QLA_TPG_ATTR(demo_mode_login_only, S_IRUGO | S_IWUSR);
+
 static struct configfs_attribute *tcm_qla2xxx_tpg_attrib_attrs[] = {
        &tcm_qla2xxx_tpg_attrib_generate_node_acls.attr,
        &tcm_qla2xxx_tpg_attrib_cache_dynamic_acls.attr,
        &tcm_qla2xxx_tpg_attrib_demo_mode_write_protect.attr,
        &tcm_qla2xxx_tpg_attrib_prod_mode_write_protect.attr,
+       &tcm_qla2xxx_tpg_attrib_demo_mode_login_only.attr,
        NULL,
 };
 
@@ -1042,6 +1030,7 @@ static struct se_portal_group *tcm_qla2xxx_make_tpg(
        QLA_TPG_ATTRIB(tpg)->generate_node_acls = 1;
        QLA_TPG_ATTRIB(tpg)->demo_mode_write_protect = 1;
        QLA_TPG_ATTRIB(tpg)->cache_dynamic_acls = 1;
+       QLA_TPG_ATTRIB(tpg)->demo_mode_login_only = 1;
 
        ret = core_tpg_register(&tcm_qla2xxx_fabric_configfs->tf_ops, wwn,
                                &tpg->se_tpg, tpg, TRANSPORT_TPG_TYPE_NORMAL);
@@ -1736,7 +1725,7 @@ static struct target_core_fabric_ops tcm_qla2xxx_ops = {
                                        tcm_qla2xxx_check_demo_write_protect,
        .tpg_check_prod_mode_write_protect =
                                        tcm_qla2xxx_check_prod_write_protect,
-       .tpg_check_demo_mode_login_only = tcm_qla2xxx_check_true,
+       .tpg_check_demo_mode_login_only = tcm_qla2xxx_check_demo_mode_login_only,
        .tpg_alloc_fabric_acl           = tcm_qla2xxx_alloc_fabric_acl,
        .tpg_release_fabric_acl         = tcm_qla2xxx_release_fabric_acl,
        .tpg_get_inst_index             = tcm_qla2xxx_tpg_get_inst_index,
@@ -1784,7 +1773,7 @@ static struct target_core_fabric_ops tcm_qla2xxx_npiv_ops = {
        .tpg_check_demo_mode_cache      = tcm_qla2xxx_check_true,
        .tpg_check_demo_mode_write_protect = tcm_qla2xxx_check_true,
        .tpg_check_prod_mode_write_protect = tcm_qla2xxx_check_false,
-       .tpg_check_demo_mode_login_only = tcm_qla2xxx_check_true,
+       .tpg_check_demo_mode_login_only = tcm_qla2xxx_check_demo_mode_login_only,
        .tpg_alloc_fabric_acl           = tcm_qla2xxx_alloc_fabric_acl,
        .tpg_release_fabric_acl         = tcm_qla2xxx_release_fabric_acl,
        .tpg_get_inst_index             = tcm_qla2xxx_tpg_get_inst_index,
index 9ba075f..3293275 100644 (file)
@@ -29,6 +29,7 @@ struct tcm_qla2xxx_tpg_attrib {
        int cache_dynamic_acls;
        int demo_mode_write_protect;
        int prod_mode_write_protect;
+       int demo_mode_login_only;
 };
 
 struct tcm_qla2xxx_tpg {
index b58e8f8..e62d17d 100644 (file)
@@ -2420,14 +2420,9 @@ sd_read_cache_type(struct scsi_disk *sdkp, unsigned char *buffer)
                        }
                }
 
-               if (modepage == 0x3F) {
-                       sd_printk(KERN_ERR, sdkp, "No Caching mode page "
-                                 "present\n");
-                       goto defaults;
-               } else if ((buffer[offset] & 0x3f) != modepage) {
-                       sd_printk(KERN_ERR, sdkp, "Got wrong page\n");
-                       goto defaults;
-               }
+               sd_printk(KERN_ERR, sdkp, "No Caching mode page found\n");
+               goto defaults;
+
        Page_found:
                if (modepage == 8) {
                        sdkp->WCE = ((buffer[offset + 2] & 0x04) != 0);
index bce09a6..7210500 100644 (file)
@@ -177,6 +177,7 @@ enum {
        MASK_TASK_RESPONSE              = 0xFF00,
        MASK_RSP_UPIU_RESULT            = 0xFFFF,
        MASK_QUERY_DATA_SEG_LEN         = 0xFFFF,
+       MASK_RSP_UPIU_DATA_SEG_LEN      = 0xFFFF,
        MASK_RSP_EXCEPTION_EVENT        = 0x10000,
 };
 
index b36ca9a..04884d6 100644 (file)
 #include <linux/async.h>
 
 #include "ufshcd.h"
+#include "unipro.h"
 
 #define UFSHCD_ENABLE_INTRS    (UTP_TRANSFER_REQ_COMPL |\
                                 UTP_TASK_REQ_COMPL |\
+                                UIC_POWER_MODE |\
                                 UFSHCD_ERROR_MASK)
 /* UIC command timeout, unit: ms */
 #define UIC_CMD_TIMEOUT        500
@@ -56,6 +58,9 @@
 /* Expose the flag value from utp_upiu_query.value */
 #define MASK_QUERY_UPIU_FLAG_LOC 0xFF
 
+/* Interrupt aggregation default timeout, unit: 40us */
+#define INT_AGGR_DEF_TO        0x02
+
 enum {
        UFSHCD_MAX_CHANNEL      = 0,
        UFSHCD_MAX_ID           = 1,
@@ -78,12 +83,6 @@ enum {
        UFSHCD_INT_CLEAR,
 };
 
-/* Interrupt aggregation options */
-enum {
-       INT_AGGR_RESET,
-       INT_AGGR_CONFIG,
-};
-
 /*
  * ufshcd_wait_for_register - wait for register value to change
  * @hba - per-adapter interface
@@ -237,6 +236,18 @@ static inline int ufshcd_get_uic_cmd_result(struct ufs_hba *hba)
               MASK_UIC_COMMAND_RESULT;
 }
 
+/**
+ * ufshcd_get_dme_attr_val - Get the value of attribute returned by UIC command
+ * @hba: Pointer to adapter instance
+ *
+ * This function gets UIC command argument3
+ * Returns 0 on success, non zero value on error
+ */
+static inline u32 ufshcd_get_dme_attr_val(struct ufs_hba *hba)
+{
+       return ufshcd_readl(hba, REG_UIC_COMMAND_ARG_3);
+}
+
 /**
  * ufshcd_get_req_rsp - returns the TR response transaction type
  * @ucd_rsp_ptr: pointer to response UPIU
@@ -260,6 +271,20 @@ ufshcd_get_rsp_upiu_result(struct utp_upiu_rsp *ucd_rsp_ptr)
        return be32_to_cpu(ucd_rsp_ptr->header.dword_1) & MASK_RSP_UPIU_RESULT;
 }
 
+/*
+ * ufshcd_get_rsp_upiu_data_seg_len - Get the data segment length
+ *                             from response UPIU
+ * @ucd_rsp_ptr: pointer to response UPIU
+ *
+ * Return the data segment length.
+ */
+static inline unsigned int
+ufshcd_get_rsp_upiu_data_seg_len(struct utp_upiu_rsp *ucd_rsp_ptr)
+{
+       return be32_to_cpu(ucd_rsp_ptr->header.dword_2) &
+               MASK_RSP_UPIU_DATA_SEG_LEN;
+}
+
 /**
  * ufshcd_is_exception_event - Check if the device raised an exception event
  * @ucd_rsp_ptr: pointer to response UPIU
@@ -276,30 +301,30 @@ static inline bool ufshcd_is_exception_event(struct utp_upiu_rsp *ucd_rsp_ptr)
 }
 
 /**
- * ufshcd_config_int_aggr - Configure interrupt aggregation values.
- *             Currently there is no use case where we want to configure
- *             interrupt aggregation dynamically. So to configure interrupt
- *             aggregation, #define INT_AGGR_COUNTER_THRESHOLD_VALUE and
- *             INT_AGGR_TIMEOUT_VALUE are used.
+ * ufshcd_reset_intr_aggr - Reset interrupt aggregation values.
  * @hba: per adapter instance
- * @option: Interrupt aggregation option
  */
 static inline void
-ufshcd_config_int_aggr(struct ufs_hba *hba, int option)
+ufshcd_reset_intr_aggr(struct ufs_hba *hba)
 {
-       switch (option) {
-       case INT_AGGR_RESET:
-               ufshcd_writel(hba, INT_AGGR_ENABLE |
-                             INT_AGGR_COUNTER_AND_TIMER_RESET,
-                             REG_UTP_TRANSFER_REQ_INT_AGG_CONTROL);
-               break;
-       case INT_AGGR_CONFIG:
-               ufshcd_writel(hba, INT_AGGR_ENABLE | INT_AGGR_PARAM_WRITE |
-                             INT_AGGR_COUNTER_THRESHOLD_VALUE |
-                             INT_AGGR_TIMEOUT_VALUE,
-                             REG_UTP_TRANSFER_REQ_INT_AGG_CONTROL);
-               break;
-       }
+       ufshcd_writel(hba, INT_AGGR_ENABLE |
+                     INT_AGGR_COUNTER_AND_TIMER_RESET,
+                     REG_UTP_TRANSFER_REQ_INT_AGG_CONTROL);
+}
+
+/**
+ * ufshcd_config_intr_aggr - Configure interrupt aggregation values.
+ * @hba: per adapter instance
+ * @cnt: Interrupt aggregation counter threshold
+ * @tmout: Interrupt aggregation timeout value
+ */
+static inline void
+ufshcd_config_intr_aggr(struct ufs_hba *hba, u8 cnt, u8 tmout)
+{
+       ufshcd_writel(hba, INT_AGGR_ENABLE | INT_AGGR_PARAM_WRITE |
+                     INT_AGGR_COUNTER_THLD_VAL(cnt) |
+                     INT_AGGR_TIMEOUT_VAL(tmout),
+                     REG_UTP_TRANSFER_REQ_INT_AGG_CONTROL);
 }
 
 /**
@@ -355,7 +380,8 @@ void ufshcd_send_command(struct ufs_hba *hba, unsigned int task_tag)
 static inline void ufshcd_copy_sense_data(struct ufshcd_lrb *lrbp)
 {
        int len;
-       if (lrbp->sense_buffer) {
+       if (lrbp->sense_buffer &&
+           ufshcd_get_rsp_upiu_data_seg_len(lrbp->ucd_rsp_ptr)) {
                len = be16_to_cpu(lrbp->ucd_rsp_ptr->sr.sense_data_len);
                memcpy(lrbp->sense_buffer,
                        lrbp->ucd_rsp_ptr->sr.sense_data,
@@ -445,6 +471,18 @@ static inline bool ufshcd_ready_for_uic_cmd(struct ufs_hba *hba)
                return false;
 }
 
+/**
+ * ufshcd_get_upmcrs - Get the power mode change request status
+ * @hba: Pointer to adapter instance
+ *
+ * This function gets the UPMCRS field of HCS register
+ * Returns value of UPMCRS field
+ */
+static inline u8 ufshcd_get_upmcrs(struct ufs_hba *hba)
+{
+       return (ufshcd_readl(hba, REG_CONTROLLER_STATUS) >> 8) & 0x7;
+}
+
 /**
  * ufshcd_dispatch_uic_cmd - Dispatch UIC commands to unipro layers
  * @hba: per adapter instance
@@ -1361,6 +1399,202 @@ static int ufshcd_dme_link_startup(struct ufs_hba *hba)
        return ret;
 }
 
+/**
+ * ufshcd_dme_set_attr - UIC command for DME_SET, DME_PEER_SET
+ * @hba: per adapter instance
+ * @attr_sel: uic command argument1
+ * @attr_set: attribute set type as uic command argument2
+ * @mib_val: setting value as uic command argument3
+ * @peer: indicate whether peer or local
+ *
+ * Returns 0 on success, non-zero value on failure
+ */
+int ufshcd_dme_set_attr(struct ufs_hba *hba, u32 attr_sel,
+                       u8 attr_set, u32 mib_val, u8 peer)
+{
+       struct uic_command uic_cmd = {0};
+       static const char *const action[] = {
+               "dme-set",
+               "dme-peer-set"
+       };
+       const char *set = action[!!peer];
+       int ret;
+
+       uic_cmd.command = peer ?
+               UIC_CMD_DME_PEER_SET : UIC_CMD_DME_SET;
+       uic_cmd.argument1 = attr_sel;
+       uic_cmd.argument2 = UIC_ARG_ATTR_TYPE(attr_set);
+       uic_cmd.argument3 = mib_val;
+
+       ret = ufshcd_send_uic_cmd(hba, &uic_cmd);
+       if (ret)
+               dev_err(hba->dev, "%s: attr-id 0x%x val 0x%x error code %d\n",
+                       set, UIC_GET_ATTR_ID(attr_sel), mib_val, ret);
+
+       return ret;
+}
+EXPORT_SYMBOL_GPL(ufshcd_dme_set_attr);
+
+/**
+ * ufshcd_dme_get_attr - UIC command for DME_GET, DME_PEER_GET
+ * @hba: per adapter instance
+ * @attr_sel: uic command argument1
+ * @mib_val: the value of the attribute as returned by the UIC command
+ * @peer: indicate whether peer or local
+ *
+ * Returns 0 on success, non-zero value on failure
+ */
+int ufshcd_dme_get_attr(struct ufs_hba *hba, u32 attr_sel,
+                       u32 *mib_val, u8 peer)
+{
+       struct uic_command uic_cmd = {0};
+       static const char *const action[] = {
+               "dme-get",
+               "dme-peer-get"
+       };
+       const char *get = action[!!peer];
+       int ret;
+
+       uic_cmd.command = peer ?
+               UIC_CMD_DME_PEER_GET : UIC_CMD_DME_GET;
+       uic_cmd.argument1 = attr_sel;
+
+       ret = ufshcd_send_uic_cmd(hba, &uic_cmd);
+       if (ret) {
+               dev_err(hba->dev, "%s: attr-id 0x%x error code %d\n",
+                       get, UIC_GET_ATTR_ID(attr_sel), ret);
+               goto out;
+       }
+
+       if (mib_val)
+               *mib_val = uic_cmd.argument3;
+out:
+       return ret;
+}
+EXPORT_SYMBOL_GPL(ufshcd_dme_get_attr);
+
+/**
+ * ufshcd_uic_change_pwr_mode - Perform the UIC power mode change
+ *                             using DME_SET primitives.
+ * @hba: per adapter instance
+ * @mode: power mode value
+ *
+ * Returns 0 on success, non-zero value on failure
+ */
+int ufshcd_uic_change_pwr_mode(struct ufs_hba *hba, u8 mode)
+{
+       struct uic_command uic_cmd = {0};
+       struct completion pwr_done;
+       unsigned long flags;
+       u8 status;
+       int ret;
+
+       uic_cmd.command = UIC_CMD_DME_SET;
+       uic_cmd.argument1 = UIC_ARG_MIB(PA_PWRMODE);
+       uic_cmd.argument3 = mode;
+       init_completion(&pwr_done);
+
+       mutex_lock(&hba->uic_cmd_mutex);
+
+       spin_lock_irqsave(hba->host->host_lock, flags);
+       hba->pwr_done = &pwr_done;
+       spin_unlock_irqrestore(hba->host->host_lock, flags);
+       ret = __ufshcd_send_uic_cmd(hba, &uic_cmd);
+       if (ret) {
+               dev_err(hba->dev,
+                       "pwr mode change with mode 0x%x uic error %d\n",
+                       mode, ret);
+               goto out;
+       }
+
+       if (!wait_for_completion_timeout(hba->pwr_done,
+                                        msecs_to_jiffies(UIC_CMD_TIMEOUT))) {
+               dev_err(hba->dev,
+                       "pwr mode change with mode 0x%x completion timeout\n",
+                       mode);
+               ret = -ETIMEDOUT;
+               goto out;
+       }
+
+       status = ufshcd_get_upmcrs(hba);
+       if (status != PWR_LOCAL) {
+               dev_err(hba->dev,
+                       "pwr mode change failed, host umpcrs:0x%x\n",
+                       status);
+               ret = (status != PWR_OK) ? status : -1;
+       }
+out:
+       spin_lock_irqsave(hba->host->host_lock, flags);
+       hba->pwr_done = NULL;
+       spin_unlock_irqrestore(hba->host->host_lock, flags);
+       mutex_unlock(&hba->uic_cmd_mutex);
+       return ret;
+}
+
+/**
+ * ufshcd_config_max_pwr_mode - Set & Change power mode with
+ *     maximum capability attribute information.
+ * @hba: per adapter instance
+ *
+ * Returns 0 on success, non-zero value on failure
+ */
+static int ufshcd_config_max_pwr_mode(struct ufs_hba *hba)
+{
+       enum {RX = 0, TX = 1};
+       u32 lanes[] = {1, 1};
+       u32 gear[] = {1, 1};
+       u8 pwr[] = {FASTAUTO_MODE, FASTAUTO_MODE};
+       int ret;
+
+       /* Get the connected lane count */
+       ufshcd_dme_get(hba, UIC_ARG_MIB(PA_CONNECTEDRXDATALANES), &lanes[RX]);
+       ufshcd_dme_get(hba, UIC_ARG_MIB(PA_CONNECTEDTXDATALANES), &lanes[TX]);
+
+       /*
+        * First, get the maximum gears of HS speed.
+        * If a zero value, it means there is no HSGEAR capability.
+        * Then, get the maximum gears of PWM speed.
+        */
+       ufshcd_dme_get(hba, UIC_ARG_MIB(PA_MAXRXHSGEAR), &gear[RX]);
+       if (!gear[RX]) {
+               ufshcd_dme_get(hba, UIC_ARG_MIB(PA_MAXRXPWMGEAR), &gear[RX]);
+               pwr[RX] = SLOWAUTO_MODE;
+       }
+
+       ufshcd_dme_peer_get(hba, UIC_ARG_MIB(PA_MAXRXHSGEAR), &gear[TX]);
+       if (!gear[TX]) {
+               ufshcd_dme_peer_get(hba, UIC_ARG_MIB(PA_MAXRXPWMGEAR),
+                                   &gear[TX]);
+               pwr[TX] = SLOWAUTO_MODE;
+       }
+
+       /*
+        * Configure attributes for power mode change with below.
+        * - PA_RXGEAR, PA_ACTIVERXDATALANES, PA_RXTERMINATION,
+        * - PA_TXGEAR, PA_ACTIVETXDATALANES, PA_TXTERMINATION,
+        * - PA_HSSERIES
+        */
+       ufshcd_dme_set(hba, UIC_ARG_MIB(PA_RXGEAR), gear[RX]);
+       ufshcd_dme_set(hba, UIC_ARG_MIB(PA_ACTIVERXDATALANES), lanes[RX]);
+       if (pwr[RX] == FASTAUTO_MODE)
+               ufshcd_dme_set(hba, UIC_ARG_MIB(PA_RXTERMINATION), TRUE);
+
+       ufshcd_dme_set(hba, UIC_ARG_MIB(PA_TXGEAR), gear[TX]);
+       ufshcd_dme_set(hba, UIC_ARG_MIB(PA_ACTIVETXDATALANES), lanes[TX]);
+       if (pwr[TX] == FASTAUTO_MODE)
+               ufshcd_dme_set(hba, UIC_ARG_MIB(PA_TXTERMINATION), TRUE);
+
+       if (pwr[RX] == FASTAUTO_MODE || pwr[TX] == FASTAUTO_MODE)
+               ufshcd_dme_set(hba, UIC_ARG_MIB(PA_HSSERIES), PA_HS_MODE_B);
+
+       ret = ufshcd_uic_change_pwr_mode(hba, pwr[RX] << 4 | pwr[TX]);
+       if (ret)
+               dev_err(hba->dev,
+                       "pwr_mode: power mode change failed %d\n", ret);
+
+       return ret;
+}
+
 /**
  * ufshcd_complete_dev_init() - checks device readiness
  * hba: per-adapter instance
@@ -1442,7 +1676,7 @@ static int ufshcd_make_hba_operational(struct ufs_hba *hba)
        ufshcd_enable_intr(hba, UFSHCD_ENABLE_INTRS);
 
        /* Configure interrupt aggregation */
-       ufshcd_config_int_aggr(hba, INT_AGGR_CONFIG);
+       ufshcd_config_intr_aggr(hba, hba->nutrs - 1, INT_AGGR_DEF_TO);
 
        /* Configure UTRL and UTMRL base address registers */
        ufshcd_writel(hba, lower_32_bits(hba->utrdl_dma_addr),
@@ -1788,32 +2022,24 @@ ufshcd_scsi_cmd_status(struct ufshcd_lrb *lrbp, int scsi_status)
        int result = 0;
 
        switch (scsi_status) {
-       case SAM_STAT_GOOD:
-               result |= DID_OK << 16 |
-                         COMMAND_COMPLETE << 8 |
-                         SAM_STAT_GOOD;
-               break;
        case SAM_STAT_CHECK_CONDITION:
+               ufshcd_copy_sense_data(lrbp);
+       case SAM_STAT_GOOD:
                result |= DID_OK << 16 |
                          COMMAND_COMPLETE << 8 |
-                         SAM_STAT_CHECK_CONDITION;
-               ufshcd_copy_sense_data(lrbp);
-               break;
-       case SAM_STAT_BUSY:
-               result |= SAM_STAT_BUSY;
+                         scsi_status;
                break;
        case SAM_STAT_TASK_SET_FULL:
-
                /*
                 * If a LUN reports SAM_STAT_TASK_SET_FULL, then the LUN queue
                 * depth needs to be adjusted to the exact number of
                 * outstanding commands the LUN can handle at any given time.
                 */
                ufshcd_adjust_lun_qdepth(lrbp->cmd);
-               result |= SAM_STAT_TASK_SET_FULL;
-               break;
+       case SAM_STAT_BUSY:
        case SAM_STAT_TASK_ABORTED:
-               result |= SAM_STAT_TASK_ABORTED;
+               ufshcd_copy_sense_data(lrbp);
+               result |= scsi_status;
                break;
        default:
                result |= DID_ERROR << 16;
@@ -1898,14 +2124,20 @@ ufshcd_transfer_rsp_status(struct ufs_hba *hba, struct ufshcd_lrb *lrbp)
 /**
  * ufshcd_uic_cmd_compl - handle completion of uic command
  * @hba: per adapter instance
+ * @intr_status: interrupt status generated by the controller
  */
-static void ufshcd_uic_cmd_compl(struct ufs_hba *hba)
+static void ufshcd_uic_cmd_compl(struct ufs_hba *hba, u32 intr_status)
 {
-       if (hba->active_uic_cmd) {
+       if ((intr_status & UIC_COMMAND_COMPL) && hba->active_uic_cmd) {
                hba->active_uic_cmd->argument2 |=
                        ufshcd_get_uic_cmd_result(hba);
+               hba->active_uic_cmd->argument3 =
+                       ufshcd_get_dme_attr_val(hba);
                complete(&hba->active_uic_cmd->done);
        }
+
+       if ((intr_status & UIC_POWER_MODE) && hba->pwr_done)
+               complete(hba->pwr_done);
 }
 
 /**
@@ -1960,7 +2192,7 @@ static void ufshcd_transfer_req_compl(struct ufs_hba *hba)
 
        /* Reset interrupt aggregation counters */
        if (int_aggr_reset)
-               ufshcd_config_int_aggr(hba, INT_AGGR_RESET);
+               ufshcd_reset_intr_aggr(hba);
 }
 
 /**
@@ -2251,8 +2483,8 @@ static void ufshcd_sl_intr(struct ufs_hba *hba, u32 intr_status)
        if (hba->errors)
                ufshcd_err_handler(hba);
 
-       if (intr_status & UIC_COMMAND_COMPL)
-               ufshcd_uic_cmd_compl(hba);
+       if (intr_status & UFSHCD_UIC_MASK)
+               ufshcd_uic_cmd_compl(hba, intr_status);
 
        if (intr_status & UTP_TASK_REQ_COMPL)
                ufshcd_tmc_handler(hba);
@@ -2494,6 +2726,8 @@ static void ufshcd_async_scan(void *data, async_cookie_t cookie)
        if (ret)
                goto out;
 
+       ufshcd_config_max_pwr_mode(hba);
+
        ret = ufshcd_verify_dev_init(hba);
        if (ret)
                goto out;
index 59c9c48..577679a 100644 (file)
@@ -175,6 +175,7 @@ struct ufs_dev_cmd {
  * @active_uic_cmd: handle of active UIC command
  * @uic_cmd_mutex: mutex for uic command
  * @ufshcd_tm_wait_queue: wait queue for task management
+ * @pwr_done: completion for power mode change
  * @tm_condition: condition variable for task management
  * @ufshcd_state: UFSHCD states
  * @intr_mask: Interrupt Mask Bits
@@ -219,6 +220,8 @@ struct ufs_hba {
        wait_queue_head_t ufshcd_tm_wait_queue;
        unsigned long tm_condition;
 
+       struct completion *pwr_done;
+
        u32 ufshcd_state;
        u32 intr_mask;
        u16 ee_ctrl_mask;
@@ -263,4 +266,55 @@ static inline void check_upiu_size(void)
 extern int ufshcd_runtime_suspend(struct ufs_hba *hba);
 extern int ufshcd_runtime_resume(struct ufs_hba *hba);
 extern int ufshcd_runtime_idle(struct ufs_hba *hba);
+extern int ufshcd_dme_set_attr(struct ufs_hba *hba, u32 attr_sel,
+                              u8 attr_set, u32 mib_val, u8 peer);
+extern int ufshcd_dme_get_attr(struct ufs_hba *hba, u32 attr_sel,
+                              u32 *mib_val, u8 peer);
+
+/* UIC command interfaces for DME primitives */
+#define DME_LOCAL      0
+#define DME_PEER       1
+#define ATTR_SET_NOR   0       /* NORMAL */
+#define ATTR_SET_ST    1       /* STATIC */
+
+static inline int ufshcd_dme_set(struct ufs_hba *hba, u32 attr_sel,
+                                u32 mib_val)
+{
+       return ufshcd_dme_set_attr(hba, attr_sel, ATTR_SET_NOR,
+                                  mib_val, DME_LOCAL);
+}
+
+static inline int ufshcd_dme_st_set(struct ufs_hba *hba, u32 attr_sel,
+                                   u32 mib_val)
+{
+       return ufshcd_dme_set_attr(hba, attr_sel, ATTR_SET_ST,
+                                  mib_val, DME_LOCAL);
+}
+
+static inline int ufshcd_dme_peer_set(struct ufs_hba *hba, u32 attr_sel,
+                                     u32 mib_val)
+{
+       return ufshcd_dme_set_attr(hba, attr_sel, ATTR_SET_NOR,
+                                  mib_val, DME_PEER);
+}
+
+static inline int ufshcd_dme_peer_st_set(struct ufs_hba *hba, u32 attr_sel,
+                                        u32 mib_val)
+{
+       return ufshcd_dme_set_attr(hba, attr_sel, ATTR_SET_ST,
+                                  mib_val, DME_PEER);
+}
+
+static inline int ufshcd_dme_get(struct ufs_hba *hba,
+                                u32 attr_sel, u32 *mib_val)
+{
+       return ufshcd_dme_get_attr(hba, attr_sel, mib_val, DME_LOCAL);
+}
+
+static inline int ufshcd_dme_peer_get(struct ufs_hba *hba,
+                                     u32 attr_sel, u32 *mib_val)
+{
+       return ufshcd_dme_get_attr(hba, attr_sel, mib_val, DME_PEER);
+}
+
 #endif /* End of Header */
index f1e1b74..0475c66 100644 (file)
@@ -124,6 +124,9 @@ enum {
 #define CONTROLLER_FATAL_ERROR                 UFS_BIT(16)
 #define SYSTEM_BUS_FATAL_ERROR                 UFS_BIT(17)
 
+#define UFSHCD_UIC_MASK                (UIC_COMMAND_COMPL |\
+                                UIC_POWER_MODE)
+
 #define UFSHCD_ERROR_MASK      (UIC_ERROR |\
                                DEVICE_FATAL_ERROR |\
                                CONTROLLER_FATAL_ERROR |\
@@ -142,6 +145,15 @@ enum {
 #define DEVICE_ERROR_INDICATOR                 UFS_BIT(5)
 #define UIC_POWER_MODE_CHANGE_REQ_STATUS_MASK  UFS_MASK(0x7, 8)
 
+enum {
+       PWR_OK          = 0x0,
+       PWR_LOCAL       = 0x01,
+       PWR_REMOTE      = 0x02,
+       PWR_BUSY        = 0x03,
+       PWR_ERROR_CAP   = 0x04,
+       PWR_FATAL_ERROR = 0x05,
+};
+
 /* HCE - Host Controller Enable 34h */
 #define CONTROLLER_ENABLE      UFS_BIT(0)
 #define CONTROLLER_DISABLE     0x0
@@ -191,6 +203,12 @@ enum {
 #define CONFIG_RESULT_CODE_MASK                0xFF
 #define GENERIC_ERROR_CODE_MASK                0xFF
 
+#define UIC_ARG_MIB_SEL(attr, sel)     ((((attr) & 0xFFFF) << 16) |\
+                                        ((sel) & 0xFFFF))
+#define UIC_ARG_MIB(attr)              UIC_ARG_MIB_SEL(attr, 0)
+#define UIC_ARG_ATTR_TYPE(t)           (((t) & 0xFF) << 16)
+#define UIC_GET_ATTR_ID(v)             (((v) >> 16) & 0xFFFF)
+
 /* UIC Commands */
 enum {
        UIC_CMD_DME_GET                 = 0x01,
@@ -226,8 +244,8 @@ enum {
 
 #define MASK_UIC_COMMAND_RESULT                        0xFF
 
-#define INT_AGGR_COUNTER_THRESHOLD_VALUE       (0x1F << 8)
-#define INT_AGGR_TIMEOUT_VALUE                 (0x02)
+#define INT_AGGR_COUNTER_THLD_VAL(c)   (((c) & 0x1F) << 8)
+#define INT_AGGR_TIMEOUT_VAL(t)                (((t) & 0xFF) << 0)
 
 /* Interrupt disable masks */
 enum {
diff --git a/drivers/scsi/ufs/unipro.h b/drivers/scsi/ufs/unipro.h
new file mode 100644 (file)
index 0000000..0bb8041
--- /dev/null
@@ -0,0 +1,151 @@
+/*
+ * drivers/scsi/ufs/unipro.h
+ *
+ * Copyright (C) 2013 Samsung Electronics Co., Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef _UNIPRO_H_
+#define _UNIPRO_H_
+
+/*
+ * PHY Adapter attributes
+ */
+#define PA_ACTIVETXDATALANES   0x1560
+#define PA_ACTIVERXDATALANES   0x1580
+#define PA_TXTRAILINGCLOCKS    0x1564
+#define PA_PHY_TYPE            0x1500
+#define PA_AVAILTXDATALANES    0x1520
+#define PA_AVAILRXDATALANES    0x1540
+#define PA_MINRXTRAILINGCLOCKS 0x1543
+#define PA_TXPWRSTATUS         0x1567
+#define PA_RXPWRSTATUS         0x1582
+#define PA_TXFORCECLOCK                0x1562
+#define PA_TXPWRMODE           0x1563
+#define PA_LEGACYDPHYESCDL     0x1570
+#define PA_MAXTXSPEEDFAST      0x1521
+#define PA_MAXTXSPEEDSLOW      0x1522
+#define PA_MAXRXSPEEDFAST      0x1541
+#define PA_MAXRXSPEEDSLOW      0x1542
+#define PA_TXLINKSTARTUPHS     0x1544
+#define PA_TXSPEEDFAST         0x1565
+#define PA_TXSPEEDSLOW         0x1566
+#define PA_REMOTEVERINFO       0x15A0
+#define PA_TXGEAR              0x1568
+#define PA_TXTERMINATION       0x1569
+#define PA_HSSERIES            0x156A
+#define PA_PWRMODE             0x1571
+#define PA_RXGEAR              0x1583
+#define PA_RXTERMINATION       0x1584
+#define PA_MAXRXPWMGEAR                0x1586
+#define PA_MAXRXHSGEAR         0x1587
+#define PA_RXHSUNTERMCAP       0x15A5
+#define PA_RXLSTERMCAP         0x15A6
+#define PA_PACPREQTIMEOUT      0x1590
+#define PA_PACPREQEOBTIMEOUT   0x1591
+#define PA_HIBERN8TIME         0x15A7
+#define PA_LOCALVERINFO                0x15A9
+#define PA_TACTIVATE           0x15A8
+#define PA_PACPFRAMECOUNT      0x15C0
+#define PA_PACPERRORCOUNT      0x15C1
+#define PA_PHYTESTCONTROL      0x15C2
+#define PA_PWRMODEUSERDATA0    0x15B0
+#define PA_PWRMODEUSERDATA1    0x15B1
+#define PA_PWRMODEUSERDATA2    0x15B2
+#define PA_PWRMODEUSERDATA3    0x15B3
+#define PA_PWRMODEUSERDATA4    0x15B4
+#define PA_PWRMODEUSERDATA5    0x15B5
+#define PA_PWRMODEUSERDATA6    0x15B6
+#define PA_PWRMODEUSERDATA7    0x15B7
+#define PA_PWRMODEUSERDATA8    0x15B8
+#define PA_PWRMODEUSERDATA9    0x15B9
+#define PA_PWRMODEUSERDATA10   0x15BA
+#define PA_PWRMODEUSERDATA11   0x15BB
+#define PA_CONNECTEDTXDATALANES        0x1561
+#define PA_CONNECTEDRXDATALANES        0x1581
+#define PA_LOGICALLANEMAP      0x15A1
+#define PA_SLEEPNOCONFIGTIME   0x15A2
+#define PA_STALLNOCONFIGTIME   0x15A3
+#define PA_SAVECONFIGTIME      0x15A4
+
+/* PA power modes */
+enum {
+       FAST_MODE       = 1,
+       SLOW_MODE       = 2,
+       FASTAUTO_MODE   = 4,
+       SLOWAUTO_MODE   = 5,
+       UNCHANGED       = 7,
+};
+
+/* PA TX/RX Frequency Series */
+enum {
+       PA_HS_MODE_A    = 1,
+       PA_HS_MODE_B    = 2,
+};
+
+/*
+ * Data Link Layer Attributes
+ */
+#define DL_TC0TXFCTHRESHOLD    0x2040
+#define DL_FC0PROTTIMEOUTVAL   0x2041
+#define DL_TC0REPLAYTIMEOUTVAL 0x2042
+#define DL_AFC0REQTIMEOUTVAL   0x2043
+#define DL_AFC0CREDITTHRESHOLD 0x2044
+#define DL_TC0OUTACKTHRESHOLD  0x2045
+#define DL_TC1TXFCTHRESHOLD    0x2060
+#define DL_FC1PROTTIMEOUTVAL   0x2061
+#define DL_TC1REPLAYTIMEOUTVAL 0x2062
+#define DL_AFC1REQTIMEOUTVAL   0x2063
+#define DL_AFC1CREDITTHRESHOLD 0x2064
+#define DL_TC1OUTACKTHRESHOLD  0x2065
+#define DL_TXPREEMPTIONCAP     0x2000
+#define DL_TC0TXMAXSDUSIZE     0x2001
+#define DL_TC0RXINITCREDITVAL  0x2002
+#define DL_TC0TXBUFFERSIZE     0x2005
+#define DL_PEERTC0PRESENT      0x2046
+#define DL_PEERTC0RXINITCREVAL 0x2047
+#define DL_TC1TXMAXSDUSIZE     0x2003
+#define DL_TC1RXINITCREDITVAL  0x2004
+#define DL_TC1TXBUFFERSIZE     0x2006
+#define DL_PEERTC1PRESENT      0x2066
+#define DL_PEERTC1RXINITCREVAL 0x2067
+
+/*
+ * Network Layer Attributes
+ */
+#define N_DEVICEID             0x3000
+#define N_DEVICEID_VALID       0x3001
+#define N_TC0TXMAXSDUSIZE      0x3020
+#define N_TC1TXMAXSDUSIZE      0x3021
+
+/*
+ * Transport Layer Attributes
+ */
+#define T_NUMCPORTS            0x4000
+#define T_NUMTESTFEATURES      0x4001
+#define T_CONNECTIONSTATE      0x4020
+#define T_PEERDEVICEID         0x4021
+#define T_PEERCPORTID          0x4022
+#define T_TRAFFICCLASS         0x4023
+#define T_PROTOCOLID           0x4024
+#define T_CPORTFLAGS           0x4025
+#define T_TXTOKENVALUE         0x4026
+#define T_RXTOKENVALUE         0x4027
+#define T_LOCALBUFFERSPACE     0x4028
+#define T_PEERBUFFERSPACE      0x4029
+#define T_CREDITSTOSEND                0x402A
+#define T_CPORTMODE            0x402B
+#define T_TC0TXMAXSDUSIZE      0x4060
+#define T_TC1TXMAXSDUSIZE      0x4061
+
+/* Boolean attribute values */
+enum {
+       FALSE = 0,
+       TRUE,
+};
+
+#endif /* _UNIPRO_H_ */
index 0170d4c..b9c53cc 100644 (file)
@@ -55,7 +55,6 @@ comment "SPI Master Controller Drivers"
 
 config SPI_ALTERA
        tristate "Altera SPI Controller"
-       depends on GENERIC_HARDIRQS
        select SPI_BITBANG
        help
          This is the driver for the Altera SPI Controller.
@@ -358,7 +357,7 @@ config SPI_PXA2XX_DMA
 
 config SPI_PXA2XX
        tristate "PXA2xx SSP SPI master"
-       depends on (ARCH_PXA || PCI || ACPI) && GENERIC_HARDIRQS
+       depends on (ARCH_PXA || PCI || ACPI)
        select PXA_SSP if ARCH_PXA
        help
          This enables using a PXA2xx or Sodaville SSP port as a SPI master
index 21a3f72..8e76ddc 100644 (file)
@@ -341,27 +341,26 @@ out:
 /*
  * ashmem_shrink - our cache shrinker, called from mm/vmscan.c :: shrink_slab
  *
- * 'nr_to_scan' is the number of objects (pages) to prune, or 0 to query how
- * many objects (pages) we have in total.
+ * 'nr_to_scan' is the number of objects to scan for freeing.
  *
  * 'gfp_mask' is the mask of the allocation that got us into this mess.
  *
- * Return value is the number of objects (pages) remaining, or -1 if we cannot
+ * Return value is the number of objects freed or -1 if we cannot
  * proceed without risk of deadlock (due to gfp_mask).
  *
  * We approximate LRU via least-recently-unpinned, jettisoning unpinned partial
  * chunks of ashmem regions LRU-wise one-at-a-time until we hit 'nr_to_scan'
  * pages freed.
  */
-static int ashmem_shrink(struct shrinker *s, struct shrink_control *sc)
+static unsigned long
+ashmem_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
 {
        struct ashmem_range *range, *next;
+       unsigned long freed = 0;
 
        /* We might recurse into filesystem code, so bail out if necessary */
-       if (sc->nr_to_scan && !(sc->gfp_mask & __GFP_FS))
-               return -1;
-       if (!sc->nr_to_scan)
-               return lru_count;
+       if (!(sc->gfp_mask & __GFP_FS))
+               return SHRINK_STOP;
 
        mutex_lock(&ashmem_mutex);
        list_for_each_entry_safe(range, next, &ashmem_lru_list, lru) {
@@ -374,17 +373,32 @@ static int ashmem_shrink(struct shrinker *s, struct shrink_control *sc)
                range->purged = ASHMEM_WAS_PURGED;
                lru_del(range);
 
-               sc->nr_to_scan -= range_size(range);
-               if (sc->nr_to_scan <= 0)
+               freed += range_size(range);
+               if (--sc->nr_to_scan <= 0)
                        break;
        }
        mutex_unlock(&ashmem_mutex);
+       return freed;
+}
 
+static unsigned long
+ashmem_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
+{
+       /*
+        * note that lru_count is count of pages on the lru, not a count of
+        * objects on the list. This means the scan function needs to return the
+        * number of pages freed, not the number of objects scanned.
+        */
        return lru_count;
 }
 
 static struct shrinker ashmem_shrinker = {
-       .shrink = ashmem_shrink,
+       .count_objects = ashmem_shrink_count,
+       .scan_objects = ashmem_shrink_scan,
+       /*
+        * XXX (dchinner): I wish people would comment on why they need such
+        * significant changes to the default value here
+        */
        .seeks = DEFAULT_SEEKS * 4,
 };
 
@@ -690,11 +704,11 @@ static long ashmem_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
                if (capable(CAP_SYS_ADMIN)) {
                        struct shrink_control sc = {
                                .gfp_mask = GFP_KERNEL,
-                               .nr_to_scan = 0,
+                               .nr_to_scan = LONG_MAX,
                        };
-                       ret = ashmem_shrink(&ashmem_shrinker, &sc);
-                       sc.nr_to_scan = ret;
-                       ashmem_shrink(&ashmem_shrinker, &sc);
+
+                       nodes_setall(sc.nodes_to_scan);
+                       ashmem_shrink_scan(&ashmem_shrinker, &sc);
                }
                break;
        }
index a8c3444..d42f578 100644 (file)
@@ -481,7 +481,7 @@ static ssize_t logger_aio_write(struct kiocb *iocb, const struct iovec *iov,
        header.sec = now.tv_sec;
        header.nsec = now.tv_nsec;
        header.euid = current_euid();
-       header.len = min_t(size_t, iocb->ki_left, LOGGER_ENTRY_MAX_PAYLOAD);
+       header.len = min_t(size_t, iocb->ki_nbytes, LOGGER_ENTRY_MAX_PAYLOAD);
        header.hdr_size = sizeof(struct logger_entry);
 
        /* null writes succeed, return zero */
index fe74494..6f094b3 100644 (file)
@@ -66,11 +66,20 @@ static unsigned long lowmem_deathpending_timeout;
                        pr_info(x);                     \
        } while (0)
 
-static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc)
+static unsigned long lowmem_count(struct shrinker *s,
+                                 struct shrink_control *sc)
+{
+       return global_page_state(NR_ACTIVE_ANON) +
+               global_page_state(NR_ACTIVE_FILE) +
+               global_page_state(NR_INACTIVE_ANON) +
+               global_page_state(NR_INACTIVE_FILE);
+}
+
+static unsigned long lowmem_scan(struct shrinker *s, struct shrink_control *sc)
 {
        struct task_struct *tsk;
        struct task_struct *selected = NULL;
-       int rem = 0;
+       unsigned long rem = 0;
        int tasksize;
        int i;
        short min_score_adj = OOM_SCORE_ADJ_MAX + 1;
@@ -92,19 +101,17 @@ static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc)
                        break;
                }
        }
-       if (sc->nr_to_scan > 0)
-               lowmem_print(3, "lowmem_shrink %lu, %x, ofree %d %d, ma %hd\n",
-                               sc->nr_to_scan, sc->gfp_mask, other_free,
-                               other_file, min_score_adj);
-       rem = global_page_state(NR_ACTIVE_ANON) +
-               global_page_state(NR_ACTIVE_FILE) +
-               global_page_state(NR_INACTIVE_ANON) +
-               global_page_state(NR_INACTIVE_FILE);
-       if (sc->nr_to_scan <= 0 || min_score_adj == OOM_SCORE_ADJ_MAX + 1) {
-               lowmem_print(5, "lowmem_shrink %lu, %x, return %d\n",
-                            sc->nr_to_scan, sc->gfp_mask, rem);
-               return rem;
+
+       lowmem_print(3, "lowmem_scan %lu, %x, ofree %d %d, ma %hd\n",
+                       sc->nr_to_scan, sc->gfp_mask, other_free,
+                       other_file, min_score_adj);
+
+       if (min_score_adj == OOM_SCORE_ADJ_MAX + 1) {
+               lowmem_print(5, "lowmem_scan %lu, %x, return 0\n",
+                            sc->nr_to_scan, sc->gfp_mask);
+               return 0;
        }
+
        selected_oom_score_adj = min_score_adj;
 
        rcu_read_lock();
@@ -154,16 +161,18 @@ static int lowmem_shrink(struct shrinker *s, struct shrink_control *sc)
                lowmem_deathpending_timeout = jiffies + HZ;
                send_sig(SIGKILL, selected, 0);
                set_tsk_thread_flag(selected, TIF_MEMDIE);
-               rem -= selected_tasksize;
+               rem += selected_tasksize;
        }
-       lowmem_print(4, "lowmem_shrink %lu, %x, return %d\n",
+
+       lowmem_print(4, "lowmem_scan %lu, %x, return %lu\n",
                     sc->nr_to_scan, sc->gfp_mask, rem);
        rcu_read_unlock();
        return rem;
 }
 
 static struct shrinker lowmem_shrinker = {
-       .shrink = lowmem_shrink,
+       .scan_objects = lowmem_scan,
+       .count_objects = lowmem_count,
        .seeks = DEFAULT_SEEKS * 16
 };
 
index 63efb7b..2af15d4 100644 (file)
        do { __oldfs = get_fs(); set_fs(get_ds());} while(0)
 #define MMSPACE_CLOSE         set_fs(__oldfs)
 
-/*
- * Shrinker
- */
-
-# define SHRINKER_ARGS(sc, nr_to_scan, gfp_mask)  \
-                      struct shrinker *shrinker, \
-                      struct shrink_control *sc
-# define shrink_param(sc, var) ((sc)->var)
-
-typedef int (*shrinker_t)(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask));
-
-static inline
-struct shrinker *set_shrinker(int seek, shrinker_t func)
-{
-       struct shrinker *s;
-
-       s = kmalloc(sizeof(*s), GFP_KERNEL);
-       if (s == NULL)
-               return (NULL);
-
-       s->shrink = func;
-       s->seeks = seek;
-
-       register_shrinker(s);
-
-       return s;
-}
-
-static inline
-void remove_shrinker(struct shrinker *shrinker)
-{
-       if (shrinker == NULL)
-               return;
-
-       unregister_shrinker(shrinker);
-       kfree(shrinker);
-}
-
 #endif /* __LINUX_CFS_MEM_H__ */
index 454027d..0025ee6 100644 (file)
@@ -521,7 +521,7 @@ static int ldlm_cli_pool_shrink(struct ldlm_pool *pl,
                                int nr, unsigned int gfp_mask)
 {
        struct ldlm_namespace *ns;
-       int canceled = 0, unused;
+       int unused;
 
        ns = ldlm_pl2ns(pl);
 
@@ -540,14 +540,10 @@ static int ldlm_cli_pool_shrink(struct ldlm_pool *pl,
        unused = ns->ns_nr_unused;
        spin_unlock(&ns->ns_lock);
 
-       if (nr) {
-               canceled = ldlm_cancel_lru(ns, nr, LCF_ASYNC,
-                                          LDLM_CANCEL_SHRINK);
-       }
-       /*
-        * Return the number of potentially reclaimable locks.
-        */
-       return ((unused - canceled) / 100) * sysctl_vfs_cache_pressure;
+       if (nr == 0)
+               return (unused / 100) * sysctl_vfs_cache_pressure;
+       else
+               return ldlm_cancel_lru(ns, nr, LCF_ASYNC, LDLM_CANCEL_SHRINK);
 }
 
 struct ldlm_pool_ops ldlm_srv_pool_ops = {
@@ -601,9 +597,10 @@ int ldlm_pool_recalc(struct ldlm_pool *pl)
        return recalc_interval_sec;
 }
 
-/**
+/*
  * Pool shrink wrapper. Will call either client or server pool recalc callback
- * depending what pool \a pl is used.
+ * depending on what pool pl is used. When nr == 0, just return the number of
+ * freeable locks. Otherwise, return the number of canceled locks.
  */
 int ldlm_pool_shrink(struct ldlm_pool *pl, int nr,
                     unsigned int gfp_mask)
@@ -1017,29 +1014,24 @@ static int ldlm_pool_granted(struct ldlm_pool *pl)
 }
 
 static struct ptlrpc_thread *ldlm_pools_thread;
-static struct shrinker *ldlm_pools_srv_shrinker;
-static struct shrinker *ldlm_pools_cli_shrinker;
 static struct completion ldlm_pools_comp;
 
 /*
- * Cancel \a nr locks from all namespaces (if possible). Returns number of
- * cached locks after shrink is finished. All namespaces are asked to
- * cancel approximately equal amount of locks to keep balancing.
+ * count locks from all namespaces (if possible). Returns number of
+ * cached locks.
  */
-static int ldlm_pools_shrink(ldlm_side_t client, int nr,
-                            unsigned int gfp_mask)
+static unsigned long ldlm_pools_count(ldlm_side_t client, unsigned int gfp_mask)
 {
-       int total = 0, cached = 0, nr_ns;
+       int total = 0, nr_ns;
        struct ldlm_namespace *ns;
        struct ldlm_namespace *ns_old = NULL; /* loop detection */
        void *cookie;
 
-       if (client == LDLM_NAMESPACE_CLIENT && nr != 0 &&
-           !(gfp_mask & __GFP_FS))
-               return -1;
+       if (client == LDLM_NAMESPACE_CLIENT && !(gfp_mask & __GFP_FS))
+               return 0;
 
-       CDEBUG(D_DLMTRACE, "Request to shrink %d %s locks from all pools\n",
-              nr, client == LDLM_NAMESPACE_CLIENT ? "client" : "server");
+       CDEBUG(D_DLMTRACE, "Request to count %s locks from all pools\n",
+              client == LDLM_NAMESPACE_CLIENT ? "client" : "server");
 
        cookie = cl_env_reenter();
 
@@ -1047,8 +1039,7 @@ static int ldlm_pools_shrink(ldlm_side_t client, int nr,
         * Find out how many resources we may release.
         */
        for (nr_ns = ldlm_namespace_nr_read(client);
-            nr_ns > 0; nr_ns--)
-       {
+            nr_ns > 0; nr_ns--) {
                mutex_lock(ldlm_namespace_lock(client));
                if (list_empty(ldlm_namespace_list(client))) {
                        mutex_unlock(ldlm_namespace_lock(client));
@@ -1078,17 +1069,27 @@ static int ldlm_pools_shrink(ldlm_side_t client, int nr,
                ldlm_namespace_put(ns);
        }
 
-       if (nr == 0 || total == 0) {
-               cl_env_reexit(cookie);
-               return total;
-       }
+       cl_env_reexit(cookie);
+       return total;
+}
+
+static unsigned long ldlm_pools_scan(ldlm_side_t client, int nr, unsigned int gfp_mask)
+{
+       unsigned long freed = 0;
+       int tmp, nr_ns;
+       struct ldlm_namespace *ns;
+       void *cookie;
+
+       if (client == LDLM_NAMESPACE_CLIENT && !(gfp_mask & __GFP_FS))
+               return -1;
+
+       cookie = cl_env_reenter();
 
        /*
-        * Shrink at least ldlm_namespace_nr(client) namespaces.
+        * Shrink at least ldlm_namespace_nr_read(client) namespaces.
         */
-       for (nr_ns = ldlm_namespace_nr_read(client) - nr_ns;
-            nr_ns > 0; nr_ns--)
-       {
+       for (tmp = nr_ns = ldlm_namespace_nr_read(client);
+            tmp > 0; tmp--) {
                int cancel, nr_locks;
 
                /*
@@ -1097,12 +1098,6 @@ static int ldlm_pools_shrink(ldlm_side_t client, int nr,
                mutex_lock(ldlm_namespace_lock(client));
                if (list_empty(ldlm_namespace_list(client))) {
                        mutex_unlock(ldlm_namespace_lock(client));
-                       /*
-                        * If list is empty, we can't return any @cached > 0,
-                        * that probably would cause needless shrinker
-                        * call.
-                        */
-                       cached = 0;
                        break;
                }
                ns = ldlm_namespace_first_locked(client);
@@ -1111,29 +1106,42 @@ static int ldlm_pools_shrink(ldlm_side_t client, int nr,
                mutex_unlock(ldlm_namespace_lock(client));
 
                nr_locks = ldlm_pool_granted(&ns->ns_pool);
-               cancel = 1 + nr_locks * nr / total;
-               ldlm_pool_shrink(&ns->ns_pool, cancel, gfp_mask);
-               cached += ldlm_pool_granted(&ns->ns_pool);
+               /*
+                * We used to shrink proportionally, but with the new
+                * shrinker API, we lost the total number of freeable locks.
+                */
+               cancel = 1 + min_t(int, nr_locks, nr / nr_ns);
+               freed += ldlm_pool_shrink(&ns->ns_pool, cancel, gfp_mask);
                ldlm_namespace_put(ns);
        }
        cl_env_reexit(cookie);
-       /* we only decrease the SLV in server pools shrinker, return -1 to
-        * kernel to avoid needless loop. LU-1128 */
-       return (client == LDLM_NAMESPACE_SERVER) ? -1 : cached;
+       /*
+        * we only decrease the SLV in server pools shrinker, return
+        * SHRINK_STOP to kernel to avoid needless loop. LU-1128
+        */
+       return (client == LDLM_NAMESPACE_SERVER) ? SHRINK_STOP : freed;
+}
+
+static unsigned long ldlm_pools_srv_count(struct shrinker *s, struct shrink_control *sc)
+{
+       return ldlm_pools_count(LDLM_NAMESPACE_SERVER, sc->gfp_mask);
 }
 
-static int ldlm_pools_srv_shrink(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask))
+static unsigned long ldlm_pools_srv_scan(struct shrinker *s, struct shrink_control *sc)
 {
-       return ldlm_pools_shrink(LDLM_NAMESPACE_SERVER,
-                                shrink_param(sc, nr_to_scan),
-                                shrink_param(sc, gfp_mask));
+       return ldlm_pools_scan(LDLM_NAMESPACE_SERVER, sc->nr_to_scan,
+                              sc->gfp_mask);
 }
 
-static int ldlm_pools_cli_shrink(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask))
+static unsigned long ldlm_pools_cli_count(struct shrinker *s, struct shrink_control *sc)
 {
-       return ldlm_pools_shrink(LDLM_NAMESPACE_CLIENT,
-                                shrink_param(sc, nr_to_scan),
-                                shrink_param(sc, gfp_mask));
+       return ldlm_pools_count(LDLM_NAMESPACE_CLIENT, sc->gfp_mask);
+}
+
+static unsigned long ldlm_pools_cli_scan(struct shrinker *s, struct shrink_control *sc)
+{
+       return ldlm_pools_scan(LDLM_NAMESPACE_CLIENT, sc->nr_to_scan,
+                              sc->gfp_mask);
 }
 
 int ldlm_pools_recalc(ldlm_side_t client)
@@ -1216,7 +1224,7 @@ int ldlm_pools_recalc(ldlm_side_t client)
        }
 
        /*
-        * Recalc at least ldlm_namespace_nr(client) namespaces.
+        * Recalc at least ldlm_namespace_nr_read(client) namespaces.
         */
        for (nr = ldlm_namespace_nr_read(client); nr > 0; nr--) {
                int     skip;
@@ -1383,18 +1391,26 @@ static void ldlm_pools_thread_stop(void)
        ldlm_pools_thread = NULL;
 }
 
+static struct shrinker ldlm_pools_srv_shrinker = {
+       .count_objects  = ldlm_pools_srv_count,
+       .scan_objects   = ldlm_pools_srv_scan,
+       .seeks          = DEFAULT_SEEKS,
+};
+
+static struct shrinker ldlm_pools_cli_shrinker = {
+       .count_objects  = ldlm_pools_cli_count,
+       .scan_objects   = ldlm_pools_cli_scan,
+       .seeks          = DEFAULT_SEEKS,
+};
+
 int ldlm_pools_init(void)
 {
        int rc;
 
        rc = ldlm_pools_thread_start();
        if (rc == 0) {
-               ldlm_pools_srv_shrinker =
-                       set_shrinker(DEFAULT_SEEKS,
-                                        ldlm_pools_srv_shrink);
-               ldlm_pools_cli_shrinker =
-                       set_shrinker(DEFAULT_SEEKS,
-                                        ldlm_pools_cli_shrink);
+               register_shrinker(&ldlm_pools_srv_shrinker);
+               register_shrinker(&ldlm_pools_cli_shrinker);
        }
        return rc;
 }
@@ -1402,14 +1418,8 @@ EXPORT_SYMBOL(ldlm_pools_init);
 
 void ldlm_pools_fini(void)
 {
-       if (ldlm_pools_srv_shrinker != NULL) {
-               remove_shrinker(ldlm_pools_srv_shrinker);
-               ldlm_pools_srv_shrinker = NULL;
-       }
-       if (ldlm_pools_cli_shrinker != NULL) {
-               remove_shrinker(ldlm_pools_cli_shrinker);
-               ldlm_pools_cli_shrinker = NULL;
-       }
+       unregister_shrinker(&ldlm_pools_srv_shrinker);
+       unregister_shrinker(&ldlm_pools_cli_shrinker);
        ldlm_pools_thread_stop();
 }
 EXPORT_SYMBOL(ldlm_pools_fini);
index 253f026..bc534db 100644 (file)
@@ -1009,7 +1009,7 @@ static ssize_t ll_file_read(struct file *file, char *buf, size_t count,
        local_iov->iov_len = count;
        init_sync_kiocb(kiocb, file);
        kiocb->ki_pos = *ppos;
-       kiocb->ki_left = count;
+       kiocb->ki_nbytes = count;
 
        result = ll_file_aio_read(kiocb, local_iov, 1, kiocb->ki_pos);
        *ppos = kiocb->ki_pos;
@@ -1068,7 +1068,7 @@ static ssize_t ll_file_write(struct file *file, const char *buf, size_t count,
        local_iov->iov_len = count;
        init_sync_kiocb(kiocb, file);
        kiocb->ki_pos = *ppos;
-       kiocb->ki_left = count;
+       kiocb->ki_nbytes = count;
 
        result = ll_file_aio_write(kiocb, local_iov, 1, kiocb->ki_pos);
        *ppos = kiocb->ki_pos;
index c29ac1c..3a3d5bc 100644 (file)
@@ -1779,7 +1779,6 @@ int lu_env_refill_by_tags(struct lu_env *env, __u32 ctags,
 }
 EXPORT_SYMBOL(lu_env_refill_by_tags);
 
-static struct shrinker *lu_site_shrinker = NULL;
 
 typedef struct lu_site_stats{
        unsigned        lss_populated;
@@ -1835,61 +1834,68 @@ static void lu_site_stats_get(cfs_hash_t *hs,
  * objects without taking the  lu_sites_guard lock, but this is not
  * possible in the current implementation.
  */
-static int lu_cache_shrink(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask))
+static unsigned long lu_cache_shrink_count(struct shrinker *sk,
+                                          struct shrink_control *sc)
 {
        lu_site_stats_t stats;
        struct lu_site *s;
        struct lu_site *tmp;
-       int cached = 0;
-       int remain = shrink_param(sc, nr_to_scan);
-       LIST_HEAD(splice);
-
-       if (!(shrink_param(sc, gfp_mask) & __GFP_FS)) {
-               if (remain != 0)
-                       return -1;
-               else
-                       /* We must not take the lu_sites_guard lock when
-                        * __GFP_FS is *not* set because of the deadlock
-                        * possibility detailed above. Additionally,
-                        * since we cannot determine the number of
-                        * objects in the cache without taking this
-                        * lock, we're in a particularly tough spot. As
-                        * a result, we'll just lie and say our cache is
-                        * empty. This _should_ be ok, as we can't
-                        * reclaim objects when __GFP_FS is *not* set
-                        * anyways.
-                        */
-                       return 0;
-       }
+       unsigned long cached = 0;
 
-       CDEBUG(D_INODE, "Shrink %d objects\n", remain);
+       if (!(sc->gfp_mask & __GFP_FS))
+               return 0;
 
        mutex_lock(&lu_sites_guard);
        list_for_each_entry_safe(s, tmp, &lu_sites, ls_linkage) {
-               if (shrink_param(sc, nr_to_scan) != 0) {
-                       remain = lu_site_purge(&lu_shrink_env, s, remain);
-                       /*
-                        * Move just shrunk site to the tail of site list to
-                        * assure shrinking fairness.
-                        */
-                       list_move_tail(&s->ls_linkage, &splice);
-               }
-
                memset(&stats, 0, sizeof(stats));
                lu_site_stats_get(s->ls_obj_hash, &stats, 0);
                cached += stats.lss_total - stats.lss_busy;
-               if (shrink_param(sc, nr_to_scan) && remain <= 0)
-                       break;
        }
-       list_splice(&splice, lu_sites.prev);
        mutex_unlock(&lu_sites_guard);
 
        cached = (cached / 100) * sysctl_vfs_cache_pressure;
-       if (shrink_param(sc, nr_to_scan) == 0)
-               CDEBUG(D_INODE, "%d objects cached\n", cached);
+       CDEBUG(D_INODE, "%ld objects cached\n", cached);
        return cached;
 }
 
+static unsigned long lu_cache_shrink_scan(struct shrinker *sk,
+                                         struct shrink_control *sc)
+{
+       struct lu_site *s;
+       struct lu_site *tmp;
+       unsigned long remain = sc->nr_to_scan, freed = 0;
+       LIST_HEAD(splice);
+
+       if (!(sc->gfp_mask & __GFP_FS))
+               /* We must not take the lu_sites_guard lock when
+                * __GFP_FS is *not* set because of the deadlock
+                * possibility detailed above. Additionally,
+                * since we cannot determine the number of
+                * objects in the cache without taking this
+                * lock, we're in a particularly tough spot. As
+                * a result, we'll just lie and say our cache is
+                * empty. This _should_ be ok, as we can't
+                * reclaim objects when __GFP_FS is *not* set
+                * anyways.
+                */
+               return SHRINK_STOP;
+
+       mutex_lock(&lu_sites_guard);
+       list_for_each_entry_safe(s, tmp, &lu_sites, ls_linkage) {
+               freed = lu_site_purge(&lu_shrink_env, s, remain);
+               remain -= freed;
+               /*
+                * Move just shrunk site to the tail of site list to
+                * assure shrinking fairness.
+                */
+               list_move_tail(&s->ls_linkage, &splice);
+       }
+       list_splice(&splice, lu_sites.prev);
+       mutex_unlock(&lu_sites_guard);
+
+       return sc->nr_to_scan - remain;
+}
+
 /*
  * Debugging stuff.
  */
@@ -1913,6 +1919,12 @@ int lu_printk_printer(const struct lu_env *env,
        return 0;
 }
 
+static struct shrinker lu_site_shrinker = {
+       .count_objects  = lu_cache_shrink_count,
+       .scan_objects   = lu_cache_shrink_scan,
+       .seeks          = DEFAULT_SEEKS,
+};
+
 /**
  * Initialization of global lu_* data.
  */
@@ -1947,9 +1959,7 @@ int lu_global_init(void)
         * inode, one for ea. Unfortunately setting this high value results in
         * lu_object/inode cache consuming all the memory.
         */
-       lu_site_shrinker = set_shrinker(DEFAULT_SEEKS, lu_cache_shrink);
-       if (lu_site_shrinker == NULL)
-               return -ENOMEM;
+       register_shrinker(&lu_site_shrinker);
 
        return result;
 }
@@ -1959,11 +1969,7 @@ int lu_global_init(void)
  */
 void lu_global_fini(void)
 {
-       if (lu_site_shrinker != NULL) {
-               remove_shrinker(lu_site_shrinker);
-               lu_site_shrinker = NULL;
-       }
-
+       unregister_shrinker(&lu_site_shrinker);
        lu_context_key_degister(&lu_global_key);
 
        /*
index 9013745..e90c8fb 100644 (file)
@@ -120,13 +120,6 @@ static struct ptlrpc_enc_page_pool {
        struct page    ***epp_pools;
 } page_pools;
 
-/*
- * memory shrinker
- */
-const int pools_shrinker_seeks = DEFAULT_SEEKS;
-static struct shrinker *pools_shrinker = NULL;
-
-
 /*
  * /proc/fs/lustre/sptlrpc/encrypt_page_pools
  */
@@ -226,30 +219,46 @@ static void enc_pools_release_free_pages(long npages)
 }
 
 /*
- * could be called frequently for query (@nr_to_scan == 0).
  * we try to keep at least PTLRPC_MAX_BRW_PAGES pages in the pool.
  */
-static int enc_pools_shrink(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask))
+static unsigned long enc_pools_shrink_count(struct shrinker *s,
+                                           struct shrink_control *sc)
 {
-       if (unlikely(shrink_param(sc, nr_to_scan) != 0)) {
+       /*
+        * if no pool access for a long time, we consider it's fully idle.
+        * a little race here is fine.
+        */
+       if (unlikely(cfs_time_current_sec() - page_pools.epp_last_access >
+                    CACHE_QUIESCENT_PERIOD)) {
                spin_lock(&page_pools.epp_lock);
-               shrink_param(sc, nr_to_scan) = min_t(unsigned long,
-                                                  shrink_param(sc, nr_to_scan),
-                                                  page_pools.epp_free_pages -
-                                                  PTLRPC_MAX_BRW_PAGES);
-               if (shrink_param(sc, nr_to_scan) > 0) {
-                       enc_pools_release_free_pages(shrink_param(sc,
-                                                                 nr_to_scan));
-                       CDEBUG(D_SEC, "released %ld pages, %ld left\n",
-                              (long)shrink_param(sc, nr_to_scan),
-                              page_pools.epp_free_pages);
-
-                       page_pools.epp_st_shrinks++;
-                       page_pools.epp_last_shrink = cfs_time_current_sec();
-               }
+               page_pools.epp_idle_idx = IDLE_IDX_MAX;
                spin_unlock(&page_pools.epp_lock);
        }
 
+       LASSERT(page_pools.epp_idle_idx <= IDLE_IDX_MAX);
+       return max((int)page_pools.epp_free_pages - PTLRPC_MAX_BRW_PAGES, 0) *
+               (IDLE_IDX_MAX - page_pools.epp_idle_idx) / IDLE_IDX_MAX;
+}
+
+/*
+ * we try to keep at least PTLRPC_MAX_BRW_PAGES pages in the pool.
+ */
+static unsigned long enc_pools_shrink_scan(struct shrinker *s,
+                                          struct shrink_control *sc)
+{
+       spin_lock(&page_pools.epp_lock);
+       sc->nr_to_scan = min_t(unsigned long, sc->nr_to_scan,
+                             page_pools.epp_free_pages - PTLRPC_MAX_BRW_PAGES);
+       if (sc->nr_to_scan > 0) {
+               enc_pools_release_free_pages(sc->nr_to_scan);
+               CDEBUG(D_SEC, "released %ld pages, %ld left\n",
+                      (long)sc->nr_to_scan, page_pools.epp_free_pages);
+
+               page_pools.epp_st_shrinks++;
+               page_pools.epp_last_shrink = cfs_time_current_sec();
+       }
+       spin_unlock(&page_pools.epp_lock);
+
        /*
         * if no pool access for a long time, we consider it's fully idle.
         * a little race here is fine.
@@ -262,8 +271,7 @@ static int enc_pools_shrink(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask))
        }
 
        LASSERT(page_pools.epp_idle_idx <= IDLE_IDX_MAX);
-       return max((int)page_pools.epp_free_pages - PTLRPC_MAX_BRW_PAGES, 0) *
-               (IDLE_IDX_MAX - page_pools.epp_idle_idx) / IDLE_IDX_MAX;
+       return sc->nr_to_scan;
 }
 
 static inline
@@ -699,6 +707,12 @@ static inline void enc_pools_free(void)
                       sizeof(*page_pools.epp_pools));
 }
 
+static struct shrinker pools_shrinker = {
+       .count_objects  = enc_pools_shrink_count,
+       .scan_objects   = enc_pools_shrink_scan,
+       .seeks          = DEFAULT_SEEKS,
+};
+
 int sptlrpc_enc_pool_init(void)
 {
        /*
@@ -736,12 +750,7 @@ int sptlrpc_enc_pool_init(void)
        if (page_pools.epp_pools == NULL)
                return -ENOMEM;
 
-       pools_shrinker = set_shrinker(pools_shrinker_seeks,
-                                         enc_pools_shrink);
-       if (pools_shrinker == NULL) {
-               enc_pools_free();
-               return -ENOMEM;
-       }
+       register_shrinker(&pools_shrinker);
 
        return 0;
 }
@@ -750,11 +759,10 @@ void sptlrpc_enc_pool_fini(void)
 {
        unsigned long cleaned, npools;
 
-       LASSERT(pools_shrinker);
        LASSERT(page_pools.epp_pools);
        LASSERT(page_pools.epp_total_pages == page_pools.epp_free_pages);
 
-       remove_shrinker(pools_shrinker);
+       unregister_shrinker(&pools_shrinker);
 
        npools = npages_to_npools(page_pools.epp_total_pages);
        cleaned = enc_pools_cleanup(page_pools.epp_pools, npools);
index 9fdcb56..85b012d 100644 (file)
@@ -13,7 +13,8 @@ target_core_mod-y             := target_core_configfs.o \
                                   target_core_spc.o \
                                   target_core_ua.o \
                                   target_core_rd.o \
-                                  target_core_stat.o
+                                  target_core_stat.o \
+                                  target_core_xcopy.o
 
 obj-$(CONFIG_TARGET_CORE)      += target_core_mod.o
 
index 3a17930..35b61f7 100644 (file)
@@ -1,9 +1,7 @@
 /*******************************************************************************
  * This file contains main functions related to the iSCSI Target Core Driver.
  *
- * © Copyright 2007-2011 RisingTide Systems LLC.
- *
- * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ * (c) Copyright 2007-2013 Datera, Inc.
  *
  * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
  *
@@ -63,7 +61,6 @@ spinlock_t sess_idr_lock;
 
 struct iscsit_global *iscsit_global;
 
-struct kmem_cache *lio_cmd_cache;
 struct kmem_cache *lio_qr_cache;
 struct kmem_cache *lio_dr_cache;
 struct kmem_cache *lio_ooo_cache;
@@ -220,11 +217,6 @@ int iscsit_access_np(struct iscsi_np *np, struct iscsi_portal_group *tpg)
                spin_unlock_bh(&np->np_thread_lock);
                return -1;
        }
-       if (np->np_login_tpg) {
-               pr_err("np->np_login_tpg() is not NULL!\n");
-               spin_unlock_bh(&np->np_thread_lock);
-               return -1;
-       }
        spin_unlock_bh(&np->np_thread_lock);
        /*
         * Determine if the portal group is accepting storage traffic.
@@ -239,26 +231,38 @@ int iscsit_access_np(struct iscsi_np *np, struct iscsi_portal_group *tpg)
        /*
         * Here we serialize access across the TIQN+TPG Tuple.
         */
-       ret = mutex_lock_interruptible(&tpg->np_login_lock);
+       ret = down_interruptible(&tpg->np_login_sem);
        if ((ret != 0) || signal_pending(current))
                return -1;
 
-       spin_lock_bh(&np->np_thread_lock);
-       np->np_login_tpg = tpg;
-       spin_unlock_bh(&np->np_thread_lock);
+       spin_lock_bh(&tpg->tpg_state_lock);
+       if (tpg->tpg_state != TPG_STATE_ACTIVE) {
+               spin_unlock_bh(&tpg->tpg_state_lock);
+               up(&tpg->np_login_sem);
+               return -1;
+       }
+       spin_unlock_bh(&tpg->tpg_state_lock);
 
        return 0;
 }
 
-int iscsit_deaccess_np(struct iscsi_np *np, struct iscsi_portal_group *tpg)
+void iscsit_login_kref_put(struct kref *kref)
+{
+       struct iscsi_tpg_np *tpg_np = container_of(kref,
+                               struct iscsi_tpg_np, tpg_np_kref);
+
+       complete(&tpg_np->tpg_np_comp);
+}
+
+int iscsit_deaccess_np(struct iscsi_np *np, struct iscsi_portal_group *tpg,
+                      struct iscsi_tpg_np *tpg_np)
 {
        struct iscsi_tiqn *tiqn = tpg->tpg_tiqn;
 
-       spin_lock_bh(&np->np_thread_lock);
-       np->np_login_tpg = NULL;
-       spin_unlock_bh(&np->np_thread_lock);
+       up(&tpg->np_login_sem);
 
-       mutex_unlock(&tpg->np_login_lock);
+       if (tpg_np)
+               kref_put(&tpg_np->tpg_np_kref, iscsit_login_kref_put);
 
        if (tiqn)
                iscsit_put_tiqn_for_login(tiqn);
@@ -410,20 +414,10 @@ struct iscsi_np *iscsit_add_np(
 int iscsit_reset_np_thread(
        struct iscsi_np *np,
        struct iscsi_tpg_np *tpg_np,
-       struct iscsi_portal_group *tpg)
+       struct iscsi_portal_group *tpg,
+       bool shutdown)
 {
        spin_lock_bh(&np->np_thread_lock);
-       if (tpg && tpg_np) {
-               /*
-                * The reset operation need only be performed when the
-                * passed struct iscsi_portal_group has a login in progress
-                * to one of the network portals.
-                */
-               if (tpg_np->tpg_np->np_login_tpg != tpg) {
-                       spin_unlock_bh(&np->np_thread_lock);
-                       return 0;
-               }
-       }
        if (np->np_thread_state == ISCSI_NP_THREAD_INACTIVE) {
                spin_unlock_bh(&np->np_thread_lock);
                return 0;
@@ -438,6 +432,12 @@ int iscsit_reset_np_thread(
        }
        spin_unlock_bh(&np->np_thread_lock);
 
+       if (tpg_np && shutdown) {
+               kref_put(&tpg_np->tpg_np_kref, iscsit_login_kref_put);
+
+               wait_for_completion(&tpg_np->tpg_np_comp);
+       }
+
        return 0;
 }
 
@@ -497,7 +497,6 @@ static struct iscsit_transport iscsi_target_transport = {
        .iscsit_setup_np        = iscsit_setup_np,
        .iscsit_accept_np       = iscsit_accept_np,
        .iscsit_free_np         = iscsit_free_np,
-       .iscsit_alloc_cmd       = iscsit_alloc_cmd,
        .iscsit_get_login_rx    = iscsit_get_login_rx,
        .iscsit_put_login_tx    = iscsit_put_login_tx,
        .iscsit_get_dataout     = iscsit_build_r2ts_for_cmd,
@@ -538,22 +537,13 @@ static int __init iscsi_target_init_module(void)
                goto ts_out1;
        }
 
-       lio_cmd_cache = kmem_cache_create("lio_cmd_cache",
-                       sizeof(struct iscsi_cmd), __alignof__(struct iscsi_cmd),
-                       0, NULL);
-       if (!lio_cmd_cache) {
-               pr_err("Unable to kmem_cache_create() for"
-                               " lio_cmd_cache\n");
-               goto ts_out2;
-       }
-
        lio_qr_cache = kmem_cache_create("lio_qr_cache",
                        sizeof(struct iscsi_queue_req),
                        __alignof__(struct iscsi_queue_req), 0, NULL);
        if (!lio_qr_cache) {
                pr_err("nable to kmem_cache_create() for"
                                " lio_qr_cache\n");
-               goto cmd_out;
+               goto ts_out2;
        }
 
        lio_dr_cache = kmem_cache_create("lio_dr_cache",
@@ -597,8 +587,6 @@ dr_out:
        kmem_cache_destroy(lio_dr_cache);
 qr_out:
        kmem_cache_destroy(lio_qr_cache);
-cmd_out:
-       kmem_cache_destroy(lio_cmd_cache);
 ts_out2:
        iscsi_deallocate_thread_sets();
 ts_out1:
@@ -616,7 +604,6 @@ static void __exit iscsi_target_cleanup_module(void)
        iscsi_thread_set_free();
        iscsit_release_discovery_tpg();
        iscsit_unregister_transport(&iscsi_target_transport);
-       kmem_cache_destroy(lio_cmd_cache);
        kmem_cache_destroy(lio_qr_cache);
        kmem_cache_destroy(lio_dr_cache);
        kmem_cache_destroy(lio_ooo_cache);
@@ -3447,12 +3434,10 @@ static int iscsit_build_sendtargets_response(struct iscsi_cmd *cmd)
                                bool inaddr_any = iscsit_check_inaddr_any(np);
 
                                len = sprintf(buf, "TargetAddress="
-                                       "%s%s%s:%hu,%hu",
-                                       (np->np_sockaddr.ss_family == AF_INET6) ?
-                                       "[" : "", (inaddr_any == false) ?
+                                       "%s:%hu,%hu",
+                                       (inaddr_any == false) ?
                                                np->np_ip : conn->local_ip,
-                                       (np->np_sockaddr.ss_family == AF_INET6) ?
-                                       "]" : "", (inaddr_any == false) ?
+                                       (inaddr_any == false) ?
                                                np->np_port : conn->local_port,
                                        tpg->tpgt);
                                len += 1;
index 2c437cb..e936d56 100644 (file)
@@ -7,13 +7,15 @@ extern void iscsit_put_tiqn_for_login(struct iscsi_tiqn *);
 extern struct iscsi_tiqn *iscsit_add_tiqn(unsigned char *);
 extern void iscsit_del_tiqn(struct iscsi_tiqn *);
 extern int iscsit_access_np(struct iscsi_np *, struct iscsi_portal_group *);
-extern int iscsit_deaccess_np(struct iscsi_np *, struct iscsi_portal_group *);
+extern void iscsit_login_kref_put(struct kref *);
+extern int iscsit_deaccess_np(struct iscsi_np *, struct iscsi_portal_group *,
+                               struct iscsi_tpg_np *);
 extern bool iscsit_check_np_match(struct __kernel_sockaddr_storage *,
                                struct iscsi_np *, int);
 extern struct iscsi_np *iscsit_add_np(struct __kernel_sockaddr_storage *,
                                char *, int);
 extern int iscsit_reset_np_thread(struct iscsi_np *, struct iscsi_tpg_np *,
-                               struct iscsi_portal_group *);
+                               struct iscsi_portal_group *, bool);
 extern int iscsit_del_np(struct iscsi_np *);
 extern int iscsit_reject_cmd(struct iscsi_cmd *cmd, u8, unsigned char *);
 extern void iscsit_set_unsoliticed_dataout(struct iscsi_cmd *);
@@ -37,7 +39,6 @@ extern struct target_fabric_configfs *lio_target_fabric_configfs;
 
 extern struct kmem_cache *lio_dr_cache;
 extern struct kmem_cache *lio_ooo_cache;
-extern struct kmem_cache *lio_cmd_cache;
 extern struct kmem_cache *lio_qr_cache;
 extern struct kmem_cache *lio_r2t_cache;
 
index cee1754..7505fdd 100644 (file)
@@ -1,9 +1,7 @@
 /*******************************************************************************
  * This file houses the main functions for the iSCSI CHAP support
  *
- * © Copyright 2007-2011 RisingTide Systems LLC.
- *
- * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ * (c) Copyright 2007-2013 Datera, Inc.
  *
  * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
  *
index bbfd288..fd14525 100644 (file)
@@ -2,9 +2,7 @@
  * This file contains the configfs implementation for iSCSI Target mode
  * from the LIO-Target Project.
  *
- * © Copyright 2007-2011 RisingTide Systems LLC.
- *
- * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ * (c) Copyright 2007-2013 Datera, Inc.
  *
  * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
  *
@@ -265,9 +263,9 @@ static struct se_tpg_np *lio_target_call_addnptotpg(
                *port_str = '\0'; /* Terminate string for IP */
                port_str++; /* Skip over ":" */
 
-               ret = strict_strtoul(port_str, 0, &port);
+               ret = kstrtoul(port_str, 0, &port);
                if (ret < 0) {
-                       pr_err("strict_strtoul() failed for port_str: %d\n", ret);
+                       pr_err("kstrtoul() failed for port_str: %d\n", ret);
                        return ERR_PTR(ret);
                }
                sock_in6 = (struct sockaddr_in6 *)&sockaddr;
@@ -290,9 +288,9 @@ static struct se_tpg_np *lio_target_call_addnptotpg(
                *port_str = '\0'; /* Terminate string for IP */
                port_str++; /* Skip over ":" */
 
-               ret = strict_strtoul(port_str, 0, &port);
+               ret = kstrtoul(port_str, 0, &port);
                if (ret < 0) {
-                       pr_err("strict_strtoul() failed for port_str: %d\n", ret);
+                       pr_err("kstrtoul() failed for port_str: %d\n", ret);
                        return ERR_PTR(ret);
                }
                sock_in = (struct sockaddr_in *)&sockaddr;
@@ -1481,7 +1479,7 @@ static ssize_t lio_target_wwn_show_attr_lio_version(
        struct target_fabric_configfs *tf,
        char *page)
 {
-       return sprintf(page, "RisingTide Systems Linux-iSCSI Target "ISCSIT_VERSION"\n");
+       return sprintf(page, "Datera Inc. iSCSI Target "ISCSIT_VERSION"\n");
 }
 
 TF_WWN_ATTR_RO(lio_target, lio_version);
@@ -1925,7 +1923,7 @@ static void lio_release_cmd(struct se_cmd *se_cmd)
        struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd);
 
        pr_debug("Entering lio_release_cmd for se_cmd: %p\n", se_cmd);
-       cmd->release_cmd(cmd);
+       iscsit_release_cmd(cmd);
 }
 
 /* End functions for target_core_fabric_ops */
index 4f77a78..9a5721b 100644 (file)
@@ -9,7 +9,7 @@
 #include <scsi/iscsi_proto.h>
 #include <target/target_core_base.h>
 
-#define ISCSIT_VERSION                 "v4.1.0-rc2"
+#define ISCSIT_VERSION                 "v4.1.0"
 #define ISCSI_MAX_DATASN_MISSING_COUNT 16
 #define ISCSI_TX_THREAD_TCP_TIMEOUT    2
 #define ISCSI_RX_THREAD_TCP_TIMEOUT    2
@@ -17,6 +17,9 @@
 #define SECONDS_FOR_ASYNC_TEXT         10
 #define SECONDS_FOR_LOGOUT_COMP                15
 #define WHITE_SPACE                    " \t\v\f\n\r"
+#define ISCSIT_MIN_TAGS                        16
+#define ISCSIT_EXTRA_TAGS              8
+#define ISCSIT_TCP_BACKLOG             256
 
 /* struct iscsi_node_attrib sanity values */
 #define NA_DATAOUT_TIMEOUT             3
@@ -47,7 +50,7 @@
 #define TA_NETIF_TIMEOUT_MAX           15
 #define TA_NETIF_TIMEOUT_MIN           2
 #define TA_GENERATE_NODE_ACLS          0
-#define TA_DEFAULT_CMDSN_DEPTH         16
+#define TA_DEFAULT_CMDSN_DEPTH         64
 #define TA_DEFAULT_CMDSN_DEPTH_MAX     512
 #define TA_DEFAULT_CMDSN_DEPTH_MIN     1
 #define TA_CACHE_DYNAMIC_ACLS          0
@@ -489,7 +492,6 @@ struct iscsi_cmd {
        u32                     first_data_sg_off;
        u32                     kmapped_nents;
        sense_reason_t          sense_reason;
-       void (*release_cmd)(struct iscsi_cmd *);
 }  ____cacheline_aligned;
 
 struct iscsi_tmr_req {
@@ -554,9 +556,19 @@ struct iscsi_conn {
        struct completion       rx_half_close_comp;
        /* socket used by this connection */
        struct socket           *sock;
+       void                    (*orig_data_ready)(struct sock *, int);
+       void                    (*orig_state_change)(struct sock *);
+#define LOGIN_FLAGS_READ_ACTIVE                1
+#define LOGIN_FLAGS_CLOSED             2
+#define LOGIN_FLAGS_READY              4
+       unsigned long           login_flags;
+       struct delayed_work     login_work;
+       struct delayed_work     login_cleanup_work;
+       struct iscsi_login      *login;
        struct timer_list       nopin_timer;
        struct timer_list       nopin_response_timer;
        struct timer_list       transport_timer;
+       struct task_struct      *login_kworker;
        /* Spinlock used for add/deleting cmd's from conn_cmd_list */
        spinlock_t              cmd_lock;
        spinlock_t              conn_usage_lock;
@@ -584,6 +596,7 @@ struct iscsi_conn {
        void                    *context;
        struct iscsi_login_thread_s *login_thread;
        struct iscsi_portal_group *tpg;
+       struct iscsi_tpg_np     *tpg_np;
        /* Pointer to parent session */
        struct iscsi_session    *sess;
        /* Pointer to thread_set in use for this conn's threads */
@@ -682,6 +695,7 @@ struct iscsi_login {
        u8 version_max;
        u8 login_complete;
        u8 login_failed;
+       bool zero_tsih;
        char isid[6];
        u32 cmd_sn;
        itt_t init_task_tag;
@@ -694,6 +708,7 @@ struct iscsi_login {
        char *req_buf;
        char *rsp_buf;
        struct iscsi_conn *conn;
+       struct iscsi_np *np;
 } ____cacheline_aligned;
 
 struct iscsi_node_attrib {
@@ -773,7 +788,6 @@ struct iscsi_np {
        struct __kernel_sockaddr_storage np_sockaddr;
        struct task_struct      *np_thread;
        struct timer_list       np_login_timer;
-       struct iscsi_portal_group *np_login_tpg;
        void                    *np_context;
        struct iscsit_transport *np_transport;
        struct list_head        np_list;
@@ -788,6 +802,8 @@ struct iscsi_tpg_np {
        struct list_head        tpg_np_parent_list;
        struct se_tpg_np        se_tpg_np;
        spinlock_t              tpg_np_parent_lock;
+       struct completion       tpg_np_comp;
+       struct kref             tpg_np_kref;
 };
 
 struct iscsi_portal_group {
@@ -809,7 +825,7 @@ struct iscsi_portal_group {
        spinlock_t              tpg_state_lock;
        struct se_portal_group tpg_se_tpg;
        struct mutex            tpg_access_lock;
-       struct mutex            np_login_lock;
+       struct semaphore        np_login_sem;
        struct iscsi_tpg_attrib tpg_attrib;
        struct iscsi_node_auth  tpg_demo_auth;
        /* Pointer to default list of iSCSI parameters for TPG */
index 848fee7..e93d5a7 100644 (file)
@@ -1,9 +1,7 @@
 /*******************************************************************************
  * This file contains the iSCSI Target DataIN value generation functions.
  *
 - * © Copyright 2007-2011 RisingTide Systems LLC.
- *
- * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ * (c) Copyright 2007-2013 Datera, Inc.
  *
  * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
  *
index 1b74033..6c7a510 100644 (file)
@@ -2,9 +2,7 @@
  * This file contains the iSCSI Virtual Device and Disk Transport
  * agnostic related functions.
  *
 - © Copyright 2007-2011 RisingTide Systems LLC.
- *
- * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ * (c) Copyright 2007-2013 Datera, Inc.
  *
  * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
  *
index 08bd878..41052e5 100644 (file)
@@ -2,9 +2,7 @@
  * This file contains error recovery level zero functions used by
  * the iSCSI Target driver.
  *
 - * © Copyright 2007-2011 RisingTide Systems LLC.
- *
- * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ * (c) Copyright 2007-2013 Datera, Inc.
  *
  * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
  *
index 586c268..e048d64 100644 (file)
@@ -1,9 +1,7 @@
 /*******************************************************************************
  * This file contains error recovery level one used by the iSCSI Target driver.
  *
 - * © Copyright 2007-2011 RisingTide Systems LLC.
- *
- * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ * (c) Copyright 2007-2013 Datera, Inc.
  *
  * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
  *
index 45a5afd..33be1fb 100644 (file)
@@ -2,9 +2,7 @@
  * This file contains error recovery level two functions used by
  * the iSCSI Target driver.
  *
 - * © Copyright 2007-2011 RisingTide Systems LLC.
- *
- * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ * (c) Copyright 2007-2013 Datera, Inc.
  *
  * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
  *
index bc788c5..1794c75 100644 (file)
@@ -1,9 +1,7 @@
 /*******************************************************************************
  * This file contains the login functions used by the iSCSI Target driver.
  *
 - * © Copyright 2007-2011 RisingTide Systems LLC.
- *
- * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ * (c) Copyright 2007-2013 Datera, Inc.
  *
  * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
  *
@@ -50,6 +48,7 @@ static struct iscsi_login *iscsi_login_init_conn(struct iscsi_conn *conn)
                pr_err("Unable to allocate memory for struct iscsi_login.\n");
                return NULL;
        }
+       conn->login = login;
        login->conn = conn;
        login->first_request = 1;
 
@@ -428,7 +427,7 @@ static int iscsi_login_zero_tsih_s2(
                                ISCSI_LOGIN_STATUS_NO_RESOURCES);
                        return -1;
                }
-               rc = strict_strtoul(param->value, 0, &mrdsl);
+               rc = kstrtoul(param->value, 0, &mrdsl);
                if (rc < 0) {
                        iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
                                ISCSI_LOGIN_STATUS_NO_RESOURCES);
@@ -684,7 +683,7 @@ static void iscsi_post_login_start_timers(struct iscsi_conn *conn)
                iscsit_start_nopin_timer(conn);
 }
 
-static int iscsi_post_login_handler(
+int iscsi_post_login_handler(
        struct iscsi_np *np,
        struct iscsi_conn *conn,
        u8 zero_tsih)
@@ -872,7 +871,7 @@ int iscsit_setup_np(
        struct __kernel_sockaddr_storage *sockaddr)
 {
        struct socket *sock = NULL;
-       int backlog = 5, ret, opt = 0, len;
+       int backlog = ISCSIT_TCP_BACKLOG, ret, opt = 0, len;
 
        switch (np->np_network_transport) {
        case ISCSI_TCP:
@@ -1007,16 +1006,24 @@ int iscsit_accept_np(struct iscsi_np *np, struct iscsi_conn *conn)
                rc = conn->sock->ops->getname(conn->sock,
                                (struct sockaddr *)&sock_in6, &err, 1);
                if (!rc) {
-                       snprintf(conn->login_ip, sizeof(conn->login_ip), "%pI6c",
-                               &sock_in6.sin6_addr.in6_u);
+                       if (!ipv6_addr_v4mapped(&sock_in6.sin6_addr))
+                               snprintf(conn->login_ip, sizeof(conn->login_ip), "[%pI6c]",
+                                       &sock_in6.sin6_addr.in6_u);
+                       else
+                               snprintf(conn->login_ip, sizeof(conn->login_ip), "%pI4",
+                                       &sock_in6.sin6_addr.s6_addr32[3]);
                        conn->login_port = ntohs(sock_in6.sin6_port);
                }
 
                rc = conn->sock->ops->getname(conn->sock,
                                (struct sockaddr *)&sock_in6, &err, 0);
                if (!rc) {
-                       snprintf(conn->local_ip, sizeof(conn->local_ip), "%pI6c",
-                               &sock_in6.sin6_addr.in6_u);
+                       if (!ipv6_addr_v4mapped(&sock_in6.sin6_addr))
+                               snprintf(conn->local_ip, sizeof(conn->local_ip), "[%pI6c]",
+                                       &sock_in6.sin6_addr.in6_u);
+                       else
+                               snprintf(conn->local_ip, sizeof(conn->local_ip), "%pI4",
+                                       &sock_in6.sin6_addr.s6_addr32[3]);
                        conn->local_port = ntohs(sock_in6.sin6_port);
                }
        } else {
@@ -1116,6 +1123,77 @@ iscsit_conn_set_transport(struct iscsi_conn *conn, struct iscsit_transport *t)
        return 0;
 }
 
+void iscsi_target_login_sess_out(struct iscsi_conn *conn,
+               struct iscsi_np *np, bool zero_tsih, bool new_sess)
+{
+       if (new_sess == false)
+               goto old_sess_out;
+
+       pr_err("iSCSI Login negotiation failed.\n");
+       iscsit_collect_login_stats(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+                                  ISCSI_LOGIN_STATUS_INIT_ERR);
+       if (!zero_tsih || !conn->sess)
+               goto old_sess_out;
+       if (conn->sess->se_sess)
+               transport_free_session(conn->sess->se_sess);
+       if (conn->sess->session_index != 0) {
+               spin_lock_bh(&sess_idr_lock);
+               idr_remove(&sess_idr, conn->sess->session_index);
+               spin_unlock_bh(&sess_idr_lock);
+       }
+       kfree(conn->sess->sess_ops);
+       kfree(conn->sess);
+
+old_sess_out:
+       iscsi_stop_login_thread_timer(np);
+       /*
+        * If login negotiation fails check if the Time2Retain timer
+        * needs to be restarted.
+        */
+       if (!zero_tsih && conn->sess) {
+               spin_lock_bh(&conn->sess->conn_lock);
+               if (conn->sess->session_state == TARG_SESS_STATE_FAILED) {
+                       struct se_portal_group *se_tpg =
+                                       &ISCSI_TPG_C(conn)->tpg_se_tpg;
+
+                       atomic_set(&conn->sess->session_continuation, 0);
+                       spin_unlock_bh(&conn->sess->conn_lock);
+                       spin_lock_bh(&se_tpg->session_lock);
+                       iscsit_start_time2retain_handler(conn->sess);
+                       spin_unlock_bh(&se_tpg->session_lock);
+               } else
+                       spin_unlock_bh(&conn->sess->conn_lock);
+               iscsit_dec_session_usage_count(conn->sess);
+       }
+
+       if (!IS_ERR(conn->conn_rx_hash.tfm))
+               crypto_free_hash(conn->conn_rx_hash.tfm);
+       if (!IS_ERR(conn->conn_tx_hash.tfm))
+               crypto_free_hash(conn->conn_tx_hash.tfm);
+
+       if (conn->conn_cpumask)
+               free_cpumask_var(conn->conn_cpumask);
+
+       kfree(conn->conn_ops);
+
+       if (conn->param_list) {
+               iscsi_release_param_list(conn->param_list);
+               conn->param_list = NULL;
+       }
+       iscsi_target_nego_release(conn);
+
+       if (conn->sock) {
+               sock_release(conn->sock);
+               conn->sock = NULL;
+       }
+
+       if (conn->conn_transport->iscsit_free_conn)
+               conn->conn_transport->iscsit_free_conn(conn);
+
+       iscsit_put_transport(conn->conn_transport);
+       kfree(conn);
+}
+
 static int __iscsi_target_login_thread(struct iscsi_np *np)
 {
        u8 *buffer, zero_tsih = 0;
@@ -1124,6 +1202,8 @@ static int __iscsi_target_login_thread(struct iscsi_np *np)
        struct iscsi_login *login;
        struct iscsi_portal_group *tpg = NULL;
        struct iscsi_login_req *pdu;
+       struct iscsi_tpg_np *tpg_np;
+       bool new_sess = false;
 
        flush_signals(current);
 
@@ -1264,6 +1344,7 @@ static int __iscsi_target_login_thread(struct iscsi_np *np)
                tpg = conn->tpg;
                goto new_sess_out;
        }
+       login->zero_tsih = zero_tsih;
 
        tpg = conn->tpg;
        if (!tpg) {
@@ -1279,7 +1360,8 @@ static int __iscsi_target_login_thread(struct iscsi_np *np)
                        goto old_sess_out;
        }
 
-       if (iscsi_target_start_negotiation(login, conn) < 0)
+       ret = iscsi_target_start_negotiation(login, conn);
+       if (ret < 0)
                goto new_sess_out;
 
        if (!conn->sess) {
@@ -1292,84 +1374,32 @@ static int __iscsi_target_login_thread(struct iscsi_np *np)
        if (signal_pending(current))
                goto new_sess_out;
 
-       ret = iscsi_post_login_handler(np, conn, zero_tsih);
+       if (ret == 1) {
+               tpg_np = conn->tpg_np;
 
-       if (ret < 0)
-               goto new_sess_out;
+               ret = iscsi_post_login_handler(np, conn, zero_tsih);
+               if (ret < 0)
+                       goto new_sess_out;
+
+               iscsit_deaccess_np(np, tpg, tpg_np);
+       }
 
-       iscsit_deaccess_np(np, tpg);
        tpg = NULL;
+       tpg_np = NULL;
        /* Get another socket */
        return 1;
 
 new_sess_out:
-       pr_err("iSCSI Login negotiation failed.\n");
-       iscsit_collect_login_stats(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
-                                 ISCSI_LOGIN_STATUS_INIT_ERR);
-       if (!zero_tsih || !conn->sess)
-               goto old_sess_out;
-       if (conn->sess->se_sess)
-               transport_free_session(conn->sess->se_sess);
-       if (conn->sess->session_index != 0) {
-               spin_lock_bh(&sess_idr_lock);
-               idr_remove(&sess_idr, conn->sess->session_index);
-               spin_unlock_bh(&sess_idr_lock);
-       }
-       kfree(conn->sess->sess_ops);
-       kfree(conn->sess);
+       new_sess = true;
 old_sess_out:
-       iscsi_stop_login_thread_timer(np);
-       /*
-        * If login negotiation fails check if the Time2Retain timer
-        * needs to be restarted.
-        */
-       if (!zero_tsih && conn->sess) {
-               spin_lock_bh(&conn->sess->conn_lock);
-               if (conn->sess->session_state == TARG_SESS_STATE_FAILED) {
-                       struct se_portal_group *se_tpg =
-                                       &ISCSI_TPG_C(conn)->tpg_se_tpg;
-
-                       atomic_set(&conn->sess->session_continuation, 0);
-                       spin_unlock_bh(&conn->sess->conn_lock);
-                       spin_lock_bh(&se_tpg->session_lock);
-                       iscsit_start_time2retain_handler(conn->sess);
-                       spin_unlock_bh(&se_tpg->session_lock);
-               } else
-                       spin_unlock_bh(&conn->sess->conn_lock);
-               iscsit_dec_session_usage_count(conn->sess);
-       }
-
-       if (!IS_ERR(conn->conn_rx_hash.tfm))
-               crypto_free_hash(conn->conn_rx_hash.tfm);
-       if (!IS_ERR(conn->conn_tx_hash.tfm))
-               crypto_free_hash(conn->conn_tx_hash.tfm);
-
-       if (conn->conn_cpumask)
-               free_cpumask_var(conn->conn_cpumask);
-
-       kfree(conn->conn_ops);
-
-       if (conn->param_list) {
-               iscsi_release_param_list(conn->param_list);
-               conn->param_list = NULL;
-       }
-       iscsi_target_nego_release(conn);
-
-       if (conn->sock) {
-               sock_release(conn->sock);
-               conn->sock = NULL;
-       }
-
-       if (conn->conn_transport->iscsit_free_conn)
-               conn->conn_transport->iscsit_free_conn(conn);
-
-       iscsit_put_transport(conn->conn_transport);
-
-       kfree(conn);
+       tpg_np = conn->tpg_np;
+       iscsi_target_login_sess_out(conn, np, zero_tsih, new_sess);
+       new_sess = false;
 
        if (tpg) {
-               iscsit_deaccess_np(np, tpg);
+               iscsit_deaccess_np(np, tpg, tpg_np);
                tpg = NULL;
+               tpg_np = NULL;
        }
 
 out:
index 63efd28..29d0983 100644 (file)
@@ -12,6 +12,9 @@ extern int iscsit_accept_np(struct iscsi_np *, struct iscsi_conn *);
 extern int iscsit_get_login_rx(struct iscsi_conn *, struct iscsi_login *);
 extern int iscsit_put_login_tx(struct iscsi_conn *, struct iscsi_login *, u32);
 extern void iscsit_free_conn(struct iscsi_np *, struct iscsi_conn *);
+extern int iscsi_post_login_handler(struct iscsi_np *, struct iscsi_conn *, u8);
+extern void iscsi_target_login_sess_out(struct iscsi_conn *, struct iscsi_np *,
+                               bool, bool);
 extern int iscsi_target_login_thread(void *);
 extern int iscsi_login_disable_FIM_keys(struct iscsi_param_list *, struct iscsi_conn *);
 
index c4675b4..14d1aed 100644 (file)
@@ -1,9 +1,7 @@
 /*******************************************************************************
  * This file contains main functions related to iSCSI Parameter negotiation.
  *
 - * © Copyright 2007-2011 RisingTide Systems LLC.
- *
- * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ * (c) Copyright 2007-2013 Datera, Inc.
  *
  * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
  *
@@ -377,15 +375,284 @@ static int iscsi_target_do_tx_login_io(struct iscsi_conn *conn, struct iscsi_log
        return 0;
 }
 
-static int iscsi_target_do_login_io(struct iscsi_conn *conn, struct iscsi_login *login)
+static void iscsi_target_sk_data_ready(struct sock *sk, int count)
 {
-       if (iscsi_target_do_tx_login_io(conn, login) < 0)
-               return -1;
+       struct iscsi_conn *conn = sk->sk_user_data;
+       bool rc;
 
-       if (conn->conn_transport->iscsit_get_login_rx(conn, login) < 0)
-               return -1;
+       pr_debug("Entering iscsi_target_sk_data_ready: conn: %p\n", conn);
 
-       return 0;
+       write_lock_bh(&sk->sk_callback_lock);
+       if (!sk->sk_user_data) {
+               write_unlock_bh(&sk->sk_callback_lock);
+               return;
+       }
+       if (!test_bit(LOGIN_FLAGS_READY, &conn->login_flags)) {
+               write_unlock_bh(&sk->sk_callback_lock);
+               pr_debug("Got LOGIN_FLAGS_READY=0, conn: %p >>>>\n", conn);
+               return;
+       }
+       if (test_bit(LOGIN_FLAGS_CLOSED, &conn->login_flags)) {
+               write_unlock_bh(&sk->sk_callback_lock);
+               pr_debug("Got LOGIN_FLAGS_CLOSED=1, conn: %p >>>>\n", conn);
+               return;
+       }
+       if (test_and_set_bit(LOGIN_FLAGS_READ_ACTIVE, &conn->login_flags)) {
+               write_unlock_bh(&sk->sk_callback_lock);
+               pr_debug("Got LOGIN_FLAGS_READ_ACTIVE=1, conn: %p >>>>\n", conn);
+               return;
+       }
+
+       rc = schedule_delayed_work(&conn->login_work, 0);
+       if (rc == false) {
+               pr_debug("iscsi_target_sk_data_ready, schedule_delayed_work"
+                        " got false\n");
+       }
+       write_unlock_bh(&sk->sk_callback_lock);
+}
+
+static void iscsi_target_sk_state_change(struct sock *);
+
+static void iscsi_target_set_sock_callbacks(struct iscsi_conn *conn)
+{
+       struct sock *sk;
+
+       if (!conn->sock)
+               return;
+
+       sk = conn->sock->sk;
+       pr_debug("Entering iscsi_target_set_sock_callbacks: conn: %p\n", conn);
+
+       write_lock_bh(&sk->sk_callback_lock);
+       sk->sk_user_data = conn;
+       conn->orig_data_ready = sk->sk_data_ready;
+       conn->orig_state_change = sk->sk_state_change;
+       sk->sk_data_ready = iscsi_target_sk_data_ready;
+       sk->sk_state_change = iscsi_target_sk_state_change;
+       write_unlock_bh(&sk->sk_callback_lock);
+
+       sk->sk_sndtimeo = TA_LOGIN_TIMEOUT * HZ;
+       sk->sk_rcvtimeo = TA_LOGIN_TIMEOUT * HZ;
+}
+
+static void iscsi_target_restore_sock_callbacks(struct iscsi_conn *conn)
+{
+       struct sock *sk;
+
+       if (!conn->sock)
+               return;
+
+       sk = conn->sock->sk;
+       pr_debug("Entering iscsi_target_restore_sock_callbacks: conn: %p\n", conn);
+
+       write_lock_bh(&sk->sk_callback_lock);
+       if (!sk->sk_user_data) {
+               write_unlock_bh(&sk->sk_callback_lock);
+               return;
+       }
+       sk->sk_user_data = NULL;
+       sk->sk_data_ready = conn->orig_data_ready;
+       sk->sk_state_change = conn->orig_state_change;
+       write_unlock_bh(&sk->sk_callback_lock);
+
+       sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
+       sk->sk_rcvtimeo = MAX_SCHEDULE_TIMEOUT;
+}
+
+static int iscsi_target_do_login(struct iscsi_conn *, struct iscsi_login *);
+
+static bool iscsi_target_sk_state_check(struct sock *sk)
+{
+       if (sk->sk_state == TCP_CLOSE_WAIT || sk->sk_state == TCP_CLOSE) {
+               pr_debug("iscsi_target_sk_state_check: TCP_CLOSE_WAIT|TCP_CLOSE,"
+                       "returning FALSE\n");
+               return false;
+       }
+       return true;
+}
+
+static void iscsi_target_login_drop(struct iscsi_conn *conn, struct iscsi_login *login)
+{
+       struct iscsi_np *np = login->np;
+       bool zero_tsih = login->zero_tsih;
+
+       iscsi_remove_failed_auth_entry(conn);
+       iscsi_target_nego_release(conn);
+       iscsi_target_login_sess_out(conn, np, zero_tsih, true);
+}
+
+static void iscsi_target_login_timeout(unsigned long data)
+{
+       struct iscsi_conn *conn = (struct iscsi_conn *)data;
+
+       pr_debug("Entering iscsi_target_login_timeout >>>>>>>>>>>>>>>>>>>\n");
+
+       if (conn->login_kworker) {
+               pr_debug("Sending SIGINT to conn->login_kworker %s/%d\n",
+                        conn->login_kworker->comm, conn->login_kworker->pid);
+               send_sig(SIGINT, conn->login_kworker, 1);
+       }
+}
+
+static void iscsi_target_do_login_rx(struct work_struct *work)
+{
+       struct iscsi_conn *conn = container_of(work,
+                               struct iscsi_conn, login_work.work);
+       struct iscsi_login *login = conn->login;
+       struct iscsi_np *np = login->np;
+       struct iscsi_portal_group *tpg = conn->tpg;
+       struct iscsi_tpg_np *tpg_np = conn->tpg_np;
+       struct timer_list login_timer;
+       int rc, zero_tsih = login->zero_tsih;
+       bool state;
+
+       pr_debug("entering iscsi_target_do_login_rx, conn: %p, %s:%d\n",
+                       conn, current->comm, current->pid);
+
+       spin_lock(&tpg->tpg_state_lock);
+       state = (tpg->tpg_state == TPG_STATE_ACTIVE);
+       spin_unlock(&tpg->tpg_state_lock);
+
+       if (state == false) {
+               pr_debug("iscsi_target_do_login_rx: tpg_state != TPG_STATE_ACTIVE\n");
+               iscsi_target_restore_sock_callbacks(conn);
+               iscsi_target_login_drop(conn, login);
+               iscsit_deaccess_np(np, tpg, tpg_np);
+               return;
+       }
+
+       if (conn->sock) {
+               struct sock *sk = conn->sock->sk;
+
+               read_lock_bh(&sk->sk_callback_lock);
+               state = iscsi_target_sk_state_check(sk);
+               read_unlock_bh(&sk->sk_callback_lock);
+
+               if (state == false) {
+                       pr_debug("iscsi_target_do_login_rx, TCP state CLOSE\n");
+                       iscsi_target_restore_sock_callbacks(conn);
+                       iscsi_target_login_drop(conn, login);
+                       iscsit_deaccess_np(np, tpg, tpg_np);
+                       return;
+               }
+       }
+
+       conn->login_kworker = current;
+       allow_signal(SIGINT);
+
+       init_timer(&login_timer);
+       login_timer.expires = (get_jiffies_64() + TA_LOGIN_TIMEOUT * HZ);
+       login_timer.data = (unsigned long)conn;
+       login_timer.function = iscsi_target_login_timeout;
+       add_timer(&login_timer);
+       pr_debug("Starting login_timer for %s/%d\n", current->comm, current->pid);
+
+       rc = conn->conn_transport->iscsit_get_login_rx(conn, login);
+       del_timer_sync(&login_timer);
+       flush_signals(current);
+       conn->login_kworker = NULL;
+
+       if (rc < 0) {
+               iscsi_target_restore_sock_callbacks(conn);
+               iscsi_target_login_drop(conn, login);
+               iscsit_deaccess_np(np, tpg, tpg_np);
+               return;
+       }
+
+       pr_debug("iscsi_target_do_login_rx after rx_login_io, %p, %s:%d\n",
+                       conn, current->comm, current->pid);
+
+       rc = iscsi_target_do_login(conn, login);
+       if (rc < 0) {
+               iscsi_target_restore_sock_callbacks(conn);
+               iscsi_target_login_drop(conn, login);
+               iscsit_deaccess_np(np, tpg, tpg_np);
+       } else if (!rc) {
+               if (conn->sock) {
+                       struct sock *sk = conn->sock->sk;
+
+                       write_lock_bh(&sk->sk_callback_lock);
+                       clear_bit(LOGIN_FLAGS_READ_ACTIVE, &conn->login_flags);
+                       write_unlock_bh(&sk->sk_callback_lock);
+               }
+       } else if (rc == 1) {
+               iscsi_target_nego_release(conn);
+               iscsi_post_login_handler(np, conn, zero_tsih);
+               iscsit_deaccess_np(np, tpg, tpg_np);
+       }
+}
+
+static void iscsi_target_do_cleanup(struct work_struct *work)
+{
+       struct iscsi_conn *conn = container_of(work,
+                               struct iscsi_conn, login_cleanup_work.work);
+       struct sock *sk = conn->sock->sk;
+       struct iscsi_login *login = conn->login;
+       struct iscsi_np *np = login->np;
+       struct iscsi_portal_group *tpg = conn->tpg;
+       struct iscsi_tpg_np *tpg_np = conn->tpg_np;
+
+       pr_debug("Entering iscsi_target_do_cleanup\n");
+
+       cancel_delayed_work_sync(&conn->login_work);
+       conn->orig_state_change(sk);
+
+       iscsi_target_restore_sock_callbacks(conn);
+       iscsi_target_login_drop(conn, login);
+       iscsit_deaccess_np(np, tpg, tpg_np);
+
+       pr_debug("iscsi_target_do_cleanup done()\n");
+}
+
+static void iscsi_target_sk_state_change(struct sock *sk)
+{
+       struct iscsi_conn *conn;
+       void (*orig_state_change)(struct sock *);
+       bool state;
+
+       pr_debug("Entering iscsi_target_sk_state_change\n");
+
+       write_lock_bh(&sk->sk_callback_lock);
+       conn = sk->sk_user_data;
+       if (!conn) {
+               write_unlock_bh(&sk->sk_callback_lock);
+               return;
+       }
+       orig_state_change = conn->orig_state_change;
+
+       if (!test_bit(LOGIN_FLAGS_READY, &conn->login_flags)) {
+               pr_debug("Got LOGIN_FLAGS_READY=0 sk_state_change conn: %p\n",
+                        conn);
+               write_unlock_bh(&sk->sk_callback_lock);
+               orig_state_change(sk);
+               return;
+       }
+       if (test_bit(LOGIN_FLAGS_READ_ACTIVE, &conn->login_flags)) {
+               pr_debug("Got LOGIN_FLAGS_READ_ACTIVE=1 sk_state_change"
+                        " conn: %p\n", conn);
+               write_unlock_bh(&sk->sk_callback_lock);
+               orig_state_change(sk);
+               return;
+       }
+       if (test_and_set_bit(LOGIN_FLAGS_CLOSED, &conn->login_flags)) {
+               pr_debug("Got LOGIN_FLAGS_CLOSED=1 sk_state_change conn: %p\n",
+                        conn);
+               write_unlock_bh(&sk->sk_callback_lock);
+               orig_state_change(sk);
+               return;
+       }
+
+       state = iscsi_target_sk_state_check(sk);
+       write_unlock_bh(&sk->sk_callback_lock);
+
+       pr_debug("iscsi_target_sk_state_change: state: %d\n", state);
+
+       if (!state) {
+               pr_debug("iscsi_target_sk_state_change got failed state\n");
+               schedule_delayed_work(&conn->login_cleanup_work, 0);
+               return;
+       }
+       orig_state_change(sk);
 }
 
 /*
@@ -643,10 +910,11 @@ static int iscsi_target_do_login(struct iscsi_conn *conn, struct iscsi_login *lo
                        if (login_rsp->flags & ISCSI_FLAG_LOGIN_TRANSIT) {
                                login->tsih = conn->sess->tsih;
                                login->login_complete = 1;
+                               iscsi_target_restore_sock_callbacks(conn);
                                if (iscsi_target_do_tx_login_io(conn,
                                                login) < 0)
                                        return -1;
-                               return 0;
+                               return 1;
                        }
                        break;
                default:
@@ -656,13 +924,29 @@ static int iscsi_target_do_login(struct iscsi_conn *conn, struct iscsi_login *lo
                        break;
                }
 
-               if (iscsi_target_do_login_io(conn, login) < 0)
+               if (iscsi_target_do_tx_login_io(conn, login) < 0)
                        return -1;
 
                if (login_rsp->flags & ISCSI_FLAG_LOGIN_TRANSIT) {
                        login_rsp->flags &= ~ISCSI_FLAG_LOGIN_TRANSIT;
                        login_rsp->flags &= ~ISCSI_FLAG_LOGIN_NEXT_STAGE_MASK;
                }
+               break;
+       }
+
+       if (conn->sock) {
+               struct sock *sk = conn->sock->sk;
+               bool state;
+
+               read_lock_bh(&sk->sk_callback_lock);
+               state = iscsi_target_sk_state_check(sk);
+               read_unlock_bh(&sk->sk_callback_lock);
+
+               if (!state) {
+                       pr_debug("iscsi_target_do_login() failed state for"
+                                " conn: %p\n", conn);
+                       return -1;
+               }
        }
 
        return 0;
@@ -695,9 +979,17 @@ int iscsi_target_locate_portal(
        char *tmpbuf, *start = NULL, *end = NULL, *key, *value;
        struct iscsi_session *sess = conn->sess;
        struct iscsi_tiqn *tiqn;
+       struct iscsi_tpg_np *tpg_np = NULL;
        struct iscsi_login_req *login_req;
-       u32 payload_length;
-       int sessiontype = 0, ret = 0;
+       struct se_node_acl *se_nacl;
+       u32 payload_length, queue_depth = 0;
+       int sessiontype = 0, ret = 0, tag_num, tag_size;
+
+       INIT_DELAYED_WORK(&conn->login_work, iscsi_target_do_login_rx);
+       INIT_DELAYED_WORK(&conn->login_cleanup_work, iscsi_target_do_cleanup);
+       iscsi_target_set_sock_callbacks(conn);
+
+       login->np = np;
 
        login_req = (struct iscsi_login_req *) login->req;
        payload_length = ntoh24(login_req->dlength);
@@ -791,7 +1083,7 @@ int iscsi_target_locate_portal(
                        goto out;
                }
                ret = 0;
-               goto out;
+               goto alloc_tags;
        }
 
 get_target:
@@ -822,7 +1114,7 @@ get_target:
        /*
         * Locate Target Portal Group from Storage Node.
         */
-       conn->tpg = iscsit_get_tpg_from_np(tiqn, np);
+       conn->tpg = iscsit_get_tpg_from_np(tiqn, np, &tpg_np);
        if (!conn->tpg) {
                pr_err("Unable to locate Target Portal Group"
                                " on %s\n", tiqn->tiqn);
@@ -832,12 +1124,16 @@ get_target:
                ret = -1;
                goto out;
        }
+       conn->tpg_np = tpg_np;
        pr_debug("Located Portal Group Object: %hu\n", conn->tpg->tpgt);
        /*
         * Setup crc32c modules from libcrypto
         */
        if (iscsi_login_setup_crypto(conn) < 0) {
                pr_err("iscsi_login_setup_crypto() failed\n");
+               kref_put(&tpg_np->tpg_np_kref, iscsit_login_kref_put);
+               iscsit_put_tiqn_for_login(tiqn);
+               conn->tpg = NULL;
                ret = -1;
                goto out;
        }
@@ -846,11 +1142,12 @@ get_target:
         * process login attempt.
         */
        if (iscsit_access_np(np, conn->tpg) < 0) {
+               kref_put(&tpg_np->tpg_np_kref, iscsit_login_kref_put);
                iscsit_put_tiqn_for_login(tiqn);
                iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
                                ISCSI_LOGIN_STATUS_SVC_UNAVAILABLE);
-               ret = -1;
                conn->tpg = NULL;
+               ret = -1;
                goto out;
        }
 
@@ -883,8 +1180,27 @@ get_target:
                ret = -1;
                goto out;
        }
+       se_nacl = sess->se_sess->se_node_acl;
+       queue_depth = se_nacl->queue_depth;
+       /*
+        * Setup pre-allocated tags based upon allowed per NodeACL CmdSN
+        * depth for non immediate commands, plus extra tags for immediate
+        * commands.
+        *
+        * Also enforce a ISCSIT_MIN_TAGS to prevent unnecessary contention
+        * in per-cpu-ida tag allocation logic + small queue_depth.
+        */
+alloc_tags:
+       tag_num = max_t(u32, ISCSIT_MIN_TAGS, queue_depth);
+       tag_num += ISCSIT_EXTRA_TAGS;
+       tag_size = sizeof(struct iscsi_cmd) + conn->conn_transport->priv_size;
 
-       ret = 0;
+       ret = transport_alloc_session_tags(sess->se_sess, tag_num, tag_size);
+       if (ret < 0) {
+               iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR,
+                                   ISCSI_LOGIN_STATUS_NO_RESOURCES);
+               ret = -1;
+       }
 out:
        kfree(tmpbuf);
        return ret;
@@ -897,10 +1213,23 @@ int iscsi_target_start_negotiation(
        int ret;
 
        ret = iscsi_target_do_login(conn, login);
-       if (ret != 0)
+       if (!ret) {
+               if (conn->sock) {
+                       struct sock *sk = conn->sock->sk;
+
+                       write_lock_bh(&sk->sk_callback_lock);
+                       set_bit(LOGIN_FLAGS_READY, &conn->login_flags);
+                       write_unlock_bh(&sk->sk_callback_lock);
+               }
+       } else if (ret < 0) {
+               cancel_delayed_work_sync(&conn->login_work);
+               cancel_delayed_work_sync(&conn->login_cleanup_work);
+               iscsi_target_restore_sock_callbacks(conn);
                iscsi_remove_failed_auth_entry(conn);
+       }
+       if (ret != 0)
+               iscsi_target_nego_release(conn);
 
-       iscsi_target_nego_release(conn);
        return ret;
 }
 
index 11dc293..93bdc47 100644 (file)
@@ -1,9 +1,7 @@
 /*******************************************************************************
  * This file contains the main functions related to Initiator Node Attributes.
  *
- * © Copyright 2007-2011 RisingTide Systems LLC.
- *
- * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ * (c) Copyright 2007-2013 Datera, Inc.
  *
  * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
  *
index 35fd643..4d2e23f 100644 (file)
@@ -1,9 +1,7 @@
 /*******************************************************************************
  * This file contains main functions related to iSCSI Parameter negotiation.
  *
- * © Copyright 2007-2011 RisingTide Systems LLC.
- *
- * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ * (c) Copyright 2007-2013 Datera, Inc.
  *
  * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
  *
@@ -1182,7 +1180,7 @@ static int iscsi_check_acceptor_state(struct iscsi_param *param, char *value,
                        unsigned long long tmp;
                        int rc;
 
-                       rc = strict_strtoull(param->value, 0, &tmp);
+                       rc = kstrtoull(param->value, 0, &tmp);
                        if (rc < 0)
                                return -1;
 
index edb592a..ca41b58 100644 (file)
@@ -2,9 +2,7 @@
  * This file contains main functions related to iSCSI DataSequenceInOrder=No
  * and DataPDUInOrder=No.
  *
- © Copyright 2007-2011 RisingTide Systems LLC.
- *
- * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ * (c) Copyright 2007-2013 Datera, Inc.
  *
  * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
  *
index 464b420..f788e8b 100644 (file)
@@ -2,9 +2,7 @@
  * Modern ConfigFS group context specific iSCSI statistics based on original
  * iscsi_target_mib.c code
  *
- * Copyright (c) 2011 Rising Tide Systems
- *
- * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ * Copyright (c) 2011-2013 Datera, Inc.
  *
  * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
  *
@@ -177,7 +175,7 @@ ISCSI_STAT_INSTANCE_ATTR_RO(description);
 static ssize_t iscsi_stat_instance_show_attr_vendor(
        struct iscsi_wwn_stat_grps *igrps, char *page)
 {
-       return snprintf(page, PAGE_SIZE, "RisingTide Systems iSCSI-Target\n");
+       return snprintf(page, PAGE_SIZE, "Datera, Inc. iSCSI-Target\n");
 }
 ISCSI_STAT_INSTANCE_ATTR_RO(vendor);
 
@@ -432,13 +430,7 @@ static ssize_t iscsi_stat_tgt_attr_show_attr_fail_intr_addr(
        int ret;
 
        spin_lock(&lstat->lock);
-       if (lstat->last_intr_fail_ip_family == AF_INET6) {
-               ret = snprintf(page, PAGE_SIZE, "[%s]\n",
-                              lstat->last_intr_fail_ip_addr);
-       } else {
-               ret = snprintf(page, PAGE_SIZE, "%s\n",
-                              lstat->last_intr_fail_ip_addr);
-       }
+       ret = snprintf(page, PAGE_SIZE, "%s\n", lstat->last_intr_fail_ip_addr);
        spin_unlock(&lstat->lock);
 
        return ret;
index b997e5d..78404b1 100644 (file)
@@ -1,9 +1,7 @@
 /*******************************************************************************
  * This file contains the iSCSI Target specific Task Management functions.
  *
- * © Copyright 2007-2011 RisingTide Systems LLC.
- *
- * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ * (c) Copyright 2007-2013 Datera, Inc.
  *
  * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
  *
index 439260b..4faeb47 100644 (file)
@@ -1,9 +1,7 @@
 /*******************************************************************************
  * This file contains iSCSI Target Portal Group related functions.
  *
- * © Copyright 2007-2011 RisingTide Systems LLC.
- *
- * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ * (c) Copyright 2007-2013 Datera, Inc.
  *
  * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
  *
@@ -49,7 +47,7 @@ struct iscsi_portal_group *iscsit_alloc_portal_group(struct iscsi_tiqn *tiqn, u1
        INIT_LIST_HEAD(&tpg->tpg_gnp_list);
        INIT_LIST_HEAD(&tpg->tpg_list);
        mutex_init(&tpg->tpg_access_lock);
-       mutex_init(&tpg->np_login_lock);
+       sema_init(&tpg->np_login_sem, 1);
        spin_lock_init(&tpg->tpg_state_lock);
        spin_lock_init(&tpg->tpg_np_lock);
 
@@ -129,7 +127,8 @@ void iscsit_release_discovery_tpg(void)
 
 struct iscsi_portal_group *iscsit_get_tpg_from_np(
        struct iscsi_tiqn *tiqn,
-       struct iscsi_np *np)
+       struct iscsi_np *np,
+       struct iscsi_tpg_np **tpg_np_out)
 {
        struct iscsi_portal_group *tpg = NULL;
        struct iscsi_tpg_np *tpg_np;
@@ -147,6 +146,8 @@ struct iscsi_portal_group *iscsit_get_tpg_from_np(
                spin_lock(&tpg->tpg_np_lock);
                list_for_each_entry(tpg_np, &tpg->tpg_gnp_list, tpg_np_list) {
                        if (tpg_np->tpg_np == np) {
+                               *tpg_np_out = tpg_np;
+                               kref_get(&tpg_np->tpg_np_kref);
                                spin_unlock(&tpg->tpg_np_lock);
                                spin_unlock(&tiqn->tiqn_tpg_lock);
                                return tpg;
@@ -175,18 +176,20 @@ void iscsit_put_tpg(struct iscsi_portal_group *tpg)
 
 static void iscsit_clear_tpg_np_login_thread(
        struct iscsi_tpg_np *tpg_np,
-       struct iscsi_portal_group *tpg)
+       struct iscsi_portal_group *tpg,
+       bool shutdown)
 {
        if (!tpg_np->tpg_np) {
                pr_err("struct iscsi_tpg_np->tpg_np is NULL!\n");
                return;
        }
 
-       iscsit_reset_np_thread(tpg_np->tpg_np, tpg_np, tpg);
+       iscsit_reset_np_thread(tpg_np->tpg_np, tpg_np, tpg, shutdown);
 }
 
 void iscsit_clear_tpg_np_login_threads(
-       struct iscsi_portal_group *tpg)
+       struct iscsi_portal_group *tpg,
+       bool shutdown)
 {
        struct iscsi_tpg_np *tpg_np;
 
@@ -197,7 +200,7 @@ void iscsit_clear_tpg_np_login_threads(
                        continue;
                }
                spin_unlock(&tpg->tpg_np_lock);
-               iscsit_clear_tpg_np_login_thread(tpg_np, tpg);
+               iscsit_clear_tpg_np_login_thread(tpg_np, tpg, shutdown);
                spin_lock(&tpg->tpg_np_lock);
        }
        spin_unlock(&tpg->tpg_np_lock);
@@ -268,6 +271,8 @@ int iscsit_tpg_del_portal_group(
        tpg->tpg_state = TPG_STATE_INACTIVE;
        spin_unlock(&tpg->tpg_state_lock);
 
+       iscsit_clear_tpg_np_login_threads(tpg, true);
+
        if (iscsit_release_sessions_for_tpg(tpg, force) < 0) {
                pr_err("Unable to delete iSCSI Target Portal Group:"
                        " %hu while active sessions exist, and force=0\n",
@@ -368,7 +373,7 @@ int iscsit_tpg_disable_portal_group(struct iscsi_portal_group *tpg, int force)
        tpg->tpg_state = TPG_STATE_INACTIVE;
        spin_unlock(&tpg->tpg_state_lock);
 
-       iscsit_clear_tpg_np_login_threads(tpg);
+       iscsit_clear_tpg_np_login_threads(tpg, false);
 
        if (iscsit_release_sessions_for_tpg(tpg, force) < 0) {
                spin_lock(&tpg->tpg_state_lock);
@@ -490,6 +495,8 @@ struct iscsi_tpg_np *iscsit_tpg_add_network_portal(
        INIT_LIST_HEAD(&tpg_np->tpg_np_child_list);
        INIT_LIST_HEAD(&tpg_np->tpg_np_parent_list);
        spin_lock_init(&tpg_np->tpg_np_parent_lock);
+       init_completion(&tpg_np->tpg_np_comp);
+       kref_init(&tpg_np->tpg_np_kref);
        tpg_np->tpg_np          = np;
        tpg_np->tpg             = tpg;
 
@@ -520,7 +527,7 @@ static int iscsit_tpg_release_np(
        struct iscsi_portal_group *tpg,
        struct iscsi_np *np)
 {
-       iscsit_clear_tpg_np_login_thread(tpg_np, tpg);
+       iscsit_clear_tpg_np_login_thread(tpg_np, tpg, true);
 
        pr_debug("CORE[%s] - Removed Network Portal: %s:%hu,%hu on %s\n",
                tpg->tpg_tiqn->tiqn, np->np_ip, np->np_port, tpg->tpgt,
index dda48c1..b77693e 100644 (file)
@@ -5,10 +5,10 @@ extern struct iscsi_portal_group *iscsit_alloc_portal_group(struct iscsi_tiqn *,
 extern int iscsit_load_discovery_tpg(void);
 extern void iscsit_release_discovery_tpg(void);
 extern struct iscsi_portal_group *iscsit_get_tpg_from_np(struct iscsi_tiqn *,
-                       struct iscsi_np *);
+                       struct iscsi_np *, struct iscsi_tpg_np **);
 extern int iscsit_get_tpg(struct iscsi_portal_group *);
 extern void iscsit_put_tpg(struct iscsi_portal_group *);
-extern void iscsit_clear_tpg_np_login_threads(struct iscsi_portal_group *);
+extern void iscsit_clear_tpg_np_login_threads(struct iscsi_portal_group *, bool);
 extern void iscsit_tpg_dump_params(struct iscsi_portal_group *);
 extern int iscsit_tpg_add_portal_group(struct iscsi_tiqn *, struct iscsi_portal_group *);
 extern int iscsit_tpg_del_portal_group(struct iscsi_tiqn *, struct iscsi_portal_group *,
index 8128952..601e9cc 100644 (file)
@@ -1,9 +1,7 @@
 /*******************************************************************************
  * This file contains the iSCSI Login Thread and Thread Queue functions.
  *
- * © Copyright 2007-2011 RisingTide Systems LLC.
- *
- * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ * (c) Copyright 2007-2013 Datera, Inc.
  *
  * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
  *
@@ -105,12 +103,11 @@ int iscsi_allocate_thread_sets(u32 thread_pair_count)
                ts->status = ISCSI_THREAD_SET_FREE;
                INIT_LIST_HEAD(&ts->ts_list);
                spin_lock_init(&ts->ts_state_lock);
-               init_completion(&ts->rx_post_start_comp);
-               init_completion(&ts->tx_post_start_comp);
                init_completion(&ts->rx_restart_comp);
                init_completion(&ts->tx_restart_comp);
                init_completion(&ts->rx_start_comp);
                init_completion(&ts->tx_start_comp);
+               sema_init(&ts->ts_activate_sem, 0);
 
                ts->create_threads = 1;
                ts->tx_thread = kthread_run(iscsi_target_tx_thread, ts, "%s",
@@ -139,35 +136,44 @@ int iscsi_allocate_thread_sets(u32 thread_pair_count)
        return allocated_thread_pair_count;
 }
 
-void iscsi_deallocate_thread_sets(void)
+static void iscsi_deallocate_thread_one(struct iscsi_thread_set *ts)
 {
-       u32 released_count = 0;
-       struct iscsi_thread_set *ts = NULL;
-
-       while ((ts = iscsi_get_ts_from_inactive_list())) {
+       spin_lock_bh(&ts->ts_state_lock);
+       ts->status = ISCSI_THREAD_SET_DIE;
 
+       if (ts->rx_thread) {
+               complete(&ts->rx_start_comp);
+               spin_unlock_bh(&ts->ts_state_lock);
+               kthread_stop(ts->rx_thread);
                spin_lock_bh(&ts->ts_state_lock);
-               ts->status = ISCSI_THREAD_SET_DIE;
+       }
+       if (ts->tx_thread) {
+               complete(&ts->tx_start_comp);
                spin_unlock_bh(&ts->ts_state_lock);
+               kthread_stop(ts->tx_thread);
+               spin_lock_bh(&ts->ts_state_lock);
+       }
+       spin_unlock_bh(&ts->ts_state_lock);
+       /*
+        * Release this thread_id in the thread_set_bitmap
+        */
+       spin_lock(&ts_bitmap_lock);
+       bitmap_release_region(iscsit_global->ts_bitmap,
+                       ts->thread_id, get_order(1));
+       spin_unlock(&ts_bitmap_lock);
 
-               if (ts->rx_thread) {
-                       send_sig(SIGINT, ts->rx_thread, 1);
-                       kthread_stop(ts->rx_thread);
-               }
-               if (ts->tx_thread) {
-                       send_sig(SIGINT, ts->tx_thread, 1);
-                       kthread_stop(ts->tx_thread);
-               }
-               /*
-                * Release this thread_id in the thread_set_bitmap
-                */
-               spin_lock(&ts_bitmap_lock);
-               bitmap_release_region(iscsit_global->ts_bitmap,
-                               ts->thread_id, get_order(1));
-               spin_unlock(&ts_bitmap_lock);
+       kfree(ts);
+}
 
+void iscsi_deallocate_thread_sets(void)
+{
+       struct iscsi_thread_set *ts = NULL;
+       u32 released_count = 0;
+
+       while ((ts = iscsi_get_ts_from_inactive_list())) {
+
+               iscsi_deallocate_thread_one(ts);
                released_count++;
-               kfree(ts);
        }
 
        if (released_count)
@@ -187,34 +193,13 @@ static void iscsi_deallocate_extra_thread_sets(void)
                if (!ts)
                        break;
 
-               spin_lock_bh(&ts->ts_state_lock);
-               ts->status = ISCSI_THREAD_SET_DIE;
-               spin_unlock_bh(&ts->ts_state_lock);
-
-               if (ts->rx_thread) {
-                       send_sig(SIGINT, ts->rx_thread, 1);
-                       kthread_stop(ts->rx_thread);
-               }
-               if (ts->tx_thread) {
-                       send_sig(SIGINT, ts->tx_thread, 1);
-                       kthread_stop(ts->tx_thread);
-               }
-               /*
-                * Release this thread_id in the thread_set_bitmap
-                */
-               spin_lock(&ts_bitmap_lock);
-               bitmap_release_region(iscsit_global->ts_bitmap,
-                               ts->thread_id, get_order(1));
-               spin_unlock(&ts_bitmap_lock);
-
+               iscsi_deallocate_thread_one(ts);
                released_count++;
-               kfree(ts);
        }
 
-       if (released_count) {
+       if (released_count)
                pr_debug("Stopped %d thread set(s) (%d total threads)."
                        "\n", released_count, released_count * 2);
-       }
 }
 
 void iscsi_activate_thread_set(struct iscsi_conn *conn, struct iscsi_thread_set *ts)
@@ -224,37 +209,23 @@ void iscsi_activate_thread_set(struct iscsi_conn *conn, struct iscsi_thread_set
        spin_lock_bh(&ts->ts_state_lock);
        conn->thread_set = ts;
        ts->conn = conn;
+       ts->status = ISCSI_THREAD_SET_ACTIVE;
        spin_unlock_bh(&ts->ts_state_lock);
-       /*
-        * Start up the RX thread and wait on rx_post_start_comp.  The RX
-        * Thread will then do the same for the TX Thread in
-        * iscsi_rx_thread_pre_handler().
-        */
+
        complete(&ts->rx_start_comp);
-       wait_for_completion(&ts->rx_post_start_comp);
+       complete(&ts->tx_start_comp);
+
+       down(&ts->ts_activate_sem);
 }
 
 struct iscsi_thread_set *iscsi_get_thread_set(void)
 {
-       int allocate_ts = 0;
-       struct completion comp;
-       struct iscsi_thread_set *ts = NULL;
-       /*
-        * If no inactive thread set is available on the first call to
-        * iscsi_get_ts_from_inactive_list(), sleep for a second and
-        * try again.  If still none are available after two attempts,
-        * allocate a set ourselves.
-        */
+       struct iscsi_thread_set *ts;
+
 get_set:
        ts = iscsi_get_ts_from_inactive_list();
        if (!ts) {
-               if (allocate_ts == 2)
-                       iscsi_allocate_thread_sets(1);
-
-               init_completion(&comp);
-               wait_for_completion_timeout(&comp, 1 * HZ);
-
-               allocate_ts++;
+               iscsi_allocate_thread_sets(1);
                goto get_set;
        }
 
@@ -263,6 +234,7 @@ get_set:
        ts->thread_count = 2;
        init_completion(&ts->rx_restart_comp);
        init_completion(&ts->tx_restart_comp);
+       sema_init(&ts->ts_activate_sem, 0);
 
        return ts;
 }
@@ -400,7 +372,8 @@ static void iscsi_check_to_add_additional_sets(void)
 static int iscsi_signal_thread_pre_handler(struct iscsi_thread_set *ts)
 {
        spin_lock_bh(&ts->ts_state_lock);
-       if ((ts->status == ISCSI_THREAD_SET_DIE) || signal_pending(current)) {
+       if (ts->status == ISCSI_THREAD_SET_DIE || kthread_should_stop() ||
+           signal_pending(current)) {
                spin_unlock_bh(&ts->ts_state_lock);
                return -1;
        }
@@ -419,7 +392,8 @@ struct iscsi_conn *iscsi_rx_thread_pre_handler(struct iscsi_thread_set *ts)
                goto sleep;
        }
 
-       flush_signals(current);
+       if (ts->status != ISCSI_THREAD_SET_DIE)
+               flush_signals(current);
 
        if (ts->delay_inactive && (--ts->thread_count == 0)) {
                spin_unlock_bh(&ts->ts_state_lock);
@@ -446,18 +420,19 @@ sleep:
        if (iscsi_signal_thread_pre_handler(ts) < 0)
                return NULL;
 
+       iscsi_check_to_add_additional_sets();
+
+       spin_lock_bh(&ts->ts_state_lock);
        if (!ts->conn) {
                pr_err("struct iscsi_thread_set->conn is NULL for"
-                       " thread_id: %d, going back to sleep\n", ts->thread_id);
-               goto sleep;
+                       " RX thread_id: %s/%d\n", current->comm, current->pid);
+               spin_unlock_bh(&ts->ts_state_lock);
+               return NULL;
        }
-       iscsi_check_to_add_additional_sets();
-       /*
-        * The RX Thread starts up the TX Thread and sleeps.
-        */
        ts->thread_clear |= ISCSI_CLEAR_RX_THREAD;
-       complete(&ts->tx_start_comp);
-       wait_for_completion(&ts->tx_post_start_comp);
+       spin_unlock_bh(&ts->ts_state_lock);
+
+       up(&ts->ts_activate_sem);
 
        return ts->conn;
 }
@@ -472,7 +447,8 @@ struct iscsi_conn *iscsi_tx_thread_pre_handler(struct iscsi_thread_set *ts)
                goto sleep;
        }
 
-       flush_signals(current);
+       if (ts->status != ISCSI_THREAD_SET_DIE)
+               flush_signals(current);
 
        if (ts->delay_inactive && (--ts->thread_count == 0)) {
                spin_unlock_bh(&ts->ts_state_lock);
@@ -498,27 +474,20 @@ sleep:
        if (iscsi_signal_thread_pre_handler(ts) < 0)
                return NULL;
 
-       if (!ts->conn) {
-               pr_err("struct iscsi_thread_set->conn is NULL for "
-                       " thread_id: %d, going back to sleep\n",
-                       ts->thread_id);
-               goto sleep;
-       }
-
        iscsi_check_to_add_additional_sets();
-       /*
-        * From the TX thread, up the tx_post_start_comp that the RX Thread is
-        * sleeping on in iscsi_rx_thread_pre_handler(), then up the
-        * rx_post_start_comp that iscsi_activate_thread_set() is sleeping on.
-        */
-       ts->thread_clear |= ISCSI_CLEAR_TX_THREAD;
-       complete(&ts->tx_post_start_comp);
-       complete(&ts->rx_post_start_comp);
 
        spin_lock_bh(&ts->ts_state_lock);
-       ts->status = ISCSI_THREAD_SET_ACTIVE;
+       if (!ts->conn) {
+               pr_err("struct iscsi_thread_set->conn is NULL for"
+                       " TX thread_id: %s/%d\n", current->comm, current->pid);
+               spin_unlock_bh(&ts->ts_state_lock);
+               return NULL;
+       }
+       ts->thread_clear |= ISCSI_CLEAR_TX_THREAD;
        spin_unlock_bh(&ts->ts_state_lock);
 
+       up(&ts->ts_activate_sem);
+
        return ts->conn;
 }
 
index 547d118..cc1eede 100644 (file)
@@ -64,10 +64,6 @@ struct iscsi_thread_set {
        struct iscsi_conn       *conn;
        /* used for controlling ts state accesses */
        spinlock_t      ts_state_lock;
-       /* Used for rx side post startup */
-       struct completion       rx_post_start_comp;
-       /* Used for tx side post startup */
-       struct completion       tx_post_start_comp;
        /* used for restarting thread queue */
        struct completion       rx_restart_comp;
        /* used for restarting thread queue */
@@ -82,6 +78,7 @@ struct iscsi_thread_set {
        struct task_struct      *tx_thread;
        /* struct iscsi_thread_set in list list head*/
        struct list_head        ts_list;
+       struct semaphore        ts_activate_sem;
 };
 
 #endif   /*** ISCSI_THREAD_QUEUE_H ***/
index 1df06d5..f2de28e 100644 (file)
@@ -1,9 +1,7 @@
 /*******************************************************************************
  * This file contains the iSCSI Target specific utility functions.
  *
- * © Copyright 2007-2011 RisingTide Systems LLC.
- *
- * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ * (c) Copyright 2007-2013 Datera, Inc.
  *
  * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
  *
@@ -19,6 +17,7 @@
  ******************************************************************************/
 
 #include <linux/list.h>
+#include <linux/percpu_ida.h>
 #include <scsi/scsi_tcq.h>
 #include <scsi/iscsi_proto.h>
 #include <target/target_core_base.h>
@@ -149,18 +148,6 @@ void iscsit_free_r2ts_from_list(struct iscsi_cmd *cmd)
        spin_unlock_bh(&cmd->r2t_lock);
 }
 
-struct iscsi_cmd *iscsit_alloc_cmd(struct iscsi_conn *conn, gfp_t gfp_mask)
-{
-       struct iscsi_cmd *cmd;
-
-       cmd = kmem_cache_zalloc(lio_cmd_cache, gfp_mask);
-       if (!cmd)
-               return NULL;
-
-       cmd->release_cmd = &iscsit_release_cmd;
-       return cmd;
-}
-
 /*
  * May be called from software interrupt (timer) context for allocating
  * iSCSI NopINs.
@@ -168,12 +155,15 @@ struct iscsi_cmd *iscsit_alloc_cmd(struct iscsi_conn *conn, gfp_t gfp_mask)
 struct iscsi_cmd *iscsit_allocate_cmd(struct iscsi_conn *conn, gfp_t gfp_mask)
 {
        struct iscsi_cmd *cmd;
+       struct se_session *se_sess = conn->sess->se_sess;
+       int size, tag;
 
-       cmd = conn->conn_transport->iscsit_alloc_cmd(conn, gfp_mask);
-       if (!cmd) {
-               pr_err("Unable to allocate memory for struct iscsi_cmd.\n");
-               return NULL;
-       }
+       tag = percpu_ida_alloc(&se_sess->sess_tag_pool, gfp_mask);
+       size = sizeof(struct iscsi_cmd) + conn->conn_transport->priv_size;
+       cmd = (struct iscsi_cmd *)(se_sess->sess_cmd_map + (tag * size));
+       memset(cmd, 0, size);
+
+       cmd->se_cmd.map_tag = tag;
        cmd->conn = conn;
        INIT_LIST_HEAD(&cmd->i_conn_node);
        INIT_LIST_HEAD(&cmd->datain_list);
@@ -689,6 +679,16 @@ void iscsit_free_queue_reqs_for_conn(struct iscsi_conn *conn)
 
 void iscsit_release_cmd(struct iscsi_cmd *cmd)
 {
+       struct iscsi_session *sess;
+       struct se_cmd *se_cmd = &cmd->se_cmd;
+
+       if (cmd->conn)
+               sess = cmd->conn->sess;
+       else
+               sess = cmd->sess;
+
+       BUG_ON(!sess || !sess->se_sess);
+
        kfree(cmd->buf_ptr);
        kfree(cmd->pdu_list);
        kfree(cmd->seq_list);
@@ -696,8 +696,9 @@ void iscsit_release_cmd(struct iscsi_cmd *cmd)
        kfree(cmd->iov_data);
        kfree(cmd->text_in_ptr);
 
-       kmem_cache_free(lio_cmd_cache, cmd);
+       percpu_ida_free(&sess->se_sess->sess_tag_pool, se_cmd->map_tag);
 }
+EXPORT_SYMBOL(iscsit_release_cmd);
 
 static void __iscsit_free_cmd(struct iscsi_cmd *cmd, bool scsi_cmd,
                              bool check_queues)
@@ -761,7 +762,7 @@ void iscsit_free_cmd(struct iscsi_cmd *cmd, bool shutdown)
                /* Fall-through */
        default:
                __iscsit_free_cmd(cmd, false, shutdown);
-               cmd->release_cmd(cmd);
+               iscsit_release_cmd(cmd);
                break;
        }
 }
index 568ad25..0f6d69d 100644 (file)
@@ -3,7 +3,7 @@
  * This file contains the Linux/SCSI LLD virtual SCSI initiator driver
  * for emulated SAS initiator ports
  *
 * © Copyright 2011 RisingTide Systems LLC.
 * © Copyright 2011-2013 Datera, Inc.
  *
  * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
  *
index cbe48ab..4724410 100644 (file)
@@ -3,7 +3,7 @@
  *
  * This file contains SPC-3 compliant asymmetric logical unit assigntment (ALUA)
  *
- * (c) Copyright 2009-2012 RisingTide Systems LLC.
+ * (c) Copyright 2009-2013 Datera, Inc.
  *
  * Nicholas A. Bellinger <nab@kernel.org>
  *
@@ -557,6 +557,9 @@ target_alua_state_check(struct se_cmd *cmd)
         * a ALUA logical unit group.
         */
        tg_pt_gp_mem = port->sep_alua_tg_pt_gp_mem;
+       if (!tg_pt_gp_mem)
+               return 0;
+
        spin_lock(&tg_pt_gp_mem->tg_pt_gp_mem_lock);
        tg_pt_gp = tg_pt_gp_mem->tg_pt_gp;
        out_alua_state = atomic_read(&tg_pt_gp->tg_pt_gp_alua_access_state);
@@ -730,7 +733,7 @@ static int core_alua_write_tpg_metadata(
        if (ret < 0)
                pr_err("Error writing ALUA metadata file: %s\n", path);
        fput(file);
-       return ret ? -EIO : 0;
+       return (ret < 0) ? -EIO : 0;
 }
 
 /*
@@ -1756,10 +1759,10 @@ ssize_t core_alua_store_access_type(
        unsigned long tmp;
        int ret;
 
-       ret = strict_strtoul(page, 0, &tmp);
+       ret = kstrtoul(page, 0, &tmp);
        if (ret < 0) {
                pr_err("Unable to extract alua_access_type\n");
-               return -EINVAL;
+               return ret;
        }
        if ((tmp != 0) && (tmp != 1) && (tmp != 2) && (tmp != 3)) {
                pr_err("Illegal value for alua_access_type:"
@@ -1794,10 +1797,10 @@ ssize_t core_alua_store_nonop_delay_msecs(
        unsigned long tmp;
        int ret;
 
-       ret = strict_strtoul(page, 0, &tmp);
+       ret = kstrtoul(page, 0, &tmp);
        if (ret < 0) {
                pr_err("Unable to extract nonop_delay_msecs\n");
-               return -EINVAL;
+               return ret;
        }
        if (tmp > ALUA_MAX_NONOP_DELAY_MSECS) {
                pr_err("Passed nonop_delay_msecs: %lu, exceeds"
@@ -1825,10 +1828,10 @@ ssize_t core_alua_store_trans_delay_msecs(
        unsigned long tmp;
        int ret;
 
-       ret = strict_strtoul(page, 0, &tmp);
+       ret = kstrtoul(page, 0, &tmp);
        if (ret < 0) {
                pr_err("Unable to extract trans_delay_msecs\n");
-               return -EINVAL;
+               return ret;
        }
        if (tmp > ALUA_MAX_TRANS_DELAY_MSECS) {
                pr_err("Passed trans_delay_msecs: %lu, exceeds"
@@ -1856,10 +1859,10 @@ ssize_t core_alua_store_implict_trans_secs(
        unsigned long tmp;
        int ret;
 
-       ret = strict_strtoul(page, 0, &tmp);
+       ret = kstrtoul(page, 0, &tmp);
        if (ret < 0) {
                pr_err("Unable to extract implict_trans_secs\n");
-               return -EINVAL;
+               return ret;
        }
        if (tmp > ALUA_MAX_IMPLICT_TRANS_SECS) {
                pr_err("Passed implict_trans_secs: %lu, exceeds"
@@ -1887,10 +1890,10 @@ ssize_t core_alua_store_preferred_bit(
        unsigned long tmp;
        int ret;
 
-       ret = strict_strtoul(page, 0, &tmp);
+       ret = kstrtoul(page, 0, &tmp);
        if (ret < 0) {
                pr_err("Unable to extract preferred ALUA value\n");
-               return -EINVAL;
+               return ret;
        }
        if ((tmp != 0) && (tmp != 1)) {
                pr_err("Illegal value for preferred ALUA: %lu\n", tmp);
@@ -1922,10 +1925,10 @@ ssize_t core_alua_store_offline_bit(
        if (!lun->lun_sep)
                return -ENODEV;
 
-       ret = strict_strtoul(page, 0, &tmp);
+       ret = kstrtoul(page, 0, &tmp);
        if (ret < 0) {
                pr_err("Unable to extract alua_tg_pt_offline value\n");
-               return -EINVAL;
+               return ret;
        }
        if ((tmp != 0) && (tmp != 1)) {
                pr_err("Illegal value for alua_tg_pt_offline: %lu\n",
@@ -1961,10 +1964,10 @@ ssize_t core_alua_store_secondary_status(
        unsigned long tmp;
        int ret;
 
-       ret = strict_strtoul(page, 0, &tmp);
+       ret = kstrtoul(page, 0, &tmp);
        if (ret < 0) {
                pr_err("Unable to extract alua_tg_pt_status\n");
-               return -EINVAL;
+               return ret;
        }
        if ((tmp != ALUA_STATUS_NONE) &&
            (tmp != ALUA_STATUS_ALTERED_BY_EXPLICT_STPG) &&
@@ -1994,10 +1997,10 @@ ssize_t core_alua_store_secondary_write_metadata(
        unsigned long tmp;
        int ret;
 
-       ret = strict_strtoul(page, 0, &tmp);
+       ret = kstrtoul(page, 0, &tmp);
        if (ret < 0) {
                pr_err("Unable to extract alua_tg_pt_write_md\n");
-               return -EINVAL;
+               return ret;
        }
        if ((tmp != 0) && (tmp != 1)) {
                pr_err("Illegal value for alua_tg_pt_write_md:"
index e4d2293..82e81c5 100644 (file)
@@ -3,7 +3,7 @@
  *
  * This file contains ConfigFS logic for the Generic Target Engine project.
  *
- * (c) Copyright 2008-2012 RisingTide Systems LLC.
+ * (c) Copyright 2008-2013 Datera, Inc.
  *
  * Nicholas A. Bellinger <nab@kernel.org>
  *
@@ -48,6 +48,7 @@
 #include "target_core_alua.h"
 #include "target_core_pr.h"
 #include "target_core_rd.h"
+#include "target_core_xcopy.h"
 
 extern struct t10_alua_lu_gp *default_lu_gp;
 
@@ -268,7 +269,7 @@ static struct configfs_subsystem target_core_fabrics = {
        },
 };
 
-static struct configfs_subsystem *target_core_subsystem[] = {
+struct configfs_subsystem *target_core_subsystem[] = {
        &target_core_fabrics,
        NULL,
 };
@@ -577,9 +578,9 @@ static ssize_t target_core_dev_store_attr_##_name(                  \
        unsigned long val;                                              \
        int ret;                                                        \
                                                                        \
-       ret = strict_strtoul(page, 0, &val);                            \
+       ret = kstrtoul(page, 0, &val);                          \
        if (ret < 0) {                                                  \
-               pr_err("strict_strtoul() failed with"           \
+               pr_err("kstrtoul() failed with"         \
                        " ret: %d\n", ret);                             \
                return -EINVAL;                                         \
        }                                                               \
@@ -636,6 +637,12 @@ SE_DEV_ATTR(emulate_tpu, S_IRUGO | S_IWUSR);
 DEF_DEV_ATTRIB(emulate_tpws);
 SE_DEV_ATTR(emulate_tpws, S_IRUGO | S_IWUSR);
 
+DEF_DEV_ATTRIB(emulate_caw);
+SE_DEV_ATTR(emulate_caw, S_IRUGO | S_IWUSR);
+
+DEF_DEV_ATTRIB(emulate_3pc);
+SE_DEV_ATTR(emulate_3pc, S_IRUGO | S_IWUSR);
+
 DEF_DEV_ATTRIB(enforce_pr_isids);
 SE_DEV_ATTR(enforce_pr_isids, S_IRUGO | S_IWUSR);
 
@@ -693,6 +700,8 @@ static struct configfs_attribute *target_core_dev_attrib_attrs[] = {
        &target_core_dev_attrib_emulate_tas.attr,
        &target_core_dev_attrib_emulate_tpu.attr,
        &target_core_dev_attrib_emulate_tpws.attr,
+       &target_core_dev_attrib_emulate_caw.attr,
+       &target_core_dev_attrib_emulate_3pc.attr,
        &target_core_dev_attrib_enforce_pr_isids.attr,
        &target_core_dev_attrib_is_nonrot.attr,
        &target_core_dev_attrib_emulate_rest_reord.attr,
@@ -1310,9 +1319,9 @@ static ssize_t target_core_dev_pr_store_attr_res_aptpl_metadata(
                                ret = -ENOMEM;
                                goto out;
                        }
-                       ret = strict_strtoull(arg_p, 0, &tmp_ll);
+                       ret = kstrtoull(arg_p, 0, &tmp_ll);
                        if (ret < 0) {
-                               pr_err("strict_strtoull() failed for"
+                               pr_err("kstrtoull() failed for"
                                        " sa_res_key=\n");
                                goto out;
                        }
@@ -1836,11 +1845,11 @@ static ssize_t target_core_alua_lu_gp_store_attr_lu_gp_id(
        unsigned long lu_gp_id;
        int ret;
 
-       ret = strict_strtoul(page, 0, &lu_gp_id);
+       ret = kstrtoul(page, 0, &lu_gp_id);
        if (ret < 0) {
-               pr_err("strict_strtoul() returned %d for"
+               pr_err("kstrtoul() returned %d for"
                        " lu_gp_id\n", ret);
-               return -EINVAL;
+               return ret;
        }
        if (lu_gp_id > 0x0000ffff) {
                pr_err("ALUA lu_gp_id: %lu exceeds maximum:"
@@ -2032,11 +2041,11 @@ static ssize_t target_core_alua_tg_pt_gp_store_attr_alua_access_state(
                return -EINVAL;
        }
 
-       ret = strict_strtoul(page, 0, &tmp);
+       ret = kstrtoul(page, 0, &tmp);
        if (ret < 0) {
                pr_err("Unable to extract new ALUA access state from"
                                " %s\n", page);
-               return -EINVAL;
+               return ret;
        }
        new_state = (int)tmp;
 
@@ -2079,11 +2088,11 @@ static ssize_t target_core_alua_tg_pt_gp_store_attr_alua_access_status(
                return -EINVAL;
        }
 
-       ret = strict_strtoul(page, 0, &tmp);
+       ret = kstrtoul(page, 0, &tmp);
        if (ret < 0) {
                pr_err("Unable to extract new ALUA access status"
                                " from %s\n", page);
-               return -EINVAL;
+               return ret;
        }
        new_status = (int)tmp;
 
@@ -2139,10 +2148,10 @@ static ssize_t target_core_alua_tg_pt_gp_store_attr_alua_write_metadata(
        unsigned long tmp;
        int ret;
 
-       ret = strict_strtoul(page, 0, &tmp);
+       ret = kstrtoul(page, 0, &tmp);
        if (ret < 0) {
                pr_err("Unable to extract alua_write_metadata\n");
-               return -EINVAL;
+               return ret;
        }
 
        if ((tmp != 0) && (tmp != 1)) {
@@ -2263,11 +2272,11 @@ static ssize_t target_core_alua_tg_pt_gp_store_attr_tg_pt_gp_id(
        unsigned long tg_pt_gp_id;
        int ret;
 
-       ret = strict_strtoul(page, 0, &tg_pt_gp_id);
+       ret = kstrtoul(page, 0, &tg_pt_gp_id);
        if (ret < 0) {
-               pr_err("strict_strtoul() returned %d for"
+               pr_err("kstrtoul() returned %d for"
                        " tg_pt_gp_id\n", ret);
-               return -EINVAL;
+               return ret;
        }
        if (tg_pt_gp_id > 0x0000ffff) {
                pr_err("ALUA tg_pt_gp_id: %lu exceeds maximum:"
@@ -2676,10 +2685,10 @@ static ssize_t target_core_hba_store_attr_hba_mode(struct se_hba *hba,
        if (transport->pmode_enable_hba == NULL)
                return -EINVAL;
 
-       ret = strict_strtoul(page, 0, &mode_flag);
+       ret = kstrtoul(page, 0, &mode_flag);
        if (ret < 0) {
                pr_err("Unable to extract hba mode flag: %d\n", ret);
-               return -EINVAL;
+               return ret;
        }
 
        if (hba->dev_count) {
@@ -2767,11 +2776,11 @@ static struct config_group *target_core_call_addhbatotarget(
                str++; /* Skip to start of plugin dependent ID */
        }
 
-       ret = strict_strtoul(str, 0, &plugin_dep_id);
+       ret = kstrtoul(str, 0, &plugin_dep_id);
        if (ret < 0) {
-               pr_err("strict_strtoul() returned %d for"
+               pr_err("kstrtoul() returned %d for"
                                " plugin_dep_id\n", ret);
-               return ERR_PTR(-EINVAL);
+               return ERR_PTR(ret);
        }
        /*
         * Load up TCM subsystem plugins if they have not already been loaded.
@@ -2927,6 +2936,10 @@ static int __init target_core_init_configfs(void)
        if (ret < 0)
                goto out;
 
+       ret = target_xcopy_setup_pt();
+       if (ret < 0)
+               goto out;
+
        return 0;
 
 out:
@@ -2999,6 +3012,7 @@ static void __exit target_core_exit_configfs(void)
 
        core_dev_release_virtual_lun0();
        rd_module_exit();
+       target_xcopy_release_pt();
        release_se_kmem_caches();
 }
 
index 8f4142f..d90dbb0 100644 (file)
@@ -4,7 +4,7 @@
  * This file contains the TCM Virtual Device and Disk Transport
  * agnostic related functions.
  *
- * (c) Copyright 2003-2012 RisingTide Systems LLC.
+ * (c) Copyright 2003-2013 Datera, Inc.
  *
  * Nicholas A. Bellinger <nab@kernel.org>
  *
@@ -47,6 +47,9 @@
 #include "target_core_pr.h"
 #include "target_core_ua.h"
 
+DEFINE_MUTEX(g_device_mutex);
+LIST_HEAD(g_device_list);
+
 static struct se_hba *lun0_hba;
 /* not static, needed by tpg.c */
 struct se_device *g_lun0_dev;
@@ -890,6 +893,32 @@ int se_dev_set_emulate_tpws(struct se_device *dev, int flag)
        return 0;
 }
 
+int se_dev_set_emulate_caw(struct se_device *dev, int flag)
+{
+       if (flag != 0 && flag != 1) {
+               pr_err("Illegal value %d\n", flag);
+               return -EINVAL;
+       }
+       dev->dev_attrib.emulate_caw = flag;
+       pr_debug("dev[%p]: SE Device CompareAndWrite (AtomicTestandSet): %d\n",
+                dev, flag);
+
+       return 0;
+}
+
+int se_dev_set_emulate_3pc(struct se_device *dev, int flag)
+{
+       if (flag != 0 && flag != 1) {
+               pr_err("Illegal value %d\n", flag);
+               return -EINVAL;
+       }
+       dev->dev_attrib.emulate_3pc = flag;
+       pr_debug("dev[%p]: SE Device 3rd Party Copy (EXTENDED_COPY): %d\n",
+               dev, flag);
+
+       return 0;
+}
+
 int se_dev_set_enforce_pr_isids(struct se_device *dev, int flag)
 {
        if ((flag != 0) && (flag != 1)) {
@@ -1393,6 +1422,7 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name)
        INIT_LIST_HEAD(&dev->delayed_cmd_list);
        INIT_LIST_HEAD(&dev->state_list);
        INIT_LIST_HEAD(&dev->qf_cmd_list);
+       INIT_LIST_HEAD(&dev->g_dev_node);
        spin_lock_init(&dev->stats_lock);
        spin_lock_init(&dev->execute_task_lock);
        spin_lock_init(&dev->delayed_cmd_lock);
@@ -1400,6 +1430,7 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name)
        spin_lock_init(&dev->se_port_lock);
        spin_lock_init(&dev->se_tmr_lock);
        spin_lock_init(&dev->qf_cmd_lock);
+       sema_init(&dev->caw_sem, 1);
        atomic_set(&dev->dev_ordered_id, 0);
        INIT_LIST_HEAD(&dev->t10_wwn.t10_vpd_list);
        spin_lock_init(&dev->t10_wwn.t10_vpd_lock);
@@ -1423,6 +1454,8 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name)
        dev->dev_attrib.emulate_tas = DA_EMULATE_TAS;
        dev->dev_attrib.emulate_tpu = DA_EMULATE_TPU;
        dev->dev_attrib.emulate_tpws = DA_EMULATE_TPWS;
+       dev->dev_attrib.emulate_caw = DA_EMULATE_CAW;
+       dev->dev_attrib.emulate_3pc = DA_EMULATE_3PC;
        dev->dev_attrib.enforce_pr_isids = DA_ENFORCE_PR_ISIDS;
        dev->dev_attrib.is_nonrot = DA_IS_NONROT;
        dev->dev_attrib.emulate_rest_reord = DA_EMULATE_REST_REORD;
@@ -1510,6 +1543,11 @@ int target_configure_device(struct se_device *dev)
        spin_lock(&hba->device_lock);
        hba->dev_count++;
        spin_unlock(&hba->device_lock);
+
+       mutex_lock(&g_device_mutex);
+       list_add_tail(&dev->g_dev_node, &g_device_list);
+       mutex_unlock(&g_device_mutex);
+
        return 0;
 
 out_free_alua:
@@ -1528,6 +1566,10 @@ void target_free_device(struct se_device *dev)
        if (dev->dev_flags & DF_CONFIGURED) {
                destroy_workqueue(dev->tmr_wq);
 
+               mutex_lock(&g_device_mutex);
+               list_del(&dev->g_dev_node);
+               mutex_unlock(&g_device_mutex);
+
                spin_lock(&hba->device_lock);
                hba->dev_count--;
                spin_unlock(&hba->device_lock);
index eb56eb1..3503996 100644 (file)
@@ -4,7 +4,7 @@
  * This file contains generic fabric module configfs infrastructure for
  * TCM v4.x code
  *
- * (c) Copyright 2010-2012 RisingTide Systems LLC.
+ * (c) Copyright 2010-2013 Datera, Inc.
  *
  * Nicholas A. Bellinger <nab@linux-iscsi.org>
 *
@@ -189,9 +189,11 @@ static ssize_t target_fabric_mappedlun_store_write_protect(
        struct se_node_acl *se_nacl = lacl->se_lun_nacl;
        struct se_portal_group *se_tpg = se_nacl->se_tpg;
        unsigned long op;
+       int ret;
 
-       if (strict_strtoul(page, 0, &op))
-               return -EINVAL;
+       ret = kstrtoul(page, 0, &op);
+       if (ret)
+               return ret;
 
        if ((op != 1) && (op != 0))
                return -EINVAL;
@@ -350,7 +352,10 @@ static struct config_group *target_fabric_make_mappedlun(
         * Determine the Mapped LUN value.  This is what the SCSI Initiator
         * Port will actually see.
         */
-       if (strict_strtoul(buf + 4, 0, &mapped_lun) || mapped_lun > UINT_MAX) {
+       ret = kstrtoul(buf + 4, 0, &mapped_lun);
+       if (ret)
+               goto out;
+       if (mapped_lun > UINT_MAX) {
                ret = -EINVAL;
                goto out;
        }
@@ -875,7 +880,10 @@ static struct config_group *target_fabric_make_lun(
                                " \"lun_$LUN_NUMBER\"\n");
                return ERR_PTR(-EINVAL);
        }
-       if (strict_strtoul(name + 4, 0, &unpacked_lun) || unpacked_lun > UINT_MAX)
+       errno = kstrtoul(name + 4, 0, &unpacked_lun);
+       if (errno)
+               return ERR_PTR(errno);
+       if (unpacked_lun > UINT_MAX)
                return ERR_PTR(-EINVAL);
 
        lun = core_get_lun_from_tpg(se_tpg, unpacked_lun);
index 687b0b0..0d1cf8b 100644 (file)
@@ -4,7 +4,7 @@
  * This file contains generic high level protocol identifier and PR
  * handlers for TCM fabric modules
  *
- * (c) Copyright 2010-2012 RisingTide Systems LLC.
+ * (c) Copyright 2010-2013 Datera, Inc.
  *
  * Nicholas A. Bellinger <nab@linux-iscsi.org>
  *
index b11890d..b662f89 100644 (file)
@@ -3,7 +3,7 @@
  *
  * This file contains the Storage Engine <-> FILEIO transport specific functions
  *
- * (c) Copyright 2005-2012 RisingTide Systems LLC.
+ * (c) Copyright 2005-2013 Datera, Inc.
  *
  * Nicholas A. Bellinger <nab@kernel.org>
  *
@@ -547,11 +547,9 @@ fd_execute_unmap(struct se_cmd *cmd)
 }
 
 static sense_reason_t
-fd_execute_rw(struct se_cmd *cmd)
+fd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
+             enum dma_data_direction data_direction)
 {
-       struct scatterlist *sgl = cmd->t_data_sg;
-       u32 sgl_nents = cmd->t_data_nents;
-       enum dma_data_direction data_direction = cmd->data_direction;
        struct se_device *dev = cmd->se_dev;
        int ret = 0;
 
@@ -635,10 +633,10 @@ static ssize_t fd_set_configfs_dev_params(struct se_device *dev,
                                ret = -ENOMEM;
                                break;
                        }
-                       ret = strict_strtoull(arg_p, 0, &fd_dev->fd_dev_size);
+                       ret = kstrtoull(arg_p, 0, &fd_dev->fd_dev_size);
                        kfree(arg_p);
                        if (ret < 0) {
-                               pr_err("strict_strtoull() failed for"
+                               pr_err("kstrtoull() failed for"
                                                " fd_dev_size=\n");
                                goto out;
                        }
index d2616cd..a25051a 100644 (file)
@@ -3,7 +3,7 @@
  *
  * This file contains the TCM HBA Transport related functions.
  *
- * (c) Copyright 2003-2012 RisingTide Systems LLC.
+ * (c) Copyright 2003-2013 Datera, Inc.
  *
  * Nicholas A. Bellinger <nab@kernel.org>
  *
index aa1620a..b9a3394 100644 (file)
@@ -4,7 +4,7 @@
  * This file contains the Storage Engine  <-> Linux BlockIO transport
  * specific functions.
  *
- * (c) Copyright 2003-2012 RisingTide Systems LLC.
+ * (c) Copyright 2003-2013 Datera, Inc.
  *
  * Nicholas A. Bellinger <nab@kernel.org>
  *
@@ -536,10 +536,10 @@ static ssize_t iblock_set_configfs_dev_params(struct se_device *dev,
                                ret = -ENOMEM;
                                break;
                        }
-                       ret = strict_strtoul(arg_p, 0, &tmp_readonly);
+                       ret = kstrtoul(arg_p, 0, &tmp_readonly);
                        kfree(arg_p);
                        if (ret < 0) {
-                               pr_err("strict_strtoul() failed for"
+                               pr_err("kstrtoul() failed for"
                                                " readonly=\n");
                                goto out;
                        }
@@ -587,11 +587,9 @@ static ssize_t iblock_show_configfs_dev_params(struct se_device *dev, char *b)
 }
 
 static sense_reason_t
-iblock_execute_rw(struct se_cmd *cmd)
+iblock_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
+                 enum dma_data_direction data_direction)
 {
-       struct scatterlist *sgl = cmd->t_data_sg;
-       u32 sgl_nents = cmd->t_data_nents;
-       enum dma_data_direction data_direction = cmd->data_direction;
        struct se_device *dev = cmd->se_dev;
        struct iblock_req *ibr;
        struct bio *bio;
index 18d49df..579128a 100644 (file)
@@ -33,6 +33,8 @@ int   se_dev_set_emulate_ua_intlck_ctrl(struct se_device *, int);
 int    se_dev_set_emulate_tas(struct se_device *, int);
 int    se_dev_set_emulate_tpu(struct se_device *, int);
 int    se_dev_set_emulate_tpws(struct se_device *, int);
+int    se_dev_set_emulate_caw(struct se_device *, int);
+int    se_dev_set_emulate_3pc(struct se_device *, int);
 int    se_dev_set_enforce_pr_isids(struct se_device *, int);
 int    se_dev_set_is_nonrot(struct se_device *, int);
 int    se_dev_set_emulate_rest_reord(struct se_device *dev, int);
index bd78faf..d1ae4c5 100644 (file)
@@ -4,7 +4,7 @@
  * This file contains SPC-3 compliant persistent reservations and
  * legacy SPC-2 reservations with compatible reservation handling (CRH=1)
  *
- * (c) Copyright 2009-2012 RisingTide Systems LLC.
+ * (c) Copyright 2009-2013 Datera, Inc.
  *
  * Nicholas A. Bellinger <nab@kernel.org>
  *
@@ -1949,7 +1949,7 @@ static int __core_scsi3_write_aptpl_to_file(
                pr_debug("Error writing APTPL metadata file: %s\n", path);
        fput(file);
 
-       return ret ? -EIO : 0;
+       return (ret < 0) ? -EIO : 0;
 }
 
 /*
index e992b27..551c96c 100644 (file)
@@ -3,7 +3,7 @@
  *
  * This file contains the generic target mode <-> Linux SCSI subsystem plugin.
  *
- * (c) Copyright 2003-2012 RisingTide Systems LLC.
+ * (c) Copyright 2003-2013 Datera, Inc.
  *
  * Nicholas A. Bellinger <nab@kernel.org>
  *
@@ -1050,9 +1050,8 @@ pscsi_execute_cmd(struct se_cmd *cmd)
                req = blk_get_request(pdv->pdv_sd->request_queue,
                                (data_direction == DMA_TO_DEVICE),
                                GFP_KERNEL);
-               if (!req || IS_ERR(req)) {
-                       pr_err("PSCSI: blk_get_request() failed: %ld\n",
-                                       req ? IS_ERR(req) : -ENOMEM);
+               if (!req) {
+                       pr_err("PSCSI: blk_get_request() failed\n");
                        ret = TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
                        goto fail;
                }
index 51127d1..131327a 100644 (file)
@@ -4,7 +4,7 @@
  * This file contains the Storage Engine <-> Ramdisk transport
  * specific functions.
  *
- * (c) Copyright 2003-2012 RisingTide Systems LLC.
+ * (c) Copyright 2003-2013 Datera, Inc.
  *
  * Nicholas A. Bellinger <nab@kernel.org>
  *
@@ -280,11 +280,9 @@ static struct rd_dev_sg_table *rd_get_sg_table(struct rd_dev *rd_dev, u32 page)
 }
 
 static sense_reason_t
-rd_execute_rw(struct se_cmd *cmd)
+rd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
+             enum dma_data_direction data_direction)
 {
-       struct scatterlist *sgl = cmd->t_data_sg;
-       u32 sgl_nents = cmd->t_data_nents;
-       enum dma_data_direction data_direction = cmd->data_direction;
        struct se_device *se_dev = cmd->se_dev;
        struct rd_dev *dev = RD_DEV(se_dev);
        struct rd_dev_sg_table *table;
index 8a46277..6c17295 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * SCSI Block Commands (SBC) parsing and emulation.
  *
- * (c) Copyright 2002-2012 RisingTide Systems LLC.
+ * (c) Copyright 2002-2013 Datera, Inc.
  *
  * Nicholas A. Bellinger <nab@kernel.org>
  *
@@ -25,6 +25,7 @@
 #include <linux/ratelimit.h>
 #include <asm/unaligned.h>
 #include <scsi/scsi.h>
+#include <scsi/scsi_tcq.h>
 
 #include <target/target_core_base.h>
 #include <target/target_core_backend.h>
@@ -280,13 +281,13 @@ sbc_setup_write_same(struct se_cmd *cmd, unsigned char *flags, struct sbc_ops *o
        return 0;
 }
 
-static void xdreadwrite_callback(struct se_cmd *cmd)
+static sense_reason_t xdreadwrite_callback(struct se_cmd *cmd)
 {
        unsigned char *buf, *addr;
        struct scatterlist *sg;
        unsigned int offset;
-       int i;
-       int count;
+       sense_reason_t ret = TCM_NO_SENSE;
+       int i, count;
        /*
         * From sbc3r22.pdf section 5.48 XDWRITEREAD (10) command
         *
@@ -301,7 +302,7 @@ static void xdreadwrite_callback(struct se_cmd *cmd)
        buf = kmalloc(cmd->data_length, GFP_KERNEL);
        if (!buf) {
                pr_err("Unable to allocate xor_callback buf\n");
-               return;
+               return TCM_OUT_OF_RESOURCES;
        }
        /*
         * Copy the scatterlist WRITE buffer located at cmd->t_data_sg
@@ -320,8 +321,10 @@ static void xdreadwrite_callback(struct se_cmd *cmd)
        offset = 0;
        for_each_sg(cmd->t_bidi_data_sg, sg, cmd->t_bidi_data_nents, count) {
                addr = kmap_atomic(sg_page(sg));
-               if (!addr)
+               if (!addr) {
+                       ret = TCM_OUT_OF_RESOURCES;
                        goto out;
+               }
 
                for (i = 0; i < sg->length; i++)
                        *(addr + sg->offset + i) ^= *(buf + offset + i);
@@ -332,6 +335,193 @@ static void xdreadwrite_callback(struct se_cmd *cmd)
 
 out:
        kfree(buf);
+       return ret;
+}
+
+static sense_reason_t
+sbc_execute_rw(struct se_cmd *cmd)
+{
+       return cmd->execute_rw(cmd, cmd->t_data_sg, cmd->t_data_nents,
+                              cmd->data_direction);
+}
+
+static sense_reason_t compare_and_write_post(struct se_cmd *cmd)
+{
+       struct se_device *dev = cmd->se_dev;
+
+       cmd->se_cmd_flags |= SCF_COMPARE_AND_WRITE_POST;
+       /*
+        * Unlock ->caw_sem originally obtained during sbc_compare_and_write()
+        * before the original READ I/O submission.
+        */
+       up(&dev->caw_sem);
+
+       return TCM_NO_SENSE;
+}
+
+static sense_reason_t compare_and_write_callback(struct se_cmd *cmd)
+{
+       struct se_device *dev = cmd->se_dev;
+       struct scatterlist *write_sg = NULL, *sg;
+       unsigned char *buf, *addr;
+       struct sg_mapping_iter m;
+       unsigned int offset = 0, len;
+       unsigned int nlbas = cmd->t_task_nolb;
+       unsigned int block_size = dev->dev_attrib.block_size;
+       unsigned int compare_len = (nlbas * block_size);
+       sense_reason_t ret = TCM_NO_SENSE;
+       int rc, i;
+
+       /*
+        * Handle early failure in transport_generic_request_failure(),
+        * which will not have taken ->caw_sem yet.
+        */
+       if (!cmd->t_data_sg || !cmd->t_bidi_data_sg)
+               return TCM_NO_SENSE;
+
+       buf = kzalloc(cmd->data_length, GFP_KERNEL);
+       if (!buf) {
+               pr_err("Unable to allocate compare_and_write buf\n");
+               ret = TCM_OUT_OF_RESOURCES;
+               goto out;
+       }
+
+       write_sg = kzalloc(sizeof(struct scatterlist) * cmd->t_data_nents,
+                          GFP_KERNEL);
+       if (!write_sg) {
+               pr_err("Unable to allocate compare_and_write sg\n");
+               ret = TCM_OUT_OF_RESOURCES;
+               goto out;
+       }
+       /*
+        * Setup verify and write data payloads from total NumberLBAs.
+        */
+       rc = sg_copy_to_buffer(cmd->t_data_sg, cmd->t_data_nents, buf,
+                              cmd->data_length);
+       if (!rc) {
+               pr_err("sg_copy_to_buffer() failed for compare_and_write\n");
+               ret = TCM_OUT_OF_RESOURCES;
+               goto out;
+       }
+       /*
+        * Compare the SCSI READ payload against the verify payload
+        */
+       for_each_sg(cmd->t_bidi_data_sg, sg, cmd->t_bidi_data_nents, i) {
+               addr = (unsigned char *)kmap_atomic(sg_page(sg));
+               if (!addr) {
+                       ret = TCM_OUT_OF_RESOURCES;
+                       goto out;
+               }
+
+               len = min(sg->length, compare_len);
+
+               if (memcmp(addr, buf + offset, len)) {
+                       pr_warn("Detected MISCOMPARE for addr: %p buf: %p\n",
+                               addr, buf + offset);
+                       kunmap_atomic(addr);
+                       goto miscompare;
+               }
+               kunmap_atomic(addr);
+
+               offset += len;
+               compare_len -= len;
+               if (!compare_len)
+                       break;
+       }
+
+       i = 0;
+       len = cmd->t_task_nolb * block_size;
+       sg_miter_start(&m, cmd->t_data_sg, cmd->t_data_nents, SG_MITER_TO_SG);
+       /*
+        * Currently assumes NoLB=1 and SGLs are PAGE_SIZE..
+        */
+       while (len) {
+               sg_miter_next(&m);
+
+               if (block_size < PAGE_SIZE) {
+                       sg_set_page(&write_sg[i], m.page, block_size,
+                                   block_size);
+               } else {
+                       sg_miter_next(&m);
+                       sg_set_page(&write_sg[i], m.page, block_size,
+                                   0);
+               }
+               len -= block_size;
+               i++;
+       }
+       sg_miter_stop(&m);
+       /*
+        * Save the original SGL + nents values before updating to new
+        * assignments, to be released in transport_free_pages() ->
+        * transport_reset_sgl_orig()
+        */
+       cmd->t_data_sg_orig = cmd->t_data_sg;
+       cmd->t_data_sg = write_sg;
+       cmd->t_data_nents_orig = cmd->t_data_nents;
+       cmd->t_data_nents = 1;
+
+       cmd->sam_task_attr = MSG_HEAD_TAG;
+       cmd->transport_complete_callback = compare_and_write_post;
+       /*
+        * Now reset ->execute_cmd() to the normal sbc_execute_rw() handler
+        * for submitting the adjusted SGL to write instance user-data.
+        */
+       cmd->execute_cmd = sbc_execute_rw;
+
+       spin_lock_irq(&cmd->t_state_lock);
+       cmd->t_state = TRANSPORT_PROCESSING;
+       cmd->transport_state |= CMD_T_ACTIVE|CMD_T_BUSY|CMD_T_SENT;
+       spin_unlock_irq(&cmd->t_state_lock);
+
+       __target_execute_cmd(cmd);
+
+       kfree(buf);
+       return ret;
+
+miscompare:
+       pr_warn("Target/%s: Send MISCOMPARE check condition and sense\n",
+               dev->transport->name);
+       ret = TCM_MISCOMPARE_VERIFY;
+out:
+       /*
+        * In the MISCOMPARE or failure case, unlock ->caw_sem obtained in
+        * sbc_compare_and_write() before the original READ I/O submission.
+        */
+       up(&dev->caw_sem);
+       kfree(write_sg);
+       kfree(buf);
+       return ret;
+}
+
+static sense_reason_t
+sbc_compare_and_write(struct se_cmd *cmd)
+{
+       struct se_device *dev = cmd->se_dev;
+       sense_reason_t ret;
+       int rc;
+       /*
+        * Submit the READ first for COMPARE_AND_WRITE to perform the
+        * comparison using SGLs at cmd->t_bidi_data_sg.
+        */
+       rc = down_interruptible(&dev->caw_sem);
+       if ((rc != 0) || signal_pending(current)) {
+               cmd->transport_complete_callback = NULL;
+               return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
+       }
+
+       ret = cmd->execute_rw(cmd, cmd->t_bidi_data_sg, cmd->t_bidi_data_nents,
+                             DMA_FROM_DEVICE);
+       if (ret) {
+               cmd->transport_complete_callback = NULL;
+               up(&dev->caw_sem);
+               return ret;
+       }
+       /*
+        * Unlock of dev->caw_sem to occur in compare_and_write_callback()
+        * upon MISCOMPARE, or in compare_and_write_done() upon completion
+        * of WRITE instance user-data.
+        */
+       return TCM_NO_SENSE;
 }
 
 sense_reason_t
@@ -348,31 +538,36 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops)
                sectors = transport_get_sectors_6(cdb);
                cmd->t_task_lba = transport_lba_21(cdb);
                cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB;
-               cmd->execute_cmd = ops->execute_rw;
+               cmd->execute_rw = ops->execute_rw;
+               cmd->execute_cmd = sbc_execute_rw;
                break;
        case READ_10:
                sectors = transport_get_sectors_10(cdb);
                cmd->t_task_lba = transport_lba_32(cdb);
                cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB;
-               cmd->execute_cmd = ops->execute_rw;
+               cmd->execute_rw = ops->execute_rw;
+               cmd->execute_cmd = sbc_execute_rw;
                break;
        case READ_12:
                sectors = transport_get_sectors_12(cdb);
                cmd->t_task_lba = transport_lba_32(cdb);
                cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB;
-               cmd->execute_cmd = ops->execute_rw;
+               cmd->execute_rw = ops->execute_rw;
+               cmd->execute_cmd = sbc_execute_rw;
                break;
        case READ_16:
                sectors = transport_get_sectors_16(cdb);
                cmd->t_task_lba = transport_lba_64(cdb);
                cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB;
-               cmd->execute_cmd = ops->execute_rw;
+               cmd->execute_rw = ops->execute_rw;
+               cmd->execute_cmd = sbc_execute_rw;
                break;
        case WRITE_6:
                sectors = transport_get_sectors_6(cdb);
                cmd->t_task_lba = transport_lba_21(cdb);
                cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB;
-               cmd->execute_cmd = ops->execute_rw;
+               cmd->execute_rw = ops->execute_rw;
+               cmd->execute_cmd = sbc_execute_rw;
                break;
        case WRITE_10:
        case WRITE_VERIFY:
@@ -381,7 +576,8 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops)
                if (cdb[1] & 0x8)
                        cmd->se_cmd_flags |= SCF_FUA;
                cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB;
-               cmd->execute_cmd = ops->execute_rw;
+               cmd->execute_rw = ops->execute_rw;
+               cmd->execute_cmd = sbc_execute_rw;
                break;
        case WRITE_12:
                sectors = transport_get_sectors_12(cdb);
@@ -389,7 +585,8 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops)
                if (cdb[1] & 0x8)
                        cmd->se_cmd_flags |= SCF_FUA;
                cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB;
-               cmd->execute_cmd = ops->execute_rw;
+               cmd->execute_rw = ops->execute_rw;
+               cmd->execute_cmd = sbc_execute_rw;
                break;
        case WRITE_16:
                sectors = transport_get_sectors_16(cdb);
@@ -397,7 +594,8 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops)
                if (cdb[1] & 0x8)
                        cmd->se_cmd_flags |= SCF_FUA;
                cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB;
-               cmd->execute_cmd = ops->execute_rw;
+               cmd->execute_rw = ops->execute_rw;
+               cmd->execute_cmd = sbc_execute_rw;
                break;
        case XDWRITEREAD_10:
                if (cmd->data_direction != DMA_TO_DEVICE ||
@@ -411,7 +609,8 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops)
                /*
                 * Setup BIDI XOR callback to be run after I/O completion.
                 */
-               cmd->execute_cmd = ops->execute_rw;
+               cmd->execute_rw = ops->execute_rw;
+               cmd->execute_cmd = sbc_execute_rw;
                cmd->transport_complete_callback = &xdreadwrite_callback;
                if (cdb[1] & 0x8)
                        cmd->se_cmd_flags |= SCF_FUA;
@@ -434,7 +633,8 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops)
                         * Setup BIDI XOR callback to be run during after I/O
                         * completion.
                         */
-                       cmd->execute_cmd = ops->execute_rw;
+                       cmd->execute_rw = ops->execute_rw;
+                       cmd->execute_cmd = sbc_execute_rw;
                        cmd->transport_complete_callback = &xdreadwrite_callback;
                        if (cdb[1] & 0x8)
                                cmd->se_cmd_flags |= SCF_FUA;
@@ -461,6 +661,28 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops)
                }
                break;
        }
+       case COMPARE_AND_WRITE:
+               sectors = cdb[13];
+               /*
+                * Currently enforce COMPARE_AND_WRITE for a single sector
+                */
+               if (sectors > 1) {
+                       pr_err("COMPARE_AND_WRITE contains NoLB: %u greater"
+                              " than 1\n", sectors);
+                       return TCM_INVALID_CDB_FIELD;
+               }
+               /*
+                * Double size because we have two buffers, note that
+                * zero is not an error..
+                */
+               size = 2 * sbc_get_size(cmd, sectors);
+               cmd->t_task_lba = get_unaligned_be64(&cdb[2]);
+               cmd->t_task_nolb = sectors;
+               cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB | SCF_COMPARE_AND_WRITE;
+               cmd->execute_rw = ops->execute_rw;
+               cmd->execute_cmd = sbc_compare_and_write;
+               cmd->transport_complete_callback = compare_and_write_callback;
+               break;
        case READ_CAPACITY:
                size = READ_CAP_LEN;
                cmd->execute_cmd = sbc_emulate_readcapacity;
@@ -600,7 +822,8 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops)
                        return TCM_ADDRESS_OUT_OF_RANGE;
                }
 
-               size = sbc_get_size(cmd, sectors);
+               if (!(cmd->se_cmd_flags & SCF_COMPARE_AND_WRITE))
+                       size = sbc_get_size(cmd, sectors);
        }
 
        return target_cmd_size_check(cmd, size);
index 9fabbf7..0745395 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * SCSI Primary Commands (SPC) parsing and emulation.
  *
- * (c) Copyright 2002-2012 RisingTide Systems LLC.
+ * (c) Copyright 2002-2013 Datera, Inc.
  *
  * Nicholas A. Bellinger <nab@kernel.org>
  *
@@ -35,7 +35,7 @@
 #include "target_core_alua.h"
 #include "target_core_pr.h"
 #include "target_core_ua.h"
-
+#include "target_core_xcopy.h"
 
 static void spc_fill_alua_data(struct se_port *port, unsigned char *buf)
 {
@@ -95,6 +95,12 @@ spc_emulate_inquiry_std(struct se_cmd *cmd, unsigned char *buf)
         */
        spc_fill_alua_data(lun->lun_sep, buf);
 
+       /*
+        * Set Third-Party Copy (3PC) bit to indicate support for EXTENDED_COPY
+        */
+       if (dev->dev_attrib.emulate_3pc)
+               buf[5] |= 0x8;
+
        buf[7] = 0x2; /* CmdQue=1 */
 
        memcpy(&buf[8], "LIO-ORG ", 8);
@@ -129,8 +135,8 @@ spc_emulate_evpd_80(struct se_cmd *cmd, unsigned char *buf)
        return 0;
 }
 
-static void spc_parse_naa_6h_vendor_specific(struct se_device *dev,
-               unsigned char *buf)
+void spc_parse_naa_6h_vendor_specific(struct se_device *dev,
+                                     unsigned char *buf)
 {
        unsigned char *p = &dev->t10_wwn.unit_serial[0];
        int cnt;
@@ -460,6 +466,11 @@ spc_emulate_evpd_b0(struct se_cmd *cmd, unsigned char *buf)
 
        /* Set WSNZ to 1 */
        buf[4] = 0x01;
+       /*
+        * Set MAXIMUM COMPARE AND WRITE LENGTH
+        */
+       if (dev->dev_attrib.emulate_caw)
+               buf[5] = 0x01;
 
        /*
         * Set OPTIMAL TRANSFER LENGTH GRANULARITY
@@ -1250,8 +1261,14 @@ spc_parse_cdb(struct se_cmd *cmd, unsigned int *size)
                *size = (cdb[6] << 24) | (cdb[7] << 16) | (cdb[8] << 8) | cdb[9];
                break;
        case EXTENDED_COPY:
-       case READ_ATTRIBUTE:
+               *size = get_unaligned_be32(&cdb[10]);
+               cmd->execute_cmd = target_do_xcopy;
+               break;
        case RECEIVE_COPY_RESULTS:
+               *size = get_unaligned_be32(&cdb[10]);
+               cmd->execute_cmd = target_do_receive_copy_results;
+               break;
+       case READ_ATTRIBUTE:
        case WRITE_ATTRIBUTE:
                *size = (cdb[10] << 24) | (cdb[11] << 16) |
                       (cdb[12] << 8) | cdb[13];
index d154ce7..9c642e0 100644 (file)
@@ -4,7 +4,7 @@
  * Modern ConfigFS group context specific statistics based on original
  * target_core_mib.c code
  *
- * (c) Copyright 2006-2012 RisingTide Systems LLC.
+ * (c) Copyright 2006-2013 Datera, Inc.
  *
  * Nicholas A. Bellinger <nab@linux-iscsi.org>
  *
index 0d7cacb..2500099 100644 (file)
@@ -3,7 +3,7 @@
  *
  * This file contains SPC-3 task management infrastructure
  *
- * (c) Copyright 2009-2012 RisingTide Systems LLC.
+ * (c) Copyright 2009-2013 Datera, Inc.
  *
  * Nicholas A. Bellinger <nab@kernel.org>
  *
index aac9d27..b9a6ec0 100644 (file)
@@ -3,7 +3,7 @@
  *
  * This file contains generic Target Portal Group related functions.
  *
- * (c) Copyright 2002-2012 RisingTide Systems LLC.
+ * (c) Copyright 2002-2013 Datera, Inc.
  *
  * Nicholas A. Bellinger <nab@kernel.org>
  *
index d8e49d7..84747cc 100644 (file)
@@ -3,7 +3,7 @@
  *
  * This file contains the Generic Target Engine Core.
  *
- * (c) Copyright 2002-2012 RisingTide Systems LLC.
+ * (c) Copyright 2002-2013 Datera, Inc.
  *
  * Nicholas A. Bellinger <nab@kernel.org>
  *
@@ -67,7 +67,6 @@ struct kmem_cache *t10_alua_tg_pt_gp_mem_cache;
 static void transport_complete_task_attr(struct se_cmd *cmd);
 static void transport_handle_queue_full(struct se_cmd *cmd,
                struct se_device *dev);
-static int transport_generic_get_mem(struct se_cmd *cmd);
 static int transport_put_cmd(struct se_cmd *cmd);
 static void target_complete_ok_work(struct work_struct *work);
 
@@ -232,6 +231,50 @@ struct se_session *transport_init_session(void)
 }
 EXPORT_SYMBOL(transport_init_session);
 
+int transport_alloc_session_tags(struct se_session *se_sess,
+                                unsigned int tag_num, unsigned int tag_size)
+{
+       int rc;
+
+       se_sess->sess_cmd_map = kzalloc(tag_num * tag_size, GFP_KERNEL);
+       if (!se_sess->sess_cmd_map) {
+               pr_err("Unable to allocate se_sess->sess_cmd_map\n");
+               return -ENOMEM;
+       }
+
+       rc = percpu_ida_init(&se_sess->sess_tag_pool, tag_num);
+       if (rc < 0) {
+               pr_err("Unable to init se_sess->sess_tag_pool,"
+                       " tag_num: %u\n", tag_num);
+               kfree(se_sess->sess_cmd_map);
+               se_sess->sess_cmd_map = NULL;
+               return -ENOMEM;
+       }
+
+       return 0;
+}
+EXPORT_SYMBOL(transport_alloc_session_tags);
+
+struct se_session *transport_init_session_tags(unsigned int tag_num,
+                                              unsigned int tag_size)
+{
+       struct se_session *se_sess;
+       int rc;
+
+       se_sess = transport_init_session();
+       if (IS_ERR(se_sess))
+               return se_sess;
+
+       rc = transport_alloc_session_tags(se_sess, tag_num, tag_size);
+       if (rc < 0) {
+               transport_free_session(se_sess);
+               return ERR_PTR(-ENOMEM);
+       }
+
+       return se_sess;
+}
+EXPORT_SYMBOL(transport_init_session_tags);
+
 /*
  * Called with spin_lock_irqsave(&struct se_portal_group->session_lock called.
  */
@@ -367,6 +410,10 @@ EXPORT_SYMBOL(transport_deregister_session_configfs);
 
 void transport_free_session(struct se_session *se_sess)
 {
+       if (se_sess->sess_cmd_map) {
+               percpu_ida_destroy(&se_sess->sess_tag_pool);
+               kfree(se_sess->sess_cmd_map);
+       }
        kmem_cache_free(se_sess_cache, se_sess);
 }
 EXPORT_SYMBOL(transport_free_session);
@@ -1206,7 +1253,7 @@ int transport_handle_cdb_direct(
 }
 EXPORT_SYMBOL(transport_handle_cdb_direct);
 
-static sense_reason_t
+sense_reason_t
 transport_generic_map_mem_to_cmd(struct se_cmd *cmd, struct scatterlist *sgl,
                u32 sgl_count, struct scatterlist *sgl_bidi, u32 sgl_bidi_count)
 {
@@ -1512,6 +1559,13 @@ void transport_generic_request_failure(struct se_cmd *cmd,
         * For SAM Task Attribute emulation for failed struct se_cmd
         */
        transport_complete_task_attr(cmd);
+       /*
+        * Handle special case for COMPARE_AND_WRITE failure, where the
+        * callback is expected to drop the per device ->caw_mutex.
+        */
+       if ((cmd->se_cmd_flags & SCF_COMPARE_AND_WRITE) &&
+            cmd->transport_complete_callback)
+               cmd->transport_complete_callback(cmd);
 
        switch (sense_reason) {
        case TCM_NON_EXISTENT_LUN:
@@ -1579,7 +1633,7 @@ queue_full:
 }
 EXPORT_SYMBOL(transport_generic_request_failure);
 
-static void __target_execute_cmd(struct se_cmd *cmd)
+void __target_execute_cmd(struct se_cmd *cmd)
 {
        sense_reason_t ret;
 
@@ -1784,7 +1838,7 @@ static void transport_complete_qf(struct se_cmd *cmd)
                ret = cmd->se_tfo->queue_data_in(cmd);
                break;
        case DMA_TO_DEVICE:
-               if (cmd->t_bidi_data_sg) {
+               if (cmd->se_cmd_flags & SCF_BIDI) {
                        ret = cmd->se_tfo->queue_data_in(cmd);
                        if (ret < 0)
                                break;
@@ -1856,10 +1910,25 @@ static void target_complete_ok_work(struct work_struct *work)
        }
        /*
         * Check for a callback, used by amongst other things
-        * XDWRITE_READ_10 emulation.
+        * XDWRITE_READ_10 and COMPARE_AND_WRITE emulation.
         */
-       if (cmd->transport_complete_callback)
-               cmd->transport_complete_callback(cmd);
+       if (cmd->transport_complete_callback) {
+               sense_reason_t rc;
+
+               rc = cmd->transport_complete_callback(cmd);
+               if (!rc && !(cmd->se_cmd_flags & SCF_COMPARE_AND_WRITE_POST)) {
+                       return;
+               } else if (rc) {
+                       ret = transport_send_check_condition_and_sense(cmd,
+                                               rc, 0);
+                       if (ret == -EAGAIN || ret == -ENOMEM)
+                               goto queue_full;
+
+                       transport_lun_remove_cmd(cmd);
+                       transport_cmd_check_stop_to_fabric(cmd);
+                       return;
+               }
+       }
 
        switch (cmd->data_direction) {
        case DMA_FROM_DEVICE:
@@ -1885,7 +1954,7 @@ static void target_complete_ok_work(struct work_struct *work)
                /*
                 * Check if we need to send READ payload for BIDI-COMMAND
                 */
-               if (cmd->t_bidi_data_sg) {
+               if (cmd->se_cmd_flags & SCF_BIDI) {
                        spin_lock(&cmd->se_lun->lun_sep_lock);
                        if (cmd->se_lun->lun_sep) {
                                cmd->se_lun->lun_sep->sep_stats.tx_data_octets +=
@@ -1930,10 +1999,29 @@ static inline void transport_free_sgl(struct scatterlist *sgl, int nents)
        kfree(sgl);
 }
 
+static inline void transport_reset_sgl_orig(struct se_cmd *cmd)
+{
+       /*
+        * Check for saved t_data_sg that may be used for COMPARE_AND_WRITE
+        * emulation, and free + reset pointers if necessary..
+        */
+       if (!cmd->t_data_sg_orig)
+               return;
+
+       kfree(cmd->t_data_sg);
+       cmd->t_data_sg = cmd->t_data_sg_orig;
+       cmd->t_data_sg_orig = NULL;
+       cmd->t_data_nents = cmd->t_data_nents_orig;
+       cmd->t_data_nents_orig = 0;
+}
+
 static inline void transport_free_pages(struct se_cmd *cmd)
 {
-       if (cmd->se_cmd_flags & SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC)
+       if (cmd->se_cmd_flags & SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC) {
+               transport_reset_sgl_orig(cmd);
                return;
+       }
+       transport_reset_sgl_orig(cmd);
 
        transport_free_sgl(cmd->t_data_sg, cmd->t_data_nents);
        cmd->t_data_sg = NULL;
@@ -2029,24 +2117,22 @@ void transport_kunmap_data_sg(struct se_cmd *cmd)
 }
 EXPORT_SYMBOL(transport_kunmap_data_sg);
 
-static int
-transport_generic_get_mem(struct se_cmd *cmd)
+int
+target_alloc_sgl(struct scatterlist **sgl, unsigned int *nents, u32 length,
+                bool zero_page)
 {
-       u32 length = cmd->data_length;
-       unsigned int nents;
+       struct scatterlist *sg;
        struct page *page;
-       gfp_t zero_flag;
+       gfp_t zero_flag = (zero_page) ? __GFP_ZERO : 0;
+       unsigned int nent;
        int i = 0;
 
-       nents = DIV_ROUND_UP(length, PAGE_SIZE);
-       cmd->t_data_sg = kmalloc(sizeof(struct scatterlist) * nents, GFP_KERNEL);
-       if (!cmd->t_data_sg)
+       nent = DIV_ROUND_UP(length, PAGE_SIZE);
+       sg = kmalloc(sizeof(struct scatterlist) * nent, GFP_KERNEL);
+       if (!sg)
                return -ENOMEM;
 
-       cmd->t_data_nents = nents;
-       sg_init_table(cmd->t_data_sg, nents);
-
-       zero_flag = cmd->se_cmd_flags & SCF_SCSI_DATA_CDB ? 0 : __GFP_ZERO;
+       sg_init_table(sg, nent);
 
        while (length) {
                u32 page_len = min_t(u32, length, PAGE_SIZE);
@@ -2054,19 +2140,20 @@ transport_generic_get_mem(struct se_cmd *cmd)
                if (!page)
                        goto out;
 
-               sg_set_page(&cmd->t_data_sg[i], page, page_len, 0);
+               sg_set_page(&sg[i], page, page_len, 0);
                length -= page_len;
                i++;
        }
+       *sgl = sg;
+       *nents = nent;
        return 0;
 
 out:
        while (i > 0) {
                i--;
-               __free_page(sg_page(&cmd->t_data_sg[i]));
+               __free_page(sg_page(&sg[i]));
        }
-       kfree(cmd->t_data_sg);
-       cmd->t_data_sg = NULL;
+       kfree(sg);
        return -ENOMEM;
 }
 
@@ -2087,7 +2174,27 @@ transport_generic_new_cmd(struct se_cmd *cmd)
         */
        if (!(cmd->se_cmd_flags & SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC) &&
            cmd->data_length) {
-               ret = transport_generic_get_mem(cmd);
+               bool zero_flag = !(cmd->se_cmd_flags & SCF_SCSI_DATA_CDB);
+
+               if ((cmd->se_cmd_flags & SCF_BIDI) ||
+                   (cmd->se_cmd_flags & SCF_COMPARE_AND_WRITE)) {
+                       u32 bidi_length;
+
+                       if (cmd->se_cmd_flags & SCF_COMPARE_AND_WRITE)
+                               bidi_length = cmd->t_task_nolb *
+                                             cmd->se_dev->dev_attrib.block_size;
+                       else
+                               bidi_length = cmd->data_length;
+
+                       ret = target_alloc_sgl(&cmd->t_bidi_data_sg,
+                                              &cmd->t_bidi_data_nents,
+                                              bidi_length, zero_flag);
+                       if (ret < 0)
+                               return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
+               }
+
+               ret = target_alloc_sgl(&cmd->t_data_sg, &cmd->t_data_nents,
+                                      cmd->data_length, zero_flag);
                if (ret < 0)
                        return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
        }
@@ -2740,6 +2847,15 @@ transport_send_check_condition_and_sense(struct se_cmd *cmd,
                buffer[SPC_ASC_KEY_OFFSET] = asc;
                buffer[SPC_ASCQ_KEY_OFFSET] = ascq;
                break;
+       case TCM_MISCOMPARE_VERIFY:
+               /* CURRENT ERROR */
+               buffer[0] = 0x70;
+               buffer[SPC_ADD_SENSE_LEN_OFFSET] = 10;
+               buffer[SPC_SENSE_KEY_OFFSET] = MISCOMPARE;
+               /* MISCOMPARE DURING VERIFY OPERATION */
+               buffer[SPC_ASC_KEY_OFFSET] = 0x1d;
+               buffer[SPC_ASCQ_KEY_OFFSET] = 0x00;
+               break;
        case TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE:
        default:
                /* CURRENT ERROR */
index bf0e390..b04467e 100644 (file)
@@ -3,7 +3,7 @@
  *
  * This file contains logic for SPC-3 Unit Attention emulation
  *
- * (c) Copyright 2009-2012 RisingTide Systems LLC.
+ * (c) Copyright 2009-2013 Datera, Inc.
  *
  * Nicholas A. Bellinger <nab@kernel.org>
  *
diff --git a/drivers/target/target_core_xcopy.c b/drivers/target/target_core_xcopy.c
new file mode 100644 (file)
index 0000000..4d22e7d
--- /dev/null
@@ -0,0 +1,1081 @@
+/*******************************************************************************
+ * Filename: target_core_xcopy.c
+ *
+ * This file contains support for SPC-4 Extended-Copy offload with generic
+ * TCM backends.
+ *
+ * Copyright (c) 2011-2013 Datera, Inc. All rights reserved.
+ *
+ * Author:
+ * Nicholas A. Bellinger <nab@daterainc.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ ******************************************************************************/
+
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/list.h>
+#include <linux/configfs.h>
+#include <scsi/scsi.h>
+#include <scsi/scsi_cmnd.h>
+#include <asm/unaligned.h>
+
+#include <target/target_core_base.h>
+#include <target/target_core_backend.h>
+#include <target/target_core_fabric.h>
+#include <target/target_core_configfs.h>
+
+#include "target_core_pr.h"
+#include "target_core_ua.h"
+#include "target_core_xcopy.h"
+
+static struct workqueue_struct *xcopy_wq = NULL;
+/*
+ * From target_core_spc.c
+ */
+extern void spc_parse_naa_6h_vendor_specific(struct se_device *, unsigned char *);
+/*
+ * From target_core_device.c
+ */
+extern struct mutex g_device_mutex;
+extern struct list_head g_device_list;
+/*
+ * From target_core_configfs.c
+ */
+extern struct configfs_subsystem *target_core_subsystem[];
+
+static int target_xcopy_gen_naa_ieee(struct se_device *dev, unsigned char *buf)
+{
+       int off = 0;
+
+       buf[off++] = (0x6 << 4);
+       buf[off++] = 0x01;
+       buf[off++] = 0x40;
+       buf[off] = (0x5 << 4);
+
+       spc_parse_naa_6h_vendor_specific(dev, &buf[off]);
+       return 0;
+}
+
+static int target_xcopy_locate_se_dev_e4(struct se_cmd *se_cmd, struct xcopy_op *xop,
+                                       bool src)
+{
+       struct se_device *se_dev;
+       struct configfs_subsystem *subsys = target_core_subsystem[0];
+       unsigned char tmp_dev_wwn[XCOPY_NAA_IEEE_REGEX_LEN], *dev_wwn;
+       int rc;
+
+       if (src == true)
+               dev_wwn = &xop->dst_tid_wwn[0];
+       else
+               dev_wwn = &xop->src_tid_wwn[0];
+
+       mutex_lock(&g_device_mutex);
+       list_for_each_entry(se_dev, &g_device_list, g_dev_node) {
+
+               memset(&tmp_dev_wwn[0], 0, XCOPY_NAA_IEEE_REGEX_LEN);
+               target_xcopy_gen_naa_ieee(se_dev, &tmp_dev_wwn[0]);
+
+               rc = memcmp(&tmp_dev_wwn[0], dev_wwn, XCOPY_NAA_IEEE_REGEX_LEN);
+               if (rc != 0)
+                       continue;
+
+               if (src == true) {
+                       xop->dst_dev = se_dev;
+                       pr_debug("XCOPY 0xe4: Setting xop->dst_dev: %p from located"
+                               " se_dev\n", xop->dst_dev);
+               } else {
+                       xop->src_dev = se_dev;
+                       pr_debug("XCOPY 0xe4: Setting xop->src_dev: %p from located"
+                               " se_dev\n", xop->src_dev);
+               }
+
+               rc = configfs_depend_item(subsys,
+                               &se_dev->dev_group.cg_item);
+               if (rc != 0) {
+                       pr_err("configfs_depend_item attempt failed:"
+                               " %d for se_dev: %p\n", rc, se_dev);
+                       mutex_unlock(&g_device_mutex);
+                       return rc;
+               }
+
+               pr_debug("Called configfs_depend_item for subsys: %p se_dev: %p"
+                       " se_dev->se_dev_group: %p\n", subsys, se_dev,
+                       &se_dev->dev_group);
+
+               mutex_unlock(&g_device_mutex);
+               return 0;
+       }
+       mutex_unlock(&g_device_mutex);
+
+       pr_err("Unable to locate 0xe4 descriptor for EXTENDED_COPY\n");
+       return -EINVAL;
+}
+
+static int target_xcopy_parse_tiddesc_e4(struct se_cmd *se_cmd, struct xcopy_op *xop,
+                               unsigned char *p, bool src)
+{
+       unsigned char *desc = p;
+       unsigned short ript;
+       u8 desig_len;
+       /*
+        * Extract RELATIVE INITIATOR PORT IDENTIFIER
+        */
+       ript = get_unaligned_be16(&desc[2]);
+       pr_debug("XCOPY 0xe4: RELATIVE INITIATOR PORT IDENTIFIER: %hu\n", ript);
+       /*
+        * Check for supported code set, association, and designator type
+        */
+       if ((desc[4] & 0x0f) != 0x1) {
+               pr_err("XCOPY 0xe4: code set of non binary type not supported\n");
+               return -EINVAL;
+       }
+       if ((desc[5] & 0x30) != 0x00) {
+               pr_err("XCOPY 0xe4: association other than LUN not supported\n");
+               return -EINVAL;
+       }
+       if ((desc[5] & 0x0f) != 0x3) {
+               pr_err("XCOPY 0xe4: designator type unsupported: 0x%02x\n",
+                               (desc[5] & 0x0f));
+               return -EINVAL;
+       }
+       /*
+        * Check for matching 16 byte length for NAA IEEE Registered Extended
+        * Assigned designator
+        */
+       desig_len = desc[7];
+       if (desig_len != 16) {
+               pr_err("XCOPY 0xe4: invalid desig_len: %d\n", (int)desig_len);
+               return -EINVAL;
+       }
+       pr_debug("XCOPY 0xe4: desig_len: %d\n", (int)desig_len);
+       /*
+        * Check for NAA IEEE Registered Extended Assigned header..
+        */
+       if ((desc[8] & 0xf0) != 0x60) {
+               pr_err("XCOPY 0xe4: Unsupported DESIGNATOR TYPE: 0x%02x\n",
+                                       (desc[8] & 0xf0));
+               return -EINVAL;
+       }
+
+       if (src == true) {
+               memcpy(&xop->src_tid_wwn[0], &desc[8], XCOPY_NAA_IEEE_REGEX_LEN);
+               /*
+                * Determine if the source designator matches the local device
+                */
+               if (!memcmp(&xop->local_dev_wwn[0], &xop->src_tid_wwn[0],
+                               XCOPY_NAA_IEEE_REGEX_LEN)) {
+                       xop->op_origin = XCOL_SOURCE_RECV_OP;
+                       xop->src_dev = se_cmd->se_dev;
+                       pr_debug("XCOPY 0xe4: Set xop->src_dev %p from source"
+                                       " received xop\n", xop->src_dev);
+               }
+       } else {
+               memcpy(&xop->dst_tid_wwn[0], &desc[8], XCOPY_NAA_IEEE_REGEX_LEN);
+               /*
+                * Determine if the destination designator matches the local device
+                */
+               if (!memcmp(&xop->local_dev_wwn[0], &xop->dst_tid_wwn[0],
+                               XCOPY_NAA_IEEE_REGEX_LEN)) {
+                       xop->op_origin = XCOL_DEST_RECV_OP;
+                       xop->dst_dev = se_cmd->se_dev;
+                       pr_debug("XCOPY 0xe4: Set xop->dst_dev: %p from destination"
+                               " received xop\n", xop->dst_dev);
+               }
+       }
+
+       return 0;
+}
+
+static int target_xcopy_parse_target_descriptors(struct se_cmd *se_cmd,
+                               struct xcopy_op *xop, unsigned char *p,
+                               unsigned short tdll)
+{
+       struct se_device *local_dev = se_cmd->se_dev;
+       unsigned char *desc = p;
+       int offset = tdll % XCOPY_TARGET_DESC_LEN, rc, ret = 0;
+       unsigned short start = 0;
+       bool src = true;
+
+       if (offset != 0) {
+               pr_err("XCOPY target descriptor list length is not"
+                       " multiple of %d\n", XCOPY_TARGET_DESC_LEN);
+               return -EINVAL;
+       }
+       if (tdll > 64) {
+               pr_err("XCOPY target descriptor supports a maximum"
+                       " two src/dest descriptors, tdll: %hu too large..\n", tdll);
+               return -EINVAL;
+       }
+       /*
+        * Generate an IEEE Registered Extended designator based upon the
+        * se_device the XCOPY was received upon..
+        */
+       memset(&xop->local_dev_wwn[0], 0, XCOPY_NAA_IEEE_REGEX_LEN);
+       target_xcopy_gen_naa_ieee(local_dev, &xop->local_dev_wwn[0]);
+
+       while (start < tdll) {
+               /*
+                * Check target descriptor identification with 0xE4 type with
+                * use VPD 0x83 WWPN matching ..
+                */
+               switch (desc[0]) {
+               case 0xe4:
+                       rc = target_xcopy_parse_tiddesc_e4(se_cmd, xop,
+                                                       &desc[0], src);
+                       if (rc != 0)
+                               goto out;
+                       /*
+                        * Assume target descriptors are in source -> destination order..
+                        */
+                       if (src == true)
+                               src = false;
+                       else
+                               src = true;
+                       start += XCOPY_TARGET_DESC_LEN;
+                       desc += XCOPY_TARGET_DESC_LEN;
+                       ret++;
+                       break;
+               default:
+                       pr_err("XCOPY unsupported descriptor type code:"
+                                       " 0x%02x\n", desc[0]);
+                       goto out;
+               }
+       }
+
+       if (xop->op_origin == XCOL_SOURCE_RECV_OP)
+               rc = target_xcopy_locate_se_dev_e4(se_cmd, xop, true);
+       else
+               rc = target_xcopy_locate_se_dev_e4(se_cmd, xop, false);
+
+       if (rc < 0)
+               goto out;
+
+       pr_debug("XCOPY TGT desc: Source dev: %p NAA IEEE WWN: 0x%16phN\n",
+                xop->src_dev, &xop->src_tid_wwn[0]);
+       pr_debug("XCOPY TGT desc: Dest dev: %p NAA IEEE WWN: 0x%16phN\n",
+                xop->dst_dev, &xop->dst_tid_wwn[0]);
+
+       return ret;
+
+out:
+       return -EINVAL;
+}
+
+static int target_xcopy_parse_segdesc_02(struct se_cmd *se_cmd, struct xcopy_op *xop,
+                                       unsigned char *p)
+{
+       unsigned char *desc = p;
+       int dc = (desc[1] & 0x02);
+       unsigned short desc_len;
+
+       desc_len = get_unaligned_be16(&desc[2]);
+       if (desc_len != 0x18) {
+               pr_err("XCOPY segment desc 0x02: Illegal desc_len:"
+                               " %hu\n", desc_len);
+               return -EINVAL;
+       }
+
+       xop->stdi = get_unaligned_be16(&desc[4]);
+       xop->dtdi = get_unaligned_be16(&desc[6]);
+       pr_debug("XCOPY seg desc 0x02: desc_len: %hu stdi: %hu dtdi: %hu, DC: %d\n",
+               desc_len, xop->stdi, xop->dtdi, dc);
+
+       xop->nolb = get_unaligned_be16(&desc[10]);
+       xop->src_lba = get_unaligned_be64(&desc[12]);
+       xop->dst_lba = get_unaligned_be64(&desc[20]);
+       pr_debug("XCOPY seg desc 0x02: nolb: %hu src_lba: %llu dst_lba: %llu\n",
+               xop->nolb, (unsigned long long)xop->src_lba,
+               (unsigned long long)xop->dst_lba);
+
+       if (dc != 0) {
+               xop->dbl = (desc[29] << 16) & 0xff;
+               xop->dbl |= (desc[30] << 8) & 0xff;
+               xop->dbl |= desc[31] & 0xff;
+
+               pr_debug("XCOPY seg desc 0x02: DC=1 w/ dbl: %u\n", xop->dbl);
+       }
+       return 0;
+}
+
+static int target_xcopy_parse_segment_descriptors(struct se_cmd *se_cmd,
+                               struct xcopy_op *xop, unsigned char *p,
+                               unsigned int sdll)
+{
+       unsigned char *desc = p;
+       unsigned int start = 0;
+       int offset = sdll % XCOPY_SEGMENT_DESC_LEN, rc, ret = 0;
+
+       if (offset != 0) {
+               pr_err("XCOPY segment descriptor list length is not"
+                       " multiple of %d\n", XCOPY_SEGMENT_DESC_LEN);
+               return -EINVAL;
+       }
+
+       while (start < sdll) {
+               /*
+                * Check segment descriptor type code for block -> block
+                */
+               switch (desc[0]) {
+               case 0x02:
+                       rc = target_xcopy_parse_segdesc_02(se_cmd, xop, desc);
+                       if (rc < 0)
+                               goto out;
+
+                       ret++;
+                       start += XCOPY_SEGMENT_DESC_LEN;
+                       desc += XCOPY_SEGMENT_DESC_LEN;
+                       break;
+               default:
+                       pr_err("XCOPY unspported segment descriptor"
+                               "type: 0x%02x\n", desc[0]);
+                       goto out;
+               }
+       }
+
+       return ret;
+
+out:
+       return -EINVAL;
+}
+
+/*
+ * Start xcopy_pt ops
+ */
+
+struct xcopy_pt_cmd {
+       bool remote_port;
+       struct se_cmd se_cmd;
+       struct xcopy_op *xcopy_op;
+       struct completion xpt_passthrough_sem;
+};
+
+static struct se_port xcopy_pt_port;
+static struct se_portal_group xcopy_pt_tpg;
+static struct se_session xcopy_pt_sess;
+static struct se_node_acl xcopy_pt_nacl;
+
+static char *xcopy_pt_get_fabric_name(void)
+{
+        return "xcopy-pt";
+}
+
+static u32 xcopy_pt_get_tag(struct se_cmd *se_cmd)
+{
+        return 0;
+}
+
+static int xcopy_pt_get_cmd_state(struct se_cmd *se_cmd)
+{
+        return 0;
+}
+
+static void xcopy_pt_undepend_remotedev(struct xcopy_op *xop)
+{
+       struct configfs_subsystem *subsys = target_core_subsystem[0];
+       struct se_device *remote_dev;
+
+       if (xop->op_origin == XCOL_SOURCE_RECV_OP)
+               remote_dev = xop->dst_dev;
+       else
+               remote_dev = xop->src_dev;
+
+       pr_debug("Calling configfs_undepend_item for subsys: %p"
+                 " remote_dev: %p remote_dev->dev_group: %p\n",
+                 subsys, remote_dev, &remote_dev->dev_group.cg_item);
+
+       configfs_undepend_item(subsys, &remote_dev->dev_group.cg_item);
+}
+
+static void xcopy_pt_release_cmd(struct se_cmd *se_cmd)
+{
+       struct xcopy_pt_cmd *xpt_cmd = container_of(se_cmd,
+                               struct xcopy_pt_cmd, se_cmd);
+
+       if (xpt_cmd->remote_port)
+               kfree(se_cmd->se_lun);
+
+       kfree(xpt_cmd);
+}
+
+static int xcopy_pt_check_stop_free(struct se_cmd *se_cmd)
+{
+       struct xcopy_pt_cmd *xpt_cmd = container_of(se_cmd,
+                               struct xcopy_pt_cmd, se_cmd);
+
+       complete(&xpt_cmd->xpt_passthrough_sem);
+       return 0;
+}
+
+static int xcopy_pt_write_pending(struct se_cmd *se_cmd)
+{
+       return 0;
+}
+
+static int xcopy_pt_write_pending_status(struct se_cmd *se_cmd)
+{
+       return 0;
+}
+
+static int xcopy_pt_queue_data_in(struct se_cmd *se_cmd)
+{
+       return 0;
+}
+
+static int xcopy_pt_queue_status(struct se_cmd *se_cmd)
+{
+       return 0;
+}
+
+static struct target_core_fabric_ops xcopy_pt_tfo = {
+       .get_fabric_name        = xcopy_pt_get_fabric_name,
+       .get_task_tag           = xcopy_pt_get_tag,
+       .get_cmd_state          = xcopy_pt_get_cmd_state,
+       .release_cmd            = xcopy_pt_release_cmd,
+       .check_stop_free        = xcopy_pt_check_stop_free,
+       .write_pending          = xcopy_pt_write_pending,
+       .write_pending_status   = xcopy_pt_write_pending_status,
+       .queue_data_in          = xcopy_pt_queue_data_in,
+       .queue_status           = xcopy_pt_queue_status,
+};
+
+/*
+ * End xcopy_pt_ops
+ */
+
+int target_xcopy_setup_pt(void)
+{
+       xcopy_wq = alloc_workqueue("xcopy_wq", WQ_MEM_RECLAIM, 0);
+       if (!xcopy_wq) {
+               pr_err("Unable to allocate xcopy_wq\n");
+               return -ENOMEM;
+       }
+
+       memset(&xcopy_pt_port, 0, sizeof(struct se_port));
+       INIT_LIST_HEAD(&xcopy_pt_port.sep_alua_list);
+       INIT_LIST_HEAD(&xcopy_pt_port.sep_list);
+       mutex_init(&xcopy_pt_port.sep_tg_pt_md_mutex);
+
+       memset(&xcopy_pt_tpg, 0, sizeof(struct se_portal_group));
+       INIT_LIST_HEAD(&xcopy_pt_tpg.se_tpg_node);
+       INIT_LIST_HEAD(&xcopy_pt_tpg.acl_node_list);
+       INIT_LIST_HEAD(&xcopy_pt_tpg.tpg_sess_list);
+
+       xcopy_pt_port.sep_tpg = &xcopy_pt_tpg;
+       xcopy_pt_tpg.se_tpg_tfo = &xcopy_pt_tfo;
+
+       memset(&xcopy_pt_nacl, 0, sizeof(struct se_node_acl));
+       INIT_LIST_HEAD(&xcopy_pt_nacl.acl_list);
+       INIT_LIST_HEAD(&xcopy_pt_nacl.acl_sess_list);
+       memset(&xcopy_pt_sess, 0, sizeof(struct se_session));
+       INIT_LIST_HEAD(&xcopy_pt_sess.sess_list);
+       INIT_LIST_HEAD(&xcopy_pt_sess.sess_acl_list);
+
+       xcopy_pt_nacl.se_tpg = &xcopy_pt_tpg;
+       xcopy_pt_nacl.nacl_sess = &xcopy_pt_sess;
+
+       xcopy_pt_sess.se_tpg = &xcopy_pt_tpg;
+       xcopy_pt_sess.se_node_acl = &xcopy_pt_nacl;
+
+       return 0;
+}
+
+void target_xcopy_release_pt(void)
+{
+       if (xcopy_wq)
+               destroy_workqueue(xcopy_wq);
+}
+
+static void target_xcopy_setup_pt_port(
+       struct xcopy_pt_cmd *xpt_cmd,
+       struct xcopy_op *xop,
+       bool remote_port)
+{
+       struct se_cmd *ec_cmd = xop->xop_se_cmd;
+       struct se_cmd *pt_cmd = &xpt_cmd->se_cmd;
+
+       if (xop->op_origin == XCOL_SOURCE_RECV_OP) {
+               /*
+                * Honor destination port reservations for X-COPY PUSH emulation
+                * when CDB is received on local source port, and READs blocks to
+                * WRITE on remote destination port.
+                */
+               if (remote_port) {
+                       xpt_cmd->remote_port = remote_port;
+                       pt_cmd->se_lun->lun_sep = &xcopy_pt_port;
+                       pr_debug("Setup emulated remote DEST xcopy_pt_port: %p to"
+                               " cmd->se_lun->lun_sep for X-COPY data PUSH\n",
+                               pt_cmd->se_lun->lun_sep);
+               } else {
+                       pt_cmd->se_lun = ec_cmd->se_lun;
+                       pt_cmd->se_dev = ec_cmd->se_dev;
+
+                       pr_debug("Honoring local SRC port from ec_cmd->se_dev:"
+                               " %p\n", pt_cmd->se_dev);
+                       pt_cmd->se_lun = ec_cmd->se_lun;
+                       pr_debug("Honoring local SRC port from ec_cmd->se_lun: %p\n",
+                               pt_cmd->se_lun);
+               }
+       } else {
+               /*
+                * Honor source port reservation for X-COPY PULL emulation
+                * when CDB is received on local desintation port, and READs
+                * blocks from the remote source port to WRITE on local
+                * destination port.
+                */
+               if (remote_port) {
+                       xpt_cmd->remote_port = remote_port;
+                       pt_cmd->se_lun->lun_sep = &xcopy_pt_port;
+                       pr_debug("Setup emulated remote SRC xcopy_pt_port: %p to"
+                               " cmd->se_lun->lun_sep for X-COPY data PULL\n",
+                               pt_cmd->se_lun->lun_sep);
+               } else {
+                       pt_cmd->se_lun = ec_cmd->se_lun;
+                       pt_cmd->se_dev = ec_cmd->se_dev;
+
+                       pr_debug("Honoring local DST port from ec_cmd->se_dev:"
+                               " %p\n", pt_cmd->se_dev);
+                       pt_cmd->se_lun = ec_cmd->se_lun;
+                       pr_debug("Honoring local DST port from ec_cmd->se_lun: %p\n",
+                               pt_cmd->se_lun);
+               }
+       }
+}
+
+static int target_xcopy_init_pt_lun(
+       struct xcopy_pt_cmd *xpt_cmd,
+       struct xcopy_op *xop,
+       struct se_device *se_dev,
+       struct se_cmd *pt_cmd,
+       bool remote_port)
+{
+       /*
+        * Don't allocate + init an pt_cmd->se_lun if honoring local port for
+        * reservations.  The pt_cmd->se_lun pointer will be setup from within
+        * target_xcopy_setup_pt_port()
+        */
+       if (remote_port == false) {
+               pt_cmd->se_cmd_flags |= SCF_SE_LUN_CMD | SCF_CMD_XCOPY_PASSTHROUGH;
+               return 0;
+       }
+
+       pt_cmd->se_lun = kzalloc(sizeof(struct se_lun), GFP_KERNEL);
+       if (!pt_cmd->se_lun) {
+               pr_err("Unable to allocate pt_cmd->se_lun\n");
+               return -ENOMEM;
+       }
+       init_completion(&pt_cmd->se_lun->lun_shutdown_comp);
+       INIT_LIST_HEAD(&pt_cmd->se_lun->lun_cmd_list);
+       INIT_LIST_HEAD(&pt_cmd->se_lun->lun_acl_list);
+       spin_lock_init(&pt_cmd->se_lun->lun_acl_lock);
+       spin_lock_init(&pt_cmd->se_lun->lun_cmd_lock);
+       spin_lock_init(&pt_cmd->se_lun->lun_sep_lock);
+
+       pt_cmd->se_dev = se_dev;
+
+       pr_debug("Setup emulated se_dev: %p from se_dev\n", pt_cmd->se_dev);
+       pt_cmd->se_lun->lun_se_dev = se_dev;
+       pt_cmd->se_cmd_flags |= SCF_SE_LUN_CMD | SCF_CMD_XCOPY_PASSTHROUGH;
+
+       pr_debug("Setup emulated se_dev: %p to pt_cmd->se_lun->lun_se_dev\n",
+               pt_cmd->se_lun->lun_se_dev);
+
+       return 0;
+}
+
+static int target_xcopy_setup_pt_cmd(
+       struct xcopy_pt_cmd *xpt_cmd,
+       struct xcopy_op *xop,
+       struct se_device *se_dev,
+       unsigned char *cdb,
+       bool remote_port,
+       bool alloc_mem)
+{
+       struct se_cmd *cmd = &xpt_cmd->se_cmd;
+       sense_reason_t sense_rc;
+       int ret = 0, rc;
+       /*
+        * Setup LUN+port to honor reservations based upon xop->op_origin for
+        * X-COPY PUSH or X-COPY PULL based upon where the CDB was received.
+        */
+       rc = target_xcopy_init_pt_lun(xpt_cmd, xop, se_dev, cmd, remote_port);
+       if (rc < 0) {
+               ret = rc;
+               goto out;
+       }
+       xpt_cmd->xcopy_op = xop;
+       target_xcopy_setup_pt_port(xpt_cmd, xop, remote_port);
+
+       sense_rc = target_setup_cmd_from_cdb(cmd, cdb);
+       if (sense_rc) {
+               ret = -EINVAL;
+               goto out;
+       }
+
+       if (alloc_mem) {
+               rc = target_alloc_sgl(&cmd->t_data_sg, &cmd->t_data_nents,
+                                     cmd->data_length, false);
+               if (rc < 0) {
+                       ret = rc;
+                       goto out;
+               }
+               /*
+                * Set this bit so that transport_free_pages() allows the
+                * caller to release SGLs + physical memory allocated by
+                * transport_generic_get_mem()..
+                */
+               cmd->se_cmd_flags |= SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC;
+       } else {
+               /*
+                * Here the previously allocated SGLs for the internal READ
+                * are mapped zero-copy to the internal WRITE.
+                */
+               sense_rc = transport_generic_map_mem_to_cmd(cmd,
+                                       xop->xop_data_sg, xop->xop_data_nents,
+                                       NULL, 0);
+               if (sense_rc) {
+                       ret = -EINVAL;
+                       goto out;
+               }
+
+               pr_debug("Setup PASSTHROUGH_NOALLOC t_data_sg: %p t_data_nents:"
+                        " %u\n", cmd->t_data_sg, cmd->t_data_nents);
+       }
+
+       return 0;
+
+out:
+       if (remote_port == true)
+               kfree(cmd->se_lun);
+       return ret;
+}
+
+static int target_xcopy_issue_pt_cmd(struct xcopy_pt_cmd *xpt_cmd)
+{
+       struct se_cmd *se_cmd = &xpt_cmd->se_cmd;
+       sense_reason_t sense_rc;
+
+       sense_rc = transport_generic_new_cmd(se_cmd);
+       if (sense_rc)
+               return -EINVAL;
+
+       if (se_cmd->data_direction == DMA_TO_DEVICE)
+               target_execute_cmd(se_cmd);
+
+       wait_for_completion_interruptible(&xpt_cmd->xpt_passthrough_sem);
+
+       pr_debug("target_xcopy_issue_pt_cmd(): SCSI status: 0x%02x\n",
+                       se_cmd->scsi_status);
+       return 0;
+}
+
+static int target_xcopy_read_source(
+       struct se_cmd *ec_cmd,
+       struct xcopy_op *xop,
+       struct se_device *src_dev,
+       sector_t src_lba,
+       u32 src_sectors)
+{
+       struct xcopy_pt_cmd *xpt_cmd;
+       struct se_cmd *se_cmd;
+       u32 length = (src_sectors * src_dev->dev_attrib.block_size);
+       int rc;
+       unsigned char cdb[16];
+       bool remote_port = (xop->op_origin == XCOL_DEST_RECV_OP);
+
+       xpt_cmd = kzalloc(sizeof(struct xcopy_pt_cmd), GFP_KERNEL);
+       if (!xpt_cmd) {
+               pr_err("Unable to allocate xcopy_pt_cmd\n");
+               return -ENOMEM;
+       }
+       init_completion(&xpt_cmd->xpt_passthrough_sem);
+       se_cmd = &xpt_cmd->se_cmd;
+
+       memset(&cdb[0], 0, 16);
+       cdb[0] = READ_16;
+       put_unaligned_be64(src_lba, &cdb[2]);
+       put_unaligned_be32(src_sectors, &cdb[10]);
+       pr_debug("XCOPY: Built READ_16: LBA: %llu Sectors: %u Length: %u\n",
+               (unsigned long long)src_lba, src_sectors, length);
+
+       transport_init_se_cmd(se_cmd, &xcopy_pt_tfo, NULL, length,
+                               DMA_FROM_DEVICE, 0, NULL);
+       xop->src_pt_cmd = xpt_cmd;
+
+       rc = target_xcopy_setup_pt_cmd(xpt_cmd, xop, src_dev, &cdb[0],
+                               remote_port, true);
+       if (rc < 0) {
+               transport_generic_free_cmd(se_cmd, 0);
+               return rc;
+       }
+
+       xop->xop_data_sg = se_cmd->t_data_sg;
+       xop->xop_data_nents = se_cmd->t_data_nents;
+       pr_debug("XCOPY-READ: Saved xop->xop_data_sg: %p, num: %u for READ"
+               " memory\n", xop->xop_data_sg, xop->xop_data_nents);
+
+       rc = target_xcopy_issue_pt_cmd(xpt_cmd);
+       if (rc < 0) {
+               transport_generic_free_cmd(se_cmd, 0);
+               return rc;
+       }
+       /*
+        * Clear off the allocated t_data_sg, that has been saved for
+        * zero-copy WRITE submission reuse in struct xcopy_op..
+        */
+       se_cmd->t_data_sg = NULL;
+       se_cmd->t_data_nents = 0;
+
+       return 0;
+}
+
+static int target_xcopy_write_destination(
+       struct se_cmd *ec_cmd,
+       struct xcopy_op *xop,
+       struct se_device *dst_dev,
+       sector_t dst_lba,
+       u32 dst_sectors)
+{
+       struct xcopy_pt_cmd *xpt_cmd;
+       struct se_cmd *se_cmd;
+       u32 length = (dst_sectors * dst_dev->dev_attrib.block_size);
+       int rc;
+       unsigned char cdb[16];
+       bool remote_port = (xop->op_origin == XCOL_SOURCE_RECV_OP);
+
+       xpt_cmd = kzalloc(sizeof(struct xcopy_pt_cmd), GFP_KERNEL);
+       if (!xpt_cmd) {
+               pr_err("Unable to allocate xcopy_pt_cmd\n");
+               return -ENOMEM;
+       }
+       init_completion(&xpt_cmd->xpt_passthrough_sem);
+       se_cmd = &xpt_cmd->se_cmd;
+
+       memset(&cdb[0], 0, 16);
+       cdb[0] = WRITE_16;
+       put_unaligned_be64(dst_lba, &cdb[2]);
+       put_unaligned_be32(dst_sectors, &cdb[10]);
+       pr_debug("XCOPY: Built WRITE_16: LBA: %llu Sectors: %u Length: %u\n",
+               (unsigned long long)dst_lba, dst_sectors, length);
+
+       transport_init_se_cmd(se_cmd, &xcopy_pt_tfo, NULL, length,
+                               DMA_TO_DEVICE, 0, NULL);
+       xop->dst_pt_cmd = xpt_cmd;
+
+       rc = target_xcopy_setup_pt_cmd(xpt_cmd, xop, dst_dev, &cdb[0],
+                               remote_port, false);
+       if (rc < 0) {
+               struct se_cmd *src_cmd = &xop->src_pt_cmd->se_cmd;
+               /*
+                * If the failure happened before the t_mem_list hand-off in
+                * target_xcopy_setup_pt_cmd(), Reset memory + clear flag so that
+                * core releases this memory on error during X-COPY WRITE I/O.
+                */
+               src_cmd->se_cmd_flags &= ~SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC;
+               src_cmd->t_data_sg = xop->xop_data_sg;
+               src_cmd->t_data_nents = xop->xop_data_nents;
+
+               transport_generic_free_cmd(se_cmd, 0);
+               return rc;
+       }
+
+       rc = target_xcopy_issue_pt_cmd(xpt_cmd);
+       if (rc < 0) {
+               se_cmd->se_cmd_flags &= ~SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC;
+               transport_generic_free_cmd(se_cmd, 0);
+               return rc;
+       }
+
+       return 0;
+}
+
+static void target_xcopy_do_work(struct work_struct *work)
+{
+       struct xcopy_op *xop = container_of(work, struct xcopy_op, xop_work);
+       struct se_device *src_dev = xop->src_dev, *dst_dev = xop->dst_dev;
+       struct se_cmd *ec_cmd = xop->xop_se_cmd;
+       sector_t src_lba = xop->src_lba, dst_lba = xop->dst_lba, end_lba;
+       unsigned int max_sectors;
+       int rc;
+       unsigned short nolb = xop->nolb, cur_nolb, max_nolb, copied_nolb = 0;
+
+       end_lba = src_lba + nolb;
+       /*
+        * Break up XCOPY I/O into hw_max_sectors sized I/O based on the
+        * smallest max_sectors between src_dev + dst_dev, or XCOPY_MAX_SECTORS.
+        */
+       max_sectors = min(src_dev->dev_attrib.hw_max_sectors,
+                         dst_dev->dev_attrib.hw_max_sectors);
+       max_sectors = min_t(u32, max_sectors, XCOPY_MAX_SECTORS);
+
+       max_nolb = min_t(u16, max_sectors, ((u16)(~0U)));
+
+       pr_debug("target_xcopy_do_work: nolb: %hu, max_nolb: %hu end_lba: %llu\n",
+                       nolb, max_nolb, (unsigned long long)end_lba);
+       pr_debug("target_xcopy_do_work: Starting src_lba: %llu, dst_lba: %llu\n",
+                       (unsigned long long)src_lba, (unsigned long long)dst_lba);
+
+       while (src_lba < end_lba) {
+               cur_nolb = min(nolb, max_nolb);
+
+               pr_debug("target_xcopy_do_work: Calling read src_dev: %p src_lba: %llu,"
+                       " cur_nolb: %hu\n", src_dev, (unsigned long long)src_lba, cur_nolb);
+
+               rc = target_xcopy_read_source(ec_cmd, xop, src_dev, src_lba, cur_nolb);
+               if (rc < 0)
+                       goto out;
+
+               src_lba += cur_nolb;
+               pr_debug("target_xcopy_do_work: Incremented READ src_lba to %llu\n",
+                               (unsigned long long)src_lba);
+
+               pr_debug("target_xcopy_do_work: Calling write dst_dev: %p dst_lba: %llu,"
+                       " cur_nolb: %hu\n", dst_dev, (unsigned long long)dst_lba, cur_nolb);
+
+               rc = target_xcopy_write_destination(ec_cmd, xop, dst_dev,
+                                               dst_lba, cur_nolb);
+               if (rc < 0) {
+                       transport_generic_free_cmd(&xop->src_pt_cmd->se_cmd, 0);
+                       goto out;
+               }
+
+               dst_lba += cur_nolb;
+               pr_debug("target_xcopy_do_work: Incremented WRITE dst_lba to %llu\n",
+                               (unsigned long long)dst_lba);
+
+               copied_nolb += cur_nolb;
+               nolb -= cur_nolb;
+
+               transport_generic_free_cmd(&xop->src_pt_cmd->se_cmd, 0);
+               xop->dst_pt_cmd->se_cmd.se_cmd_flags &= ~SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC;
+
+               transport_generic_free_cmd(&xop->dst_pt_cmd->se_cmd, 0);
+       }
+
+       xcopy_pt_undepend_remotedev(xop);
+       kfree(xop);
+
+       pr_debug("target_xcopy_do_work: Final src_lba: %llu, dst_lba: %llu\n",
+               (unsigned long long)src_lba, (unsigned long long)dst_lba);
+       pr_debug("target_xcopy_do_work: Blocks copied: %hu, Bytes Copied: %u\n",
+               copied_nolb, copied_nolb * dst_dev->dev_attrib.block_size);
+
+       pr_debug("target_xcopy_do_work: Setting X-COPY GOOD status -> sending response\n");
+       target_complete_cmd(ec_cmd, SAM_STAT_GOOD);
+       return;
+
+out:
+       xcopy_pt_undepend_remotedev(xop);
+       kfree(xop);
+
+       pr_warn("target_xcopy_do_work: Setting X-COPY CHECK_CONDITION -> sending response\n");
+       ec_cmd->scsi_status = SAM_STAT_CHECK_CONDITION;
+       target_complete_cmd(ec_cmd, SAM_STAT_CHECK_CONDITION);
+}
+
+sense_reason_t target_do_xcopy(struct se_cmd *se_cmd)
+{
+       struct xcopy_op *xop = NULL;
+       unsigned char *p = NULL, *seg_desc;
+       unsigned int list_id, list_id_usage, sdll, inline_dl, sa;
+       int rc;
+       unsigned short tdll;
+
+       sa = se_cmd->t_task_cdb[1] & 0x1f;
+       if (sa != 0x00) {
+               pr_err("EXTENDED_COPY(LID4) not supported\n");
+               return TCM_UNSUPPORTED_SCSI_OPCODE;
+       }
+
+       p = transport_kmap_data_sg(se_cmd);
+       if (!p) {
+               pr_err("transport_kmap_data_sg() failed in target_do_xcopy\n");
+               return TCM_OUT_OF_RESOURCES;
+       }
+
+       list_id = p[0];
+       if (list_id != 0x00) {
+               pr_err("XCOPY with non zero list_id: 0x%02x\n", list_id);
+               goto out;
+       }
+       list_id_usage = (p[1] & 0x18);
+       /*
+        * Determine TARGET DESCRIPTOR LIST LENGTH + SEGMENT DESCRIPTOR LIST LENGTH
+        */
+       tdll = get_unaligned_be16(&p[2]);
+       sdll = get_unaligned_be32(&p[8]);
+
+       inline_dl = get_unaligned_be32(&p[12]);
+       if (inline_dl != 0) {
+               pr_err("XCOPY with non zero inline data length\n");
+               goto out;
+       }
+
+       xop = kzalloc(sizeof(struct xcopy_op), GFP_KERNEL);
+       if (!xop) {
+               pr_err("Unable to allocate xcopy_op\n");
+               goto out;
+       }
+       xop->xop_se_cmd = se_cmd;
+
+       pr_debug("Processing XCOPY with list_id: 0x%02x list_id_usage: 0x%02x"
+               " tdll: %hu sdll: %u inline_dl: %u\n", list_id, list_id_usage,
+               tdll, sdll, inline_dl);
+
+       rc = target_xcopy_parse_target_descriptors(se_cmd, xop, &p[16], tdll);
+       if (rc <= 0)
+               goto out;
+
+       pr_debug("XCOPY: Processed %d target descriptors, length: %u\n", rc,
+                               rc * XCOPY_TARGET_DESC_LEN);
+       seg_desc = &p[16];
+       seg_desc += (rc * XCOPY_TARGET_DESC_LEN);
+
+       rc = target_xcopy_parse_segment_descriptors(se_cmd, xop, seg_desc, sdll);
+       if (rc <= 0) {
+               xcopy_pt_undepend_remotedev(xop);
+               goto out;
+       }
+       transport_kunmap_data_sg(se_cmd);
+
+       pr_debug("XCOPY: Processed %d segment descriptors, length: %u\n", rc,
+                               rc * XCOPY_SEGMENT_DESC_LEN);
+       INIT_WORK(&xop->xop_work, target_xcopy_do_work);
+       queue_work(xcopy_wq, &xop->xop_work);
+       return TCM_NO_SENSE;
+
+out:
+       if (p)
+               transport_kunmap_data_sg(se_cmd);
+       kfree(xop);
+       return TCM_INVALID_CDB_FIELD;
+}
+
+static sense_reason_t target_rcr_operating_parameters(struct se_cmd *se_cmd)
+{
+       unsigned char *p;
+
+       p = transport_kmap_data_sg(se_cmd);
+       if (!p) {
+               pr_err("transport_kmap_data_sg failed in"
+                      " target_rcr_operating_parameters\n");
+               return TCM_OUT_OF_RESOURCES;
+       }
+
+       if (se_cmd->data_length < 54) {
+               pr_err("Receive Copy Results Op Parameters length"
+                      " too small: %u\n", se_cmd->data_length);
+               transport_kunmap_data_sg(se_cmd);
+               return TCM_INVALID_CDB_FIELD;
+       }
+       /*
+        * Set SNLID=1 (Supports no List ID)
+        */
+       p[4] = 0x1;
+       /*
+        * MAXIMUM TARGET DESCRIPTOR COUNT
+        */
+       put_unaligned_be16(RCR_OP_MAX_TARGET_DESC_COUNT, &p[8]);
+       /*
+        * MAXIMUM SEGMENT DESCRIPTOR COUNT
+        */
+       put_unaligned_be16(RCR_OP_MAX_SG_DESC_COUNT, &p[10]);
+       /*
+        * MAXIMUM DESCRIPTOR LIST LENGTH
+        */
+       put_unaligned_be32(RCR_OP_MAX_DESC_LIST_LEN, &p[12]);
+       /*
+        * MAXIMUM SEGMENT LENGTH
+        */
+       put_unaligned_be32(RCR_OP_MAX_SEGMENT_LEN, &p[16]);
+       /*
+        * MAXIMUM INLINE DATA LENGTH for SA 0x04 (NOT SUPPORTED)
+        */
+       put_unaligned_be32(0x0, &p[20]);
+       /*
+        * HELD DATA LIMIT
+        */
+       put_unaligned_be32(0x0, &p[24]);
+       /*
+        * MAXIMUM STREAM DEVICE TRANSFER SIZE
+        */
+       put_unaligned_be32(0x0, &p[28]);
+       /*
+        * TOTAL CONCURRENT COPIES
+        */
+       put_unaligned_be16(RCR_OP_TOTAL_CONCURR_COPIES, &p[34]);
+       /*
+        * MAXIMUM CONCURRENT COPIES
+        */
+       p[36] = RCR_OP_MAX_CONCURR_COPIES;
+       /*
+        * DATA SEGMENT GRANULARITY (log 2)
+        */
+       p[37] = RCR_OP_DATA_SEG_GRAN_LOG2;
+       /*
+        * INLINE DATA GRANULARITY log 2)
+        */
+       p[38] = RCR_OP_INLINE_DATA_GRAN_LOG2;
+       /*
+        * HELD DATA GRANULARITY
+        */
+       p[39] = RCR_OP_HELD_DATA_GRAN_LOG2;
+       /*
+        * IMPLEMENTED DESCRIPTOR LIST LENGTH
+        */
+       p[43] = 0x2;
+       /*
+        * List of implemented descriptor type codes (ordered)
+        */
+       p[44] = 0x02; /* Copy Block to Block device */
+       p[45] = 0xe4; /* Identification descriptor target descriptor */
+
+       /*
+        * AVAILABLE DATA (n-3)
+        */
+       put_unaligned_be32(42, &p[0]);
+
+       transport_kunmap_data_sg(se_cmd);
+       target_complete_cmd(se_cmd, GOOD);
+
+       return TCM_NO_SENSE;
+}
+
+sense_reason_t target_do_receive_copy_results(struct se_cmd *se_cmd)
+{
+       unsigned char *cdb = &se_cmd->t_task_cdb[0];
+       int sa = (cdb[1] & 0x1f), list_id = cdb[2];
+       sense_reason_t rc = TCM_NO_SENSE;
+
+       pr_debug("Entering target_do_receive_copy_results: SA: 0x%02x, List ID:"
+               " 0x%02x, AL: %u\n", sa, list_id, se_cmd->data_length);
+
+       if (list_id != 0) {
+               pr_err("Receive Copy Results with non zero list identifier"
+                      " not supported\n");
+               return TCM_INVALID_CDB_FIELD;
+       }
+
+       switch (sa) {
+       case RCR_SA_OPERATING_PARAMETERS:
+               rc = target_rcr_operating_parameters(se_cmd);
+               break;
+       case RCR_SA_COPY_STATUS:
+       case RCR_SA_RECEIVE_DATA:
+       case RCR_SA_FAILED_SEGMENT_DETAILS:
+       default:
+               pr_err("Unsupported SA for receive copy results: 0x%02x\n", sa);
+               return TCM_INVALID_CDB_FIELD;
+       }
+
+       return rc;
+}
diff --git a/drivers/target/target_core_xcopy.h b/drivers/target/target_core_xcopy.h
new file mode 100644 (file)
index 0000000..700a981
--- /dev/null
@@ -0,0 +1,62 @@
+#define XCOPY_TARGET_DESC_LEN          32
+#define XCOPY_SEGMENT_DESC_LEN         28
+#define XCOPY_NAA_IEEE_REGEX_LEN       16
+#define XCOPY_MAX_SECTORS              1024
+
+enum xcopy_origin_list {
+       XCOL_SOURCE_RECV_OP = 0x01,
+       XCOL_DEST_RECV_OP = 0x02,
+};
+
+struct xcopy_pt_cmd;
+
+struct xcopy_op {
+       int op_origin;
+
+       struct se_cmd *xop_se_cmd;
+       struct se_device *src_dev;
+       unsigned char src_tid_wwn[XCOPY_NAA_IEEE_REGEX_LEN];
+       struct se_device *dst_dev;
+       unsigned char dst_tid_wwn[XCOPY_NAA_IEEE_REGEX_LEN];
+       unsigned char local_dev_wwn[XCOPY_NAA_IEEE_REGEX_LEN];
+
+       sector_t src_lba;
+       sector_t dst_lba;
+       unsigned short stdi;
+       unsigned short dtdi;
+       unsigned short nolb;
+       unsigned int dbl;
+
+       struct xcopy_pt_cmd *src_pt_cmd;
+       struct xcopy_pt_cmd *dst_pt_cmd;
+
+       u32 xop_data_nents;
+       struct scatterlist *xop_data_sg;
+       struct work_struct xop_work;
+};
+
+/*
+ * Receive Copy Results Service Actions
+ */
+#define RCR_SA_COPY_STATUS             0x00
+#define RCR_SA_RECEIVE_DATA            0x01
+#define RCR_SA_OPERATING_PARAMETERS    0x03
+#define RCR_SA_FAILED_SEGMENT_DETAILS  0x04
+
+/*
+ * Receive Copy Results defs for Operating Parameters
+ */
+#define RCR_OP_MAX_TARGET_DESC_COUNT   0x2
+#define RCR_OP_MAX_SG_DESC_COUNT       0x1
+#define RCR_OP_MAX_DESC_LIST_LEN       1024
+#define RCR_OP_MAX_SEGMENT_LEN         268435456 /* 256 MB */
+#define RCR_OP_TOTAL_CONCURR_COPIES    0x1 /* Must be <= 16384 */
+#define RCR_OP_MAX_CONCURR_COPIES      0x1 /* Must be <= 255 */
+#define RCR_OP_DATA_SEG_GRAN_LOG2      9 /* 512 bytes in log 2 */
+#define RCR_OP_INLINE_DATA_GRAN_LOG2   9 /* 512 bytes in log 2 */
+#define RCR_OP_HELD_DATA_GRAN_LOG2     9 /* 512 bytes in log 2 */
+
+extern int target_xcopy_setup_pt(void);
+extern void target_xcopy_release_pt(void);
+extern sense_reason_t target_do_xcopy(struct se_cmd *);
+extern sense_reason_t target_do_receive_copy_results(struct se_cmd *);
index b74feb0..4e00508 100644 (file)
@@ -311,7 +311,11 @@ static struct se_portal_group *ft_add_tpg(
         */
        if (strstr(name, "tpgt_") != name)
                return NULL;
-       if (strict_strtoul(name + 5, 10, &index) || index > UINT_MAX)
+
+       ret = kstrtoul(name + 5, 10, &index);
+       if (ret)
+               return NULL;
+       if (index > UINT_MAX)
                return NULL;
 
        lacl = container_of(wwn, struct ft_lport_acl, fc_lport_wwn);
index e988c81..dbfc390 100644 (file)
@@ -17,8 +17,17 @@ if THERMAL
 
 config THERMAL_HWMON
        bool
+       prompt "Expose thermal sensors as hwmon device"
        depends on HWMON=y || HWMON=THERMAL
        default y
+       help
+         In case a sensor is registered with the thermal
+         framework, this option will also register it
+         as a hwmon. The sensor will then have the common
+         hwmon sysfs interface.
+
+         Say 'Y' here if you want all thermal sensors to
+         have hwmon sysfs interface too.
 
 choice
        prompt "Default Thermal governor"
@@ -91,6 +100,17 @@ config THERMAL_EMULATION
          because userland can easily disable the thermal policy by simply
          flooding this sysfs node with low temperature values.
 
+config IMX_THERMAL
+       tristate "Temperature sensor driver for Freescale i.MX SoCs"
+       depends on CPU_THERMAL
+       depends on MFD_SYSCON
+       depends on OF
+       help
+         Support for Temperature Monitor (TEMPMON) found on Freescale i.MX SoCs.
+         It supports one critical trip point and one passive trip point.  The
+         cpufreq is used as the cooling device to throttle CPUs when the
+         passive trip is crossed.
+
 config SPEAR_THERMAL
        bool "SPEAr thermal sensor driver"
        depends on PLAT_SPEAR
@@ -114,14 +134,6 @@ config KIRKWOOD_THERMAL
          Support for the Kirkwood thermal sensor driver into the Linux thermal
          framework. Only kirkwood 88F6282 and 88F6283 have this sensor.
 
-config EXYNOS_THERMAL
-       tristate "Temperature sensor on Samsung EXYNOS"
-       depends on (ARCH_EXYNOS4 || ARCH_EXYNOS5)
-       depends on CPU_THERMAL
-       help
-         If you say yes here you get support for TMU (Thermal Management
-         Unit) on SAMSUNG EXYNOS series of SoC.
-
 config DOVE_THERMAL
        tristate "Temperature sensor on Marvell Dove SoCs"
        depends on ARCH_DOVE
@@ -184,4 +196,9 @@ menu "Texas Instruments thermal drivers"
 source "drivers/thermal/ti-soc-thermal/Kconfig"
 endmenu
 
+menu "Samsung thermal drivers"
+depends on PLAT_SAMSUNG
+source "drivers/thermal/samsung/Kconfig"
+endmenu
+
 endif
index 67184a2..584b363 100644 (file)
@@ -5,6 +5,9 @@
 obj-$(CONFIG_THERMAL)          += thermal_sys.o
 thermal_sys-y                  += thermal_core.o
 
+# interface to/from other layers providing sensors
+thermal_sys-$(CONFIG_THERMAL_HWMON)            += thermal_hwmon.o
+
 # governors
 thermal_sys-$(CONFIG_THERMAL_GOV_FAIR_SHARE)   += fair_share.o
 thermal_sys-$(CONFIG_THERMAL_GOV_STEP_WISE)    += step_wise.o
@@ -17,10 +20,11 @@ thermal_sys-$(CONFIG_CPU_THERMAL)   += cpu_cooling.o
 obj-$(CONFIG_SPEAR_THERMAL)    += spear_thermal.o
 obj-$(CONFIG_RCAR_THERMAL)     += rcar_thermal.o
 obj-$(CONFIG_KIRKWOOD_THERMAL)  += kirkwood_thermal.o
-obj-$(CONFIG_EXYNOS_THERMAL)   += exynos_thermal.o
+obj-y                          += samsung/
 obj-$(CONFIG_DOVE_THERMAL)     += dove_thermal.o
 obj-$(CONFIG_DB8500_THERMAL)   += db8500_thermal.o
 obj-$(CONFIG_ARMADA_THERMAL)   += armada_thermal.o
+obj-$(CONFIG_IMX_THERMAL)      += imx_thermal.o
 obj-$(CONFIG_DB8500_CPUFREQ_COOLING)   += db8500_cpufreq_cooling.o
 obj-$(CONFIG_INTEL_POWERCLAMP) += intel_powerclamp.o
 obj-$(CONFIG_X86_PKG_TEMP_THERMAL)     += x86_pkg_temp_thermal.o
index 82e15db..d179028 100644 (file)
@@ -322,6 +322,8 @@ static int cpufreq_thermal_notifier(struct notifier_block *nb,
 
        if (cpumask_test_cpu(policy->cpu, &notify_device->allowed_cpus))
                max_freq = notify_device->cpufreq_val;
+       else
+               return 0;
 
        /* Never exceed user_policy.max */
        if (max_freq > policy->user_policy.max)
@@ -496,8 +498,12 @@ EXPORT_SYMBOL_GPL(cpufreq_cooling_register);
  */
 void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev)
 {
-       struct cpufreq_cooling_device *cpufreq_dev = cdev->devdata;
+       struct cpufreq_cooling_device *cpufreq_dev;
+
+       if (!cdev)
+               return;
 
+       cpufreq_dev = cdev->devdata;
        mutex_lock(&cooling_cpufreq_lock);
        cpufreq_dev_count--;
 
diff --git a/drivers/thermal/exynos_thermal.c b/drivers/thermal/exynos_thermal.c
deleted file mode 100644 (file)
index 9af4b93..0000000
+++ /dev/null
@@ -1,1059 +0,0 @@
-/*
- * exynos_thermal.c - Samsung EXYNOS TMU (Thermal Management Unit)
- *
- *  Copyright (C) 2011 Samsung Electronics
- *  Donggeun Kim <dg77.kim@samsung.com>
- *  Amit Daniel Kachhap <amit.kachhap@linaro.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- *
- */
-
-#include <linux/module.h>
-#include <linux/err.h>
-#include <linux/kernel.h>
-#include <linux/slab.h>
-#include <linux/platform_device.h>
-#include <linux/interrupt.h>
-#include <linux/clk.h>
-#include <linux/workqueue.h>
-#include <linux/sysfs.h>
-#include <linux/kobject.h>
-#include <linux/io.h>
-#include <linux/mutex.h>
-#include <linux/platform_data/exynos_thermal.h>
-#include <linux/thermal.h>
-#include <linux/cpufreq.h>
-#include <linux/cpu_cooling.h>
-#include <linux/of.h>
-
-/* Exynos generic registers */
-#define EXYNOS_TMU_REG_TRIMINFO                0x0
-#define EXYNOS_TMU_REG_CONTROL         0x20
-#define EXYNOS_TMU_REG_STATUS          0x28
-#define EXYNOS_TMU_REG_CURRENT_TEMP    0x40
-#define EXYNOS_TMU_REG_INTEN           0x70
-#define EXYNOS_TMU_REG_INTSTAT         0x74
-#define EXYNOS_TMU_REG_INTCLEAR                0x78
-
-#define EXYNOS_TMU_TRIM_TEMP_MASK      0xff
-#define EXYNOS_TMU_GAIN_SHIFT          8
-#define EXYNOS_TMU_REF_VOLTAGE_SHIFT   24
-#define EXYNOS_TMU_CORE_ON             3
-#define EXYNOS_TMU_CORE_OFF            2
-#define EXYNOS_TMU_DEF_CODE_TO_TEMP_OFFSET     50
-
-/* Exynos4210 specific registers */
-#define EXYNOS4210_TMU_REG_THRESHOLD_TEMP      0x44
-#define EXYNOS4210_TMU_REG_TRIG_LEVEL0 0x50
-#define EXYNOS4210_TMU_REG_TRIG_LEVEL1 0x54
-#define EXYNOS4210_TMU_REG_TRIG_LEVEL2 0x58
-#define EXYNOS4210_TMU_REG_TRIG_LEVEL3 0x5C
-#define EXYNOS4210_TMU_REG_PAST_TEMP0  0x60
-#define EXYNOS4210_TMU_REG_PAST_TEMP1  0x64
-#define EXYNOS4210_TMU_REG_PAST_TEMP2  0x68
-#define EXYNOS4210_TMU_REG_PAST_TEMP3  0x6C
-
-#define EXYNOS4210_TMU_TRIG_LEVEL0_MASK        0x1
-#define EXYNOS4210_TMU_TRIG_LEVEL1_MASK        0x10
-#define EXYNOS4210_TMU_TRIG_LEVEL2_MASK        0x100
-#define EXYNOS4210_TMU_TRIG_LEVEL3_MASK        0x1000
-#define EXYNOS4210_TMU_INTCLEAR_VAL    0x1111
-
-/* Exynos5250 and Exynos4412 specific registers */
-#define EXYNOS_TMU_TRIMINFO_CON        0x14
-#define EXYNOS_THD_TEMP_RISE           0x50
-#define EXYNOS_THD_TEMP_FALL           0x54
-#define EXYNOS_EMUL_CON                0x80
-
-#define EXYNOS_TRIMINFO_RELOAD         0x1
-#define EXYNOS_TMU_CLEAR_RISE_INT      0x111
-#define EXYNOS_TMU_CLEAR_FALL_INT      (0x111 << 12)
-#define EXYNOS_MUX_ADDR_VALUE          6
-#define EXYNOS_MUX_ADDR_SHIFT          20
-#define EXYNOS_TMU_TRIP_MODE_SHIFT     13
-
-#define EFUSE_MIN_VALUE 40
-#define EFUSE_MAX_VALUE 100
-
-/* In-kernel thermal framework related macros & definations */
-#define SENSOR_NAME_LEN        16
-#define MAX_TRIP_COUNT 8
-#define MAX_COOLING_DEVICE 4
-#define MAX_THRESHOLD_LEVS 4
-
-#define ACTIVE_INTERVAL 500
-#define IDLE_INTERVAL 10000
-#define MCELSIUS       1000
-
-#ifdef CONFIG_THERMAL_EMULATION
-#define EXYNOS_EMUL_TIME       0x57F0
-#define EXYNOS_EMUL_TIME_SHIFT 16
-#define EXYNOS_EMUL_DATA_SHIFT 8
-#define EXYNOS_EMUL_DATA_MASK  0xFF
-#define EXYNOS_EMUL_ENABLE     0x1
-#endif /* CONFIG_THERMAL_EMULATION */
-
-/* CPU Zone information */
-#define PANIC_ZONE      4
-#define WARN_ZONE       3
-#define MONITOR_ZONE    2
-#define SAFE_ZONE       1
-
-#define GET_ZONE(trip) (trip + 2)
-#define GET_TRIP(zone) (zone - 2)
-
-#define EXYNOS_ZONE_COUNT      3
-
-struct exynos_tmu_data {
-       struct exynos_tmu_platform_data *pdata;
-       struct resource *mem;
-       void __iomem *base;
-       int irq;
-       enum soc_type soc;
-       struct work_struct irq_work;
-       struct mutex lock;
-       struct clk *clk;
-       u8 temp_error1, temp_error2;
-};
-
-struct thermal_trip_point_conf {
-       int trip_val[MAX_TRIP_COUNT];
-       int trip_count;
-       u8 trigger_falling;
-};
-
-struct thermal_cooling_conf {
-       struct freq_clip_table freq_data[MAX_TRIP_COUNT];
-       int freq_clip_count;
-};
-
-struct thermal_sensor_conf {
-       char name[SENSOR_NAME_LEN];
-       int (*read_temperature)(void *data);
-       int (*write_emul_temp)(void *drv_data, unsigned long temp);
-       struct thermal_trip_point_conf trip_data;
-       struct thermal_cooling_conf cooling_data;
-       void *private_data;
-};
-
-struct exynos_thermal_zone {
-       enum thermal_device_mode mode;
-       struct thermal_zone_device *therm_dev;
-       struct thermal_cooling_device *cool_dev[MAX_COOLING_DEVICE];
-       unsigned int cool_dev_size;
-       struct platform_device *exynos4_dev;
-       struct thermal_sensor_conf *sensor_conf;
-       bool bind;
-};
-
-static struct exynos_thermal_zone *th_zone;
-static void exynos_unregister_thermal(void);
-static int exynos_register_thermal(struct thermal_sensor_conf *sensor_conf);
-
-/* Get mode callback functions for thermal zone */
-static int exynos_get_mode(struct thermal_zone_device *thermal,
-                       enum thermal_device_mode *mode)
-{
-       if (th_zone)
-               *mode = th_zone->mode;
-       return 0;
-}
-
-/* Set mode callback functions for thermal zone */
-static int exynos_set_mode(struct thermal_zone_device *thermal,
-                       enum thermal_device_mode mode)
-{
-       if (!th_zone->therm_dev) {
-               pr_notice("thermal zone not registered\n");
-               return 0;
-       }
-
-       mutex_lock(&th_zone->therm_dev->lock);
-
-       if (mode == THERMAL_DEVICE_ENABLED &&
-               !th_zone->sensor_conf->trip_data.trigger_falling)
-               th_zone->therm_dev->polling_delay = IDLE_INTERVAL;
-       else
-               th_zone->therm_dev->polling_delay = 0;
-
-       mutex_unlock(&th_zone->therm_dev->lock);
-
-       th_zone->mode = mode;
-       thermal_zone_device_update(th_zone->therm_dev);
-       pr_info("thermal polling set for duration=%d msec\n",
-                               th_zone->therm_dev->polling_delay);
-       return 0;
-}
-
-
-/* Get trip type callback functions for thermal zone */
-static int exynos_get_trip_type(struct thermal_zone_device *thermal, int trip,
-                                enum thermal_trip_type *type)
-{
-       switch (GET_ZONE(trip)) {
-       case MONITOR_ZONE:
-       case WARN_ZONE:
-               *type = THERMAL_TRIP_ACTIVE;
-               break;
-       case PANIC_ZONE:
-               *type = THERMAL_TRIP_CRITICAL;
-               break;
-       default:
-               return -EINVAL;
-       }
-       return 0;
-}
-
-/* Get trip temperature callback functions for thermal zone */
-static int exynos_get_trip_temp(struct thermal_zone_device *thermal, int trip,
-                               unsigned long *temp)
-{
-       if (trip < GET_TRIP(MONITOR_ZONE) || trip > GET_TRIP(PANIC_ZONE))
-               return -EINVAL;
-
-       *temp = th_zone->sensor_conf->trip_data.trip_val[trip];
-       /* convert the temperature into millicelsius */
-       *temp = *temp * MCELSIUS;
-
-       return 0;
-}
-
-/* Get critical temperature callback functions for thermal zone */
-static int exynos_get_crit_temp(struct thermal_zone_device *thermal,
-                               unsigned long *temp)
-{
-       int ret;
-       /* Panic zone */
-       ret = exynos_get_trip_temp(thermal, GET_TRIP(PANIC_ZONE), temp);
-       return ret;
-}
-
-/* Bind callback functions for thermal zone */
-static int exynos_bind(struct thermal_zone_device *thermal,
-                       struct thermal_cooling_device *cdev)
-{
-       int ret = 0, i, tab_size, level;
-       struct freq_clip_table *tab_ptr, *clip_data;
-       struct thermal_sensor_conf *data = th_zone->sensor_conf;
-
-       tab_ptr = (struct freq_clip_table *)data->cooling_data.freq_data;
-       tab_size = data->cooling_data.freq_clip_count;
-
-       if (tab_ptr == NULL || tab_size == 0)
-               return -EINVAL;
-
-       /* find the cooling device registered*/
-       for (i = 0; i < th_zone->cool_dev_size; i++)
-               if (cdev == th_zone->cool_dev[i])
-                       break;
-
-       /* No matching cooling device */
-       if (i == th_zone->cool_dev_size)
-               return 0;
-
-       /* Bind the thermal zone to the cpufreq cooling device */
-       for (i = 0; i < tab_size; i++) {
-               clip_data = (struct freq_clip_table *)&(tab_ptr[i]);
-               level = cpufreq_cooling_get_level(0, clip_data->freq_clip_max);
-               if (level == THERMAL_CSTATE_INVALID)
-                       return 0;
-               switch (GET_ZONE(i)) {
-               case MONITOR_ZONE:
-               case WARN_ZONE:
-                       if (thermal_zone_bind_cooling_device(thermal, i, cdev,
-                                                               level, 0)) {
-                               pr_err("error binding cdev inst %d\n", i);
-                               ret = -EINVAL;
-                       }
-                       th_zone->bind = true;
-                       break;
-               default:
-                       ret = -EINVAL;
-               }
-       }
-
-       return ret;
-}
-
-/* Unbind callback functions for thermal zone */
-static int exynos_unbind(struct thermal_zone_device *thermal,
-                       struct thermal_cooling_device *cdev)
-{
-       int ret = 0, i, tab_size;
-       struct thermal_sensor_conf *data = th_zone->sensor_conf;
-
-       if (th_zone->bind == false)
-               return 0;
-
-       tab_size = data->cooling_data.freq_clip_count;
-
-       if (tab_size == 0)
-               return -EINVAL;
-
-       /* find the cooling device registered*/
-       for (i = 0; i < th_zone->cool_dev_size; i++)
-               if (cdev == th_zone->cool_dev[i])
-                       break;
-
-       /* No matching cooling device */
-       if (i == th_zone->cool_dev_size)
-               return 0;
-
-       /* Bind the thermal zone to the cpufreq cooling device */
-       for (i = 0; i < tab_size; i++) {
-               switch (GET_ZONE(i)) {
-               case MONITOR_ZONE:
-               case WARN_ZONE:
-                       if (thermal_zone_unbind_cooling_device(thermal, i,
-                                                               cdev)) {
-                               pr_err("error unbinding cdev inst=%d\n", i);
-                               ret = -EINVAL;
-                       }
-                       th_zone->bind = false;
-                       break;
-               default:
-                       ret = -EINVAL;
-               }
-       }
-       return ret;
-}
-
-/* Get temperature callback functions for thermal zone */
-static int exynos_get_temp(struct thermal_zone_device *thermal,
-                       unsigned long *temp)
-{
-       void *data;
-
-       if (!th_zone->sensor_conf) {
-               pr_info("Temperature sensor not initialised\n");
-               return -EINVAL;
-       }
-       data = th_zone->sensor_conf->private_data;
-       *temp = th_zone->sensor_conf->read_temperature(data);
-       /* convert the temperature into millicelsius */
-       *temp = *temp * MCELSIUS;
-       return 0;
-}
-
-/* Get temperature callback functions for thermal zone */
-static int exynos_set_emul_temp(struct thermal_zone_device *thermal,
-                                               unsigned long temp)
-{
-       void *data;
-       int ret = -EINVAL;
-
-       if (!th_zone->sensor_conf) {
-               pr_info("Temperature sensor not initialised\n");
-               return -EINVAL;
-       }
-       data = th_zone->sensor_conf->private_data;
-       if (th_zone->sensor_conf->write_emul_temp)
-               ret = th_zone->sensor_conf->write_emul_temp(data, temp);
-       return ret;
-}
-
-/* Get the temperature trend */
-static int exynos_get_trend(struct thermal_zone_device *thermal,
-                       int trip, enum thermal_trend *trend)
-{
-       int ret;
-       unsigned long trip_temp;
-
-       ret = exynos_get_trip_temp(thermal, trip, &trip_temp);
-       if (ret < 0)
-               return ret;
-
-       if (thermal->temperature >= trip_temp)
-               *trend = THERMAL_TREND_RAISE_FULL;
-       else
-               *trend = THERMAL_TREND_DROP_FULL;
-
-       return 0;
-}
-/* Operation callback functions for thermal zone */
-static struct thermal_zone_device_ops const exynos_dev_ops = {
-       .bind = exynos_bind,
-       .unbind = exynos_unbind,
-       .get_temp = exynos_get_temp,
-       .set_emul_temp = exynos_set_emul_temp,
-       .get_trend = exynos_get_trend,
-       .get_mode = exynos_get_mode,
-       .set_mode = exynos_set_mode,
-       .get_trip_type = exynos_get_trip_type,
-       .get_trip_temp = exynos_get_trip_temp,
-       .get_crit_temp = exynos_get_crit_temp,
-};
-
-/*
- * This function may be called from interrupt based temperature sensor
- * when threshold is changed.
- */
-static void exynos_report_trigger(void)
-{
-       unsigned int i;
-       char data[10];
-       char *envp[] = { data, NULL };
-
-       if (!th_zone || !th_zone->therm_dev)
-               return;
-       if (th_zone->bind == false) {
-               for (i = 0; i < th_zone->cool_dev_size; i++) {
-                       if (!th_zone->cool_dev[i])
-                               continue;
-                       exynos_bind(th_zone->therm_dev,
-                                       th_zone->cool_dev[i]);
-               }
-       }
-
-       thermal_zone_device_update(th_zone->therm_dev);
-
-       mutex_lock(&th_zone->therm_dev->lock);
-       /* Find the level for which trip happened */
-       for (i = 0; i < th_zone->sensor_conf->trip_data.trip_count; i++) {
-               if (th_zone->therm_dev->last_temperature <
-                       th_zone->sensor_conf->trip_data.trip_val[i] * MCELSIUS)
-                       break;
-       }
-
-       if (th_zone->mode == THERMAL_DEVICE_ENABLED &&
-               !th_zone->sensor_conf->trip_data.trigger_falling) {
-               if (i > 0)
-                       th_zone->therm_dev->polling_delay = ACTIVE_INTERVAL;
-               else
-                       th_zone->therm_dev->polling_delay = IDLE_INTERVAL;
-       }
-
-       snprintf(data, sizeof(data), "%u", i);
-       kobject_uevent_env(&th_zone->therm_dev->device.kobj, KOBJ_CHANGE, envp);
-       mutex_unlock(&th_zone->therm_dev->lock);
-}
-
-/* Register with the in-kernel thermal management */
-static int exynos_register_thermal(struct thermal_sensor_conf *sensor_conf)
-{
-       int ret;
-       struct cpumask mask_val;
-
-       if (!sensor_conf || !sensor_conf->read_temperature) {
-               pr_err("Temperature sensor not initialised\n");
-               return -EINVAL;
-       }
-
-       th_zone = kzalloc(sizeof(struct exynos_thermal_zone), GFP_KERNEL);
-       if (!th_zone)
-               return -ENOMEM;
-
-       th_zone->sensor_conf = sensor_conf;
-       cpumask_set_cpu(0, &mask_val);
-       th_zone->cool_dev[0] = cpufreq_cooling_register(&mask_val);
-       if (IS_ERR(th_zone->cool_dev[0])) {
-               pr_err("Failed to register cpufreq cooling device\n");
-               ret = -EINVAL;
-               goto err_unregister;
-       }
-       th_zone->cool_dev_size++;
-
-       th_zone->therm_dev = thermal_zone_device_register(sensor_conf->name,
-                       EXYNOS_ZONE_COUNT, 0, NULL, &exynos_dev_ops, NULL, 0,
-                       sensor_conf->trip_data.trigger_falling ?
-                       0 : IDLE_INTERVAL);
-
-       if (IS_ERR(th_zone->therm_dev)) {
-               pr_err("Failed to register thermal zone device\n");
-               ret = PTR_ERR(th_zone->therm_dev);
-               goto err_unregister;
-       }
-       th_zone->mode = THERMAL_DEVICE_ENABLED;
-
-       pr_info("Exynos: Kernel Thermal management registered\n");
-
-       return 0;
-
-err_unregister:
-       exynos_unregister_thermal();
-       return ret;
-}
-
-/* Un-Register with the in-kernel thermal management */
-static void exynos_unregister_thermal(void)
-{
-       int i;
-
-       if (!th_zone)
-               return;
-
-       if (th_zone->therm_dev)
-               thermal_zone_device_unregister(th_zone->therm_dev);
-
-       for (i = 0; i < th_zone->cool_dev_size; i++) {
-               if (th_zone->cool_dev[i])
-                       cpufreq_cooling_unregister(th_zone->cool_dev[i]);
-       }
-
-       kfree(th_zone);
-       pr_info("Exynos: Kernel Thermal management unregistered\n");
-}
-
-/*
- * TMU treats temperature as a mapped temperature code.
- * The temperature is converted differently depending on the calibration type.
- */
-static int temp_to_code(struct exynos_tmu_data *data, u8 temp)
-{
-       struct exynos_tmu_platform_data *pdata = data->pdata;
-       int temp_code;
-
-       if (data->soc == SOC_ARCH_EXYNOS4210)
-               /* temp should range between 25 and 125 */
-               if (temp < 25 || temp > 125) {
-                       temp_code = -EINVAL;
-                       goto out;
-               }
-
-       switch (pdata->cal_type) {
-       case TYPE_TWO_POINT_TRIMMING:
-               temp_code = (temp - 25) *
-                   (data->temp_error2 - data->temp_error1) /
-                   (85 - 25) + data->temp_error1;
-               break;
-       case TYPE_ONE_POINT_TRIMMING:
-               temp_code = temp + data->temp_error1 - 25;
-               break;
-       default:
-               temp_code = temp + EXYNOS_TMU_DEF_CODE_TO_TEMP_OFFSET;
-               break;
-       }
-out:
-       return temp_code;
-}
-
-/*
- * Calculate a temperature value from a temperature code.
- * The unit of the temperature is degree Celsius.
- */
-static int code_to_temp(struct exynos_tmu_data *data, u8 temp_code)
-{
-       struct exynos_tmu_platform_data *pdata = data->pdata;
-       int temp;
-
-       if (data->soc == SOC_ARCH_EXYNOS4210)
-               /* temp_code should range between 75 and 175 */
-               if (temp_code < 75 || temp_code > 175) {
-                       temp = -ENODATA;
-                       goto out;
-               }
-
-       switch (pdata->cal_type) {
-       case TYPE_TWO_POINT_TRIMMING:
-               temp = (temp_code - data->temp_error1) * (85 - 25) /
-                   (data->temp_error2 - data->temp_error1) + 25;
-               break;
-       case TYPE_ONE_POINT_TRIMMING:
-               temp = temp_code - data->temp_error1 + 25;
-               break;
-       default:
-               temp = temp_code - EXYNOS_TMU_DEF_CODE_TO_TEMP_OFFSET;
-               break;
-       }
-out:
-       return temp;
-}
-
-static int exynos_tmu_initialize(struct platform_device *pdev)
-{
-       struct exynos_tmu_data *data = platform_get_drvdata(pdev);
-       struct exynos_tmu_platform_data *pdata = data->pdata;
-       unsigned int status, trim_info;
-       unsigned int rising_threshold = 0, falling_threshold = 0;
-       int ret = 0, threshold_code, i, trigger_levs = 0;
-
-       mutex_lock(&data->lock);
-       clk_enable(data->clk);
-
-       status = readb(data->base + EXYNOS_TMU_REG_STATUS);
-       if (!status) {
-               ret = -EBUSY;
-               goto out;
-       }
-
-       if (data->soc == SOC_ARCH_EXYNOS) {
-               __raw_writel(EXYNOS_TRIMINFO_RELOAD,
-                               data->base + EXYNOS_TMU_TRIMINFO_CON);
-       }
-       /* Save trimming info in order to perform calibration */
-       trim_info = readl(data->base + EXYNOS_TMU_REG_TRIMINFO);
-       data->temp_error1 = trim_info & EXYNOS_TMU_TRIM_TEMP_MASK;
-       data->temp_error2 = ((trim_info >> 8) & EXYNOS_TMU_TRIM_TEMP_MASK);
-
-       if ((EFUSE_MIN_VALUE > data->temp_error1) ||
-                       (data->temp_error1 > EFUSE_MAX_VALUE) ||
-                       (data->temp_error2 != 0))
-               data->temp_error1 = pdata->efuse_value;
-
-       /* Count trigger levels to be enabled */
-       for (i = 0; i < MAX_THRESHOLD_LEVS; i++)
-               if (pdata->trigger_levels[i])
-                       trigger_levs++;
-
-       if (data->soc == SOC_ARCH_EXYNOS4210) {
-               /* Write temperature code for threshold */
-               threshold_code = temp_to_code(data, pdata->threshold);
-               if (threshold_code < 0) {
-                       ret = threshold_code;
-                       goto out;
-               }
-               writeb(threshold_code,
-                       data->base + EXYNOS4210_TMU_REG_THRESHOLD_TEMP);
-               for (i = 0; i < trigger_levs; i++)
-                       writeb(pdata->trigger_levels[i],
-                       data->base + EXYNOS4210_TMU_REG_TRIG_LEVEL0 + i * 4);
-
-               writel(EXYNOS4210_TMU_INTCLEAR_VAL,
-                       data->base + EXYNOS_TMU_REG_INTCLEAR);
-       } else if (data->soc == SOC_ARCH_EXYNOS) {
-               /* Write temperature code for rising and falling threshold */
-               for (i = 0; i < trigger_levs; i++) {
-                       threshold_code = temp_to_code(data,
-                                               pdata->trigger_levels[i]);
-                       if (threshold_code < 0) {
-                               ret = threshold_code;
-                               goto out;
-                       }
-                       rising_threshold |= threshold_code << 8 * i;
-                       if (pdata->threshold_falling) {
-                               threshold_code = temp_to_code(data,
-                                               pdata->trigger_levels[i] -
-                                               pdata->threshold_falling);
-                               if (threshold_code > 0)
-                                       falling_threshold |=
-                                               threshold_code << 8 * i;
-                       }
-               }
-
-               writel(rising_threshold,
-                               data->base + EXYNOS_THD_TEMP_RISE);
-               writel(falling_threshold,
-                               data->base + EXYNOS_THD_TEMP_FALL);
-
-               writel(EXYNOS_TMU_CLEAR_RISE_INT | EXYNOS_TMU_CLEAR_FALL_INT,
-                               data->base + EXYNOS_TMU_REG_INTCLEAR);
-       }
-out:
-       clk_disable(data->clk);
-       mutex_unlock(&data->lock);
-
-       return ret;
-}
-
-static void exynos_tmu_control(struct platform_device *pdev, bool on)
-{
-       struct exynos_tmu_data *data = platform_get_drvdata(pdev);
-       struct exynos_tmu_platform_data *pdata = data->pdata;
-       unsigned int con, interrupt_en;
-
-       mutex_lock(&data->lock);
-       clk_enable(data->clk);
-
-       con = pdata->reference_voltage << EXYNOS_TMU_REF_VOLTAGE_SHIFT |
-               pdata->gain << EXYNOS_TMU_GAIN_SHIFT;
-
-       if (data->soc == SOC_ARCH_EXYNOS) {
-               con |= pdata->noise_cancel_mode << EXYNOS_TMU_TRIP_MODE_SHIFT;
-               con |= (EXYNOS_MUX_ADDR_VALUE << EXYNOS_MUX_ADDR_SHIFT);
-       }
-
-       if (on) {
-               con |= EXYNOS_TMU_CORE_ON;
-               interrupt_en = pdata->trigger_level3_en << 12 |
-                       pdata->trigger_level2_en << 8 |
-                       pdata->trigger_level1_en << 4 |
-                       pdata->trigger_level0_en;
-               if (pdata->threshold_falling)
-                       interrupt_en |= interrupt_en << 16;
-       } else {
-               con |= EXYNOS_TMU_CORE_OFF;
-               interrupt_en = 0; /* Disable all interrupts */
-       }
-       writel(interrupt_en, data->base + EXYNOS_TMU_REG_INTEN);
-       writel(con, data->base + EXYNOS_TMU_REG_CONTROL);
-
-       clk_disable(data->clk);
-       mutex_unlock(&data->lock);
-}
-
-static int exynos_tmu_read(struct exynos_tmu_data *data)
-{
-       u8 temp_code;
-       int temp;
-
-       mutex_lock(&data->lock);
-       clk_enable(data->clk);
-
-       temp_code = readb(data->base + EXYNOS_TMU_REG_CURRENT_TEMP);
-       temp = code_to_temp(data, temp_code);
-
-       clk_disable(data->clk);
-       mutex_unlock(&data->lock);
-
-       return temp;
-}
-
-#ifdef CONFIG_THERMAL_EMULATION
-static int exynos_tmu_set_emulation(void *drv_data, unsigned long temp)
-{
-       struct exynos_tmu_data *data = drv_data;
-       unsigned int reg;
-       int ret = -EINVAL;
-
-       if (data->soc == SOC_ARCH_EXYNOS4210)
-               goto out;
-
-       if (temp && temp < MCELSIUS)
-               goto out;
-
-       mutex_lock(&data->lock);
-       clk_enable(data->clk);
-
-       reg = readl(data->base + EXYNOS_EMUL_CON);
-
-       if (temp) {
-               temp /= MCELSIUS;
-
-               reg = (EXYNOS_EMUL_TIME << EXYNOS_EMUL_TIME_SHIFT) |
-                       (temp_to_code(data, temp)
-                        << EXYNOS_EMUL_DATA_SHIFT) | EXYNOS_EMUL_ENABLE;
-       } else {
-               reg &= ~EXYNOS_EMUL_ENABLE;
-       }
-
-       writel(reg, data->base + EXYNOS_EMUL_CON);
-
-       clk_disable(data->clk);
-       mutex_unlock(&data->lock);
-       return 0;
-out:
-       return ret;
-}
-#else
-static int exynos_tmu_set_emulation(void *drv_data,    unsigned long temp)
-       { return -EINVAL; }
-#endif/*CONFIG_THERMAL_EMULATION*/
-
-static void exynos_tmu_work(struct work_struct *work)
-{
-       struct exynos_tmu_data *data = container_of(work,
-                       struct exynos_tmu_data, irq_work);
-
-       exynos_report_trigger();
-       mutex_lock(&data->lock);
-       clk_enable(data->clk);
-       if (data->soc == SOC_ARCH_EXYNOS)
-               writel(EXYNOS_TMU_CLEAR_RISE_INT |
-                               EXYNOS_TMU_CLEAR_FALL_INT,
-                               data->base + EXYNOS_TMU_REG_INTCLEAR);
-       else
-               writel(EXYNOS4210_TMU_INTCLEAR_VAL,
-                               data->base + EXYNOS_TMU_REG_INTCLEAR);
-       clk_disable(data->clk);
-       mutex_unlock(&data->lock);
-
-       enable_irq(data->irq);
-}
-
-static irqreturn_t exynos_tmu_irq(int irq, void *id)
-{
-       struct exynos_tmu_data *data = id;
-
-       disable_irq_nosync(irq);
-       schedule_work(&data->irq_work);
-
-       return IRQ_HANDLED;
-}
-static struct thermal_sensor_conf exynos_sensor_conf = {
-       .name                   = "exynos-therm",
-       .read_temperature       = (int (*)(void *))exynos_tmu_read,
-       .write_emul_temp        = exynos_tmu_set_emulation,
-};
-
-#if defined(CONFIG_CPU_EXYNOS4210)
-static struct exynos_tmu_platform_data const exynos4210_default_tmu_data = {
-       .threshold = 80,
-       .trigger_levels[0] = 5,
-       .trigger_levels[1] = 20,
-       .trigger_levels[2] = 30,
-       .trigger_level0_en = 1,
-       .trigger_level1_en = 1,
-       .trigger_level2_en = 1,
-       .trigger_level3_en = 0,
-       .gain = 15,
-       .reference_voltage = 7,
-       .cal_type = TYPE_ONE_POINT_TRIMMING,
-       .freq_tab[0] = {
-               .freq_clip_max = 800 * 1000,
-               .temp_level = 85,
-       },
-       .freq_tab[1] = {
-               .freq_clip_max = 200 * 1000,
-               .temp_level = 100,
-       },
-       .freq_tab_count = 2,
-       .type = SOC_ARCH_EXYNOS4210,
-};
-#define EXYNOS4210_TMU_DRV_DATA (&exynos4210_default_tmu_data)
-#else
-#define EXYNOS4210_TMU_DRV_DATA (NULL)
-#endif
-
-#if defined(CONFIG_SOC_EXYNOS5250) || defined(CONFIG_SOC_EXYNOS4412) || \
-       defined(CONFIG_SOC_EXYNOS4212)
-static struct exynos_tmu_platform_data const exynos_default_tmu_data = {
-       .threshold_falling = 10,
-       .trigger_levels[0] = 85,
-       .trigger_levels[1] = 103,
-       .trigger_levels[2] = 110,
-       .trigger_level0_en = 1,
-       .trigger_level1_en = 1,
-       .trigger_level2_en = 1,
-       .trigger_level3_en = 0,
-       .gain = 8,
-       .reference_voltage = 16,
-       .noise_cancel_mode = 4,
-       .cal_type = TYPE_ONE_POINT_TRIMMING,
-       .efuse_value = 55,
-       .freq_tab[0] = {
-               .freq_clip_max = 800 * 1000,
-               .temp_level = 85,
-       },
-       .freq_tab[1] = {
-               .freq_clip_max = 200 * 1000,
-               .temp_level = 103,
-       },
-       .freq_tab_count = 2,
-       .type = SOC_ARCH_EXYNOS,
-};
-#define EXYNOS_TMU_DRV_DATA (&exynos_default_tmu_data)
-#else
-#define EXYNOS_TMU_DRV_DATA (NULL)
-#endif
-
-#ifdef CONFIG_OF
-static const struct of_device_id exynos_tmu_match[] = {
-       {
-               .compatible = "samsung,exynos4210-tmu",
-               .data = (void *)EXYNOS4210_TMU_DRV_DATA,
-       },
-       {
-               .compatible = "samsung,exynos4412-tmu",
-               .data = (void *)EXYNOS_TMU_DRV_DATA,
-       },
-       {
-               .compatible = "samsung,exynos5250-tmu",
-               .data = (void *)EXYNOS_TMU_DRV_DATA,
-       },
-       {},
-};
-MODULE_DEVICE_TABLE(of, exynos_tmu_match);
-#endif
-
-static struct platform_device_id exynos_tmu_driver_ids[] = {
-       {
-               .name           = "exynos4210-tmu",
-               .driver_data    = (kernel_ulong_t)EXYNOS4210_TMU_DRV_DATA,
-       },
-       {
-               .name           = "exynos5250-tmu",
-               .driver_data    = (kernel_ulong_t)EXYNOS_TMU_DRV_DATA,
-       },
-       { },
-};
-MODULE_DEVICE_TABLE(platform, exynos_tmu_driver_ids);
-
-static inline struct  exynos_tmu_platform_data *exynos_get_driver_data(
-                       struct platform_device *pdev)
-{
-#ifdef CONFIG_OF
-       if (pdev->dev.of_node) {
-               const struct of_device_id *match;
-               match = of_match_node(exynos_tmu_match, pdev->dev.of_node);
-               if (!match)
-                       return NULL;
-               return (struct exynos_tmu_platform_data *) match->data;
-       }
-#endif
-       return (struct exynos_tmu_platform_data *)
-                       platform_get_device_id(pdev)->driver_data;
-}
-
-static int exynos_tmu_probe(struct platform_device *pdev)
-{
-       struct exynos_tmu_data *data;
-       struct exynos_tmu_platform_data *pdata = pdev->dev.platform_data;
-       int ret, i;
-
-       if (!pdata)
-               pdata = exynos_get_driver_data(pdev);
-
-       if (!pdata) {
-               dev_err(&pdev->dev, "No platform init data supplied.\n");
-               return -ENODEV;
-       }
-       data = devm_kzalloc(&pdev->dev, sizeof(struct exynos_tmu_data),
-                                       GFP_KERNEL);
-       if (!data) {
-               dev_err(&pdev->dev, "Failed to allocate driver structure\n");
-               return -ENOMEM;
-       }
-
-       data->irq = platform_get_irq(pdev, 0);
-       if (data->irq < 0) {
-               dev_err(&pdev->dev, "Failed to get platform irq\n");
-               return data->irq;
-       }
-
-       INIT_WORK(&data->irq_work, exynos_tmu_work);
-
-       data->mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       data->base = devm_ioremap_resource(&pdev->dev, data->mem);
-       if (IS_ERR(data->base))
-               return PTR_ERR(data->base);
-
-       ret = devm_request_irq(&pdev->dev, data->irq, exynos_tmu_irq,
-               IRQF_TRIGGER_RISING, "exynos-tmu", data);
-       if (ret) {
-               dev_err(&pdev->dev, "Failed to request irq: %d\n", data->irq);
-               return ret;
-       }
-
-       data->clk = devm_clk_get(&pdev->dev, "tmu_apbif");
-       if (IS_ERR(data->clk)) {
-               dev_err(&pdev->dev, "Failed to get clock\n");
-               return  PTR_ERR(data->clk);
-       }
-
-       ret = clk_prepare(data->clk);
-       if (ret)
-               return ret;
-
-       if (pdata->type == SOC_ARCH_EXYNOS ||
-                               pdata->type == SOC_ARCH_EXYNOS4210)
-               data->soc = pdata->type;
-       else {
-               ret = -EINVAL;
-               dev_err(&pdev->dev, "Platform not supported\n");
-               goto err_clk;
-       }
-
-       data->pdata = pdata;
-       platform_set_drvdata(pdev, data);
-       mutex_init(&data->lock);
-
-       ret = exynos_tmu_initialize(pdev);
-       if (ret) {
-               dev_err(&pdev->dev, "Failed to initialize TMU\n");
-               goto err_clk;
-       }
-
-       exynos_tmu_control(pdev, true);
-
-       /* Register the sensor with thermal management interface */
-       (&exynos_sensor_conf)->private_data = data;
-       exynos_sensor_conf.trip_data.trip_count = pdata->trigger_level0_en +
-                       pdata->trigger_level1_en + pdata->trigger_level2_en +
-                       pdata->trigger_level3_en;
-
-       for (i = 0; i < exynos_sensor_conf.trip_data.trip_count; i++)
-               exynos_sensor_conf.trip_data.trip_val[i] =
-                       pdata->threshold + pdata->trigger_levels[i];
-
-       exynos_sensor_conf.trip_data.trigger_falling = pdata->threshold_falling;
-
-       exynos_sensor_conf.cooling_data.freq_clip_count =
-                                               pdata->freq_tab_count;
-       for (i = 0; i < pdata->freq_tab_count; i++) {
-               exynos_sensor_conf.cooling_data.freq_data[i].freq_clip_max =
-                                       pdata->freq_tab[i].freq_clip_max;
-               exynos_sensor_conf.cooling_data.freq_data[i].temp_level =
-                                       pdata->freq_tab[i].temp_level;
-       }
-
-       ret = exynos_register_thermal(&exynos_sensor_conf);
-       if (ret) {
-               dev_err(&pdev->dev, "Failed to register thermal interface\n");
-               goto err_clk;
-       }
-
-       return 0;
-err_clk:
-       clk_unprepare(data->clk);
-       return ret;
-}
-
-static int exynos_tmu_remove(struct platform_device *pdev)
-{
-       struct exynos_tmu_data *data = platform_get_drvdata(pdev);
-
-       exynos_tmu_control(pdev, false);
-
-       exynos_unregister_thermal();
-
-       clk_unprepare(data->clk);
-
-       return 0;
-}
-
-#ifdef CONFIG_PM_SLEEP
-static int exynos_tmu_suspend(struct device *dev)
-{
-       exynos_tmu_control(to_platform_device(dev), false);
-
-       return 0;
-}
-
-static int exynos_tmu_resume(struct device *dev)
-{
-       struct platform_device *pdev = to_platform_device(dev);
-
-       exynos_tmu_initialize(pdev);
-       exynos_tmu_control(pdev, true);
-
-       return 0;
-}
-
-static SIMPLE_DEV_PM_OPS(exynos_tmu_pm,
-                        exynos_tmu_suspend, exynos_tmu_resume);
-#define EXYNOS_TMU_PM  (&exynos_tmu_pm)
-#else
-#define EXYNOS_TMU_PM  NULL
-#endif
-
-static struct platform_driver exynos_tmu_driver = {
-       .driver = {
-               .name   = "exynos-tmu",
-               .owner  = THIS_MODULE,
-               .pm     = EXYNOS_TMU_PM,
-               .of_match_table = of_match_ptr(exynos_tmu_match),
-       },
-       .probe = exynos_tmu_probe,
-       .remove = exynos_tmu_remove,
-       .id_table = exynos_tmu_driver_ids,
-};
-
-module_platform_driver(exynos_tmu_driver);
-
-MODULE_DESCRIPTION("EXYNOS TMU Driver");
-MODULE_AUTHOR("Donggeun Kim <dg77.kim@samsung.com>");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("platform:exynos-tmu");
diff --git a/drivers/thermal/imx_thermal.c b/drivers/thermal/imx_thermal.c
new file mode 100644 (file)
index 0000000..1d6c801
--- /dev/null
@@ -0,0 +1,541 @@
+/*
+ * Copyright 2013 Freescale Semiconductor, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <linux/cpu_cooling.h>
+#include <linux/cpufreq.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/mfd/syscon.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+#include <linux/thermal.h>
+#include <linux/types.h>
+
+#define REG_SET                0x4
+#define REG_CLR                0x8
+#define REG_TOG                0xc
+
+#define MISC0                          0x0150
+#define MISC0_REFTOP_SELBIASOFF                (1 << 3)
+
+#define TEMPSENSE0                     0x0180
+#define TEMPSENSE0_ALARM_VALUE_SHIFT   20
+#define TEMPSENSE0_ALARM_VALUE_MASK    (0xfff << TEMPSENSE0_ALARM_VALUE_SHIFT)
+#define TEMPSENSE0_TEMP_CNT_SHIFT      8
+#define TEMPSENSE0_TEMP_CNT_MASK       (0xfff << TEMPSENSE0_TEMP_CNT_SHIFT)
+#define TEMPSENSE0_FINISHED            (1 << 2)
+#define TEMPSENSE0_MEASURE_TEMP                (1 << 1)
+#define TEMPSENSE0_POWER_DOWN          (1 << 0)
+
+#define TEMPSENSE1                     0x0190
+#define TEMPSENSE1_MEASURE_FREQ                0xffff
+
+#define OCOTP_ANA1                     0x04e0
+
+/* The driver supports 1 passive trip point and 1 critical trip point */
+enum imx_thermal_trip {
+       IMX_TRIP_PASSIVE,
+       IMX_TRIP_CRITICAL,
+       IMX_TRIP_NUM,
+};
+
+/*
+ * It defines the temperature in millicelsius for passive trip point
+ * that will trigger cooling action when crossed.
+ */
+#define IMX_TEMP_PASSIVE               85000
+
+#define IMX_POLLING_DELAY              2000 /* millisecond */
+#define IMX_PASSIVE_DELAY              1000
+
+struct imx_thermal_data {
+       struct thermal_zone_device *tz;
+       struct thermal_cooling_device *cdev;
+       enum thermal_device_mode mode;
+       struct regmap *tempmon;
+       int c1, c2; /* See formula in imx_get_sensor_data() */
+       unsigned long temp_passive;
+       unsigned long temp_critical;
+       unsigned long alarm_temp;
+       unsigned long last_temp;
+       bool irq_enabled;
+       int irq;
+};
+
+static void imx_set_alarm_temp(struct imx_thermal_data *data,
+                              signed long alarm_temp)
+{
+       struct regmap *map = data->tempmon;
+       int alarm_value;
+
+       data->alarm_temp = alarm_temp;
+       alarm_value = (alarm_temp - data->c2) / data->c1;
+       regmap_write(map, TEMPSENSE0 + REG_CLR, TEMPSENSE0_ALARM_VALUE_MASK);
+       regmap_write(map, TEMPSENSE0 + REG_SET, alarm_value <<
+                       TEMPSENSE0_ALARM_VALUE_SHIFT);
+}
+
+static int imx_get_temp(struct thermal_zone_device *tz, unsigned long *temp)
+{
+       struct imx_thermal_data *data = tz->devdata;
+       struct regmap *map = data->tempmon;
+       unsigned int n_meas;
+       bool wait;
+       u32 val;
+
+       if (data->mode == THERMAL_DEVICE_ENABLED) {
+               /* Check if a measurement is currently in progress */
+               regmap_read(map, TEMPSENSE0, &val);
+               wait = !(val & TEMPSENSE0_FINISHED);
+       } else {
+               /*
+                * Every time we measure the temperature, we will power on the
+                * temperature sensor, enable measurements, take a reading,
+                * disable measurements, power off the temperature sensor.
+                */
+               regmap_write(map, TEMPSENSE0 + REG_CLR, TEMPSENSE0_POWER_DOWN);
+               regmap_write(map, TEMPSENSE0 + REG_SET, TEMPSENSE0_MEASURE_TEMP);
+
+               wait = true;
+       }
+
+       /*
+        * According to the temp sensor designers, it may require up to ~17us
+        * to complete a measurement.
+        */
+       if (wait)
+               usleep_range(20, 50);
+
+       regmap_read(map, TEMPSENSE0, &val);
+
+       if (data->mode != THERMAL_DEVICE_ENABLED) {
+               regmap_write(map, TEMPSENSE0 + REG_CLR, TEMPSENSE0_MEASURE_TEMP);
+               regmap_write(map, TEMPSENSE0 + REG_SET, TEMPSENSE0_POWER_DOWN);
+       }
+
+       if ((val & TEMPSENSE0_FINISHED) == 0) {
+               dev_dbg(&tz->device, "temp measurement never finished\n");
+               return -EAGAIN;
+       }
+
+       n_meas = (val & TEMPSENSE0_TEMP_CNT_MASK) >> TEMPSENSE0_TEMP_CNT_SHIFT;
+
+       /* See imx_get_sensor_data() for formula derivation */
+       *temp = data->c2 + data->c1 * n_meas;
+
+       /* Update alarm value to next higher trip point */
+       if (data->alarm_temp == data->temp_passive && *temp >= data->temp_passive)
+               imx_set_alarm_temp(data, data->temp_critical);
+       if (data->alarm_temp == data->temp_critical && *temp < data->temp_passive) {
+               imx_set_alarm_temp(data, data->temp_passive);
+               dev_dbg(&tz->device, "thermal alarm off: T < %lu\n",
+                       data->alarm_temp / 1000);
+       }
+
+       if (*temp != data->last_temp) {
+               dev_dbg(&tz->device, "millicelsius: %ld\n", *temp);
+               data->last_temp = *temp;
+       }
+
+       /* Reenable alarm IRQ if temperature below alarm temperature */
+       if (!data->irq_enabled && *temp < data->alarm_temp) {
+               data->irq_enabled = true;
+               enable_irq(data->irq);
+       }
+
+       return 0;
+}
+
+static int imx_get_mode(struct thermal_zone_device *tz,
+                       enum thermal_device_mode *mode)
+{
+       struct imx_thermal_data *data = tz->devdata;
+
+       *mode = data->mode;
+
+       return 0;
+}
+
+static int imx_set_mode(struct thermal_zone_device *tz,
+                       enum thermal_device_mode mode)
+{
+       struct imx_thermal_data *data = tz->devdata;
+       struct regmap *map = data->tempmon;
+
+       if (mode == THERMAL_DEVICE_ENABLED) {
+               tz->polling_delay = IMX_POLLING_DELAY;
+               tz->passive_delay = IMX_PASSIVE_DELAY;
+
+               regmap_write(map, TEMPSENSE0 + REG_CLR, TEMPSENSE0_POWER_DOWN);
+               regmap_write(map, TEMPSENSE0 + REG_SET, TEMPSENSE0_MEASURE_TEMP);
+
+               if (!data->irq_enabled) {
+                       data->irq_enabled = true;
+                       enable_irq(data->irq);
+               }
+       } else {
+               regmap_write(map, TEMPSENSE0 + REG_CLR, TEMPSENSE0_MEASURE_TEMP);
+               regmap_write(map, TEMPSENSE0 + REG_SET, TEMPSENSE0_POWER_DOWN);
+
+               tz->polling_delay = 0;
+               tz->passive_delay = 0;
+
+               if (data->irq_enabled) {
+                       disable_irq(data->irq);
+                       data->irq_enabled = false;
+               }
+       }
+
+       data->mode = mode;
+       thermal_zone_device_update(tz);
+
+       return 0;
+}
+
+static int imx_get_trip_type(struct thermal_zone_device *tz, int trip,
+                            enum thermal_trip_type *type)
+{
+       *type = (trip == IMX_TRIP_PASSIVE) ? THERMAL_TRIP_PASSIVE :
+                                            THERMAL_TRIP_CRITICAL;
+       return 0;
+}
+
+static int imx_get_crit_temp(struct thermal_zone_device *tz,
+                            unsigned long *temp)
+{
+       struct imx_thermal_data *data = tz->devdata;
+
+       *temp = data->temp_critical;
+       return 0;
+}
+
+static int imx_get_trip_temp(struct thermal_zone_device *tz, int trip,
+                            unsigned long *temp)
+{
+       struct imx_thermal_data *data = tz->devdata;
+
+       *temp = (trip == IMX_TRIP_PASSIVE) ? data->temp_passive :
+                                            data->temp_critical;
+       return 0;
+}
+
+static int imx_set_trip_temp(struct thermal_zone_device *tz, int trip,
+                            unsigned long temp)
+{
+       struct imx_thermal_data *data = tz->devdata;
+
+       if (trip == IMX_TRIP_CRITICAL)
+               return -EPERM;
+
+       if (temp > IMX_TEMP_PASSIVE)
+               return -EINVAL;
+
+       data->temp_passive = temp;
+
+       imx_set_alarm_temp(data, temp);
+
+       return 0;
+}
+
+static int imx_bind(struct thermal_zone_device *tz,
+                   struct thermal_cooling_device *cdev)
+{
+       int ret;
+
+       ret = thermal_zone_bind_cooling_device(tz, IMX_TRIP_PASSIVE, cdev,
+                                              THERMAL_NO_LIMIT,
+                                              THERMAL_NO_LIMIT);
+       if (ret) {
+               dev_err(&tz->device,
+                       "binding zone %s with cdev %s failed:%d\n",
+                       tz->type, cdev->type, ret);
+               return ret;
+       }
+
+       return 0;
+}
+
+static int imx_unbind(struct thermal_zone_device *tz,
+                     struct thermal_cooling_device *cdev)
+{
+       int ret;
+
+       ret = thermal_zone_unbind_cooling_device(tz, IMX_TRIP_PASSIVE, cdev);
+       if (ret) {
+               dev_err(&tz->device,
+                       "unbinding zone %s with cdev %s failed:%d\n",
+                       tz->type, cdev->type, ret);
+               return ret;
+       }
+
+       return 0;
+}
+
+static const struct thermal_zone_device_ops imx_tz_ops = {
+       .bind = imx_bind,
+       .unbind = imx_unbind,
+       .get_temp = imx_get_temp,
+       .get_mode = imx_get_mode,
+       .set_mode = imx_set_mode,
+       .get_trip_type = imx_get_trip_type,
+       .get_trip_temp = imx_get_trip_temp,
+       .get_crit_temp = imx_get_crit_temp,
+       .set_trip_temp = imx_set_trip_temp,
+};
+
+static int imx_get_sensor_data(struct platform_device *pdev)
+{
+       struct imx_thermal_data *data = platform_get_drvdata(pdev);
+       struct regmap *map;
+       int t1, t2, n1, n2;
+       int ret;
+       u32 val;
+
+       map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node,
+                                             "fsl,tempmon-data");
+       if (IS_ERR(map)) {
+               ret = PTR_ERR(map);
+               dev_err(&pdev->dev, "failed to get sensor regmap: %d\n", ret);
+               return ret;
+       }
+
+       ret = regmap_read(map, OCOTP_ANA1, &val);
+       if (ret) {
+               dev_err(&pdev->dev, "failed to read sensor data: %d\n", ret);
+               return ret;
+       }
+
+       if (val == 0 || val == ~0) {
+               dev_err(&pdev->dev, "invalid sensor calibration data\n");
+               return -EINVAL;
+       }
+
+       /*
+        * Sensor data layout:
+        *   [31:20] - sensor value @ 25C
+        *    [19:8] - sensor value of hot
+        *     [7:0] - hot temperature value
+        */
+       n1 = val >> 20;
+       n2 = (val & 0xfff00) >> 8;
+       t2 = val & 0xff;
+       t1 = 25; /* t1 always 25C */
+
+       /*
+        * Derived from linear interpolation,
+        * Tmeas = T2 + (Nmeas - N2) * (T1 - T2) / (N1 - N2)
+        * We want to reduce this down to the minimum computation necessary
+        * for each temperature read.  Also, we want Tmeas in millicelsius
+        * and we don't want to lose precision from integer division. So...
+        * milli_Tmeas = 1000 * T2 + 1000 * (Nmeas - N2) * (T1 - T2) / (N1 - N2)
+        * Let constant c1 = 1000 * (T1 - T2) / (N1 - N2)
+        * milli_Tmeas = (1000 * T2) + c1 * (Nmeas - N2)
+        * milli_Tmeas = (1000 * T2) + (c1 * Nmeas) - (c1 * N2)
+        * Let constant c2 = (1000 * T2) - (c1 * N2)
+        * milli_Tmeas = c2 + (c1 * Nmeas)
+        */
+       data->c1 = 1000 * (t1 - t2) / (n1 - n2);
+       data->c2 = 1000 * t2 - data->c1 * n2;
+
+       /*
+        * Set the default passive cooling trip point to 20 Â°C below the
+        * maximum die temperature. Can be changed from userspace.
+        */
+       data->temp_passive = 1000 * (t2 - 20);
+
+       /*
+        * The maximum die temperature is t2, let's give 5 Â°C cushion
+        * for noise and possible temperature rise between measurements.
+        */
+       data->temp_critical = 1000 * (t2 - 5);
+
+       return 0;
+}
+
+static irqreturn_t imx_thermal_alarm_irq(int irq, void *dev)
+{
+       struct imx_thermal_data *data = dev;
+
+       disable_irq_nosync(irq);
+       data->irq_enabled = false;
+
+       return IRQ_WAKE_THREAD;
+}
+
+static irqreturn_t imx_thermal_alarm_irq_thread(int irq, void *dev)
+{
+       struct imx_thermal_data *data = dev;
+
+       dev_dbg(&data->tz->device, "THERMAL ALARM: T > %lu\n",
+               data->alarm_temp / 1000);
+
+       thermal_zone_device_update(data->tz);
+
+       return IRQ_HANDLED;
+}
+
+static int imx_thermal_probe(struct platform_device *pdev)
+{
+       struct imx_thermal_data *data;
+       struct cpumask clip_cpus;
+       struct regmap *map;
+       int measure_freq;
+       int ret;
+
+       data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL);
+       if (!data)
+               return -ENOMEM;
+
+       map = syscon_regmap_lookup_by_phandle(pdev->dev.of_node, "fsl,tempmon");
+       if (IS_ERR(map)) {
+               ret = PTR_ERR(map);
+               dev_err(&pdev->dev, "failed to get tempmon regmap: %d\n", ret);
+               return ret;
+       }
+       data->tempmon = map;
+
+       data->irq = platform_get_irq(pdev, 0);
+       if (data->irq < 0)
+               return data->irq;
+
+       ret = devm_request_threaded_irq(&pdev->dev, data->irq,
+                       imx_thermal_alarm_irq, imx_thermal_alarm_irq_thread,
+                       0, "imx_thermal", data);
+       if (ret < 0) {
+               dev_err(&pdev->dev, "failed to request alarm irq: %d\n", ret);
+               return ret;
+       }
+
+       platform_set_drvdata(pdev, data);
+
+       ret = imx_get_sensor_data(pdev);
+       if (ret) {
+               dev_err(&pdev->dev, "failed to get sensor data\n");
+               return ret;
+       }
+
+       /* Make sure sensor is in known good state for measurements */
+       regmap_write(map, TEMPSENSE0 + REG_CLR, TEMPSENSE0_POWER_DOWN);
+       regmap_write(map, TEMPSENSE0 + REG_CLR, TEMPSENSE0_MEASURE_TEMP);
+       regmap_write(map, TEMPSENSE1 + REG_CLR, TEMPSENSE1_MEASURE_FREQ);
+       regmap_write(map, MISC0 + REG_SET, MISC0_REFTOP_SELBIASOFF);
+       regmap_write(map, TEMPSENSE0 + REG_SET, TEMPSENSE0_POWER_DOWN);
+
+       cpumask_set_cpu(0, &clip_cpus);
+       data->cdev = cpufreq_cooling_register(&clip_cpus);
+       if (IS_ERR(data->cdev)) {
+               ret = PTR_ERR(data->cdev);
+               dev_err(&pdev->dev,
+                       "failed to register cpufreq cooling device: %d\n", ret);
+               return ret;
+       }
+
+       data->tz = thermal_zone_device_register("imx_thermal_zone",
+                                               IMX_TRIP_NUM,
+                                               BIT(IMX_TRIP_PASSIVE), data,
+                                               &imx_tz_ops, NULL,
+                                               IMX_PASSIVE_DELAY,
+                                               IMX_POLLING_DELAY);
+       if (IS_ERR(data->tz)) {
+               ret = PTR_ERR(data->tz);
+               dev_err(&pdev->dev,
+                       "failed to register thermal zone device %d\n", ret);
+               cpufreq_cooling_unregister(data->cdev);
+               return ret;
+       }
+
+       /* Enable measurements at ~ 10 Hz */
+       regmap_write(map, TEMPSENSE1 + REG_CLR, TEMPSENSE1_MEASURE_FREQ);
+       measure_freq = DIV_ROUND_UP(32768, 10); /* 10 Hz */
+       regmap_write(map, TEMPSENSE1 + REG_SET, measure_freq);
+       imx_set_alarm_temp(data, data->temp_passive);
+       regmap_write(map, TEMPSENSE0 + REG_CLR, TEMPSENSE0_POWER_DOWN);
+       regmap_write(map, TEMPSENSE0 + REG_SET, TEMPSENSE0_MEASURE_TEMP);
+
+       data->irq_enabled = true;
+       data->mode = THERMAL_DEVICE_ENABLED;
+
+       return 0;
+}
+
+static int imx_thermal_remove(struct platform_device *pdev)
+{
+       struct imx_thermal_data *data = platform_get_drvdata(pdev);
+       struct regmap *map = data->tempmon;
+
+       /* Disable measurements */
+       regmap_write(map, TEMPSENSE0 + REG_SET, TEMPSENSE0_POWER_DOWN);
+
+       thermal_zone_device_unregister(data->tz);
+       cpufreq_cooling_unregister(data->cdev);
+
+       return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int imx_thermal_suspend(struct device *dev)
+{
+       struct imx_thermal_data *data = dev_get_drvdata(dev);
+       struct regmap *map = data->tempmon;
+       u32 val;
+
+       regmap_read(map, TEMPSENSE0, &val);
+       if ((val & TEMPSENSE0_POWER_DOWN) == 0) {
+               /*
+                * If a measurement is taking place, wait for a long enough
+                * time for it to finish, and then check again.  If it still
+                * does not finish, something must go wrong.
+                */
+               udelay(50);
+               regmap_read(map, TEMPSENSE0, &val);
+               if ((val & TEMPSENSE0_POWER_DOWN) == 0)
+                       return -ETIMEDOUT;
+       }
+
+       return 0;
+}
+
+static int imx_thermal_resume(struct device *dev)
+{
+       /* Nothing to do for now */
+       return 0;
+}
+#endif
+
+static SIMPLE_DEV_PM_OPS(imx_thermal_pm_ops,
+                        imx_thermal_suspend, imx_thermal_resume);
+
+static const struct of_device_id of_imx_thermal_match[] = {
+       { .compatible = "fsl,imx6q-tempmon", },
+       { /* end */ }
+};
+
+static struct platform_driver imx_thermal = {
+       .driver = {
+               .name   = "imx_thermal",
+               .owner  = THIS_MODULE,
+               .pm     = &imx_thermal_pm_ops,
+               .of_match_table = of_imx_thermal_match,
+       },
+       .probe          = imx_thermal_probe,
+       .remove         = imx_thermal_remove,
+};
+module_platform_driver(imx_thermal);
+
+MODULE_AUTHOR("Freescale Semiconductor, Inc.");
+MODULE_DESCRIPTION("Thermal driver for Freescale i.MX SoCs");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:imx-thermal");
diff --git a/drivers/thermal/samsung/Kconfig b/drivers/thermal/samsung/Kconfig
new file mode 100644 (file)
index 0000000..f760389
--- /dev/null
@@ -0,0 +1,18 @@
+config EXYNOS_THERMAL
+       tristate "Exynos thermal management unit driver"
+       depends on ARCH_HAS_BANDGAP && OF
+       help
+         If you say yes here you get support for the TMU (Thermal Management
+         Unit) driver for SAMSUNG EXYNOS series of SoCs. This driver initialises
+         the TMU, reports temperature and handles cooling action if defined.
+         This driver uses the Exynos core thermal APIs and TMU configuration
+         data from the supported SoCs.
+
+config EXYNOS_THERMAL_CORE
+       bool "Core thermal framework support for EXYNOS SOCs"
+       depends on EXYNOS_THERMAL
+       help
+         If you say yes here you get support for EXYNOS TMU
+         (Thermal Management Unit) common registration/unregistration
+         functions to the core thermal layer and also to use the generic
+         CPU cooling APIs.
diff --git a/drivers/thermal/samsung/Makefile b/drivers/thermal/samsung/Makefile
new file mode 100644 (file)
index 0000000..c09d830
--- /dev/null
@@ -0,0 +1,7 @@
+#
+# Samsung thermal specific Makefile
+#
+obj-$(CONFIG_EXYNOS_THERMAL)                   += exynos_thermal.o
+exynos_thermal-y                               := exynos_tmu.o
+exynos_thermal-y                               += exynos_tmu_data.o
+exynos_thermal-$(CONFIG_EXYNOS_THERMAL_CORE)   += exynos_thermal_common.o
diff --git a/drivers/thermal/samsung/exynos_thermal_common.c b/drivers/thermal/samsung/exynos_thermal_common.c
new file mode 100644 (file)
index 0000000..f10a6ad
--- /dev/null
@@ -0,0 +1,432 @@
+/*
+ * exynos_thermal_common.c - Samsung EXYNOS common thermal file
+ *
+ *  Copyright (C) 2013 Samsung Electronics
+ *  Amit Daniel Kachhap <amit.daniel@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include <linux/cpu_cooling.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+#include <linux/thermal.h>
+
+#include "exynos_thermal_common.h"
+
+struct exynos_thermal_zone {
+       enum thermal_device_mode mode;
+       struct thermal_zone_device *therm_dev;
+       struct thermal_cooling_device *cool_dev[MAX_COOLING_DEVICE];
+       unsigned int cool_dev_size;
+       struct platform_device *exynos4_dev;
+       struct thermal_sensor_conf *sensor_conf;
+       bool bind;
+};
+
+/* Get mode callback functions for thermal zone */
+static int exynos_get_mode(struct thermal_zone_device *thermal,
+                       enum thermal_device_mode *mode)
+{
+       struct exynos_thermal_zone *th_zone = thermal->devdata;
+       if (th_zone)
+               *mode = th_zone->mode;
+       return 0;
+}
+
+/* Set mode callback functions for thermal zone */
+static int exynos_set_mode(struct thermal_zone_device *thermal,
+                       enum thermal_device_mode mode)
+{
+       struct exynos_thermal_zone *th_zone = thermal->devdata;
+       if (!th_zone) {
+               dev_err(&thermal->device,
+                       "thermal zone not registered\n");
+               return 0;
+       }
+
+       mutex_lock(&thermal->lock);
+
+       if (mode == THERMAL_DEVICE_ENABLED &&
+               !th_zone->sensor_conf->trip_data.trigger_falling)
+               thermal->polling_delay = IDLE_INTERVAL;
+       else
+               thermal->polling_delay = 0;
+
+       mutex_unlock(&thermal->lock);
+
+       th_zone->mode = mode;
+       thermal_zone_device_update(thermal);
+       dev_dbg(th_zone->sensor_conf->dev,
+               "thermal polling set for duration=%d msec\n",
+               thermal->polling_delay);
+       return 0;
+}
+
+
+/* Get trip type callback functions for thermal zone */
+static int exynos_get_trip_type(struct thermal_zone_device *thermal, int trip,
+                                enum thermal_trip_type *type)
+{
+       struct exynos_thermal_zone *th_zone = thermal->devdata;
+       int max_trip = th_zone->sensor_conf->trip_data.trip_count;
+       int trip_type;
+
+       if (trip < 0 || trip >= max_trip)
+               return -EINVAL;
+
+       trip_type = th_zone->sensor_conf->trip_data.trip_type[trip];
+
+       if (trip_type == SW_TRIP)
+               *type = THERMAL_TRIP_CRITICAL;
+       else if (trip_type == THROTTLE_ACTIVE)
+               *type = THERMAL_TRIP_ACTIVE;
+       else if (trip_type == THROTTLE_PASSIVE)
+               *type = THERMAL_TRIP_PASSIVE;
+       else
+               return -EINVAL;
+
+       return 0;
+}
+
+/* Get trip temperature callback functions for thermal zone */
+static int exynos_get_trip_temp(struct thermal_zone_device *thermal, int trip,
+                               unsigned long *temp)
+{
+       struct exynos_thermal_zone *th_zone = thermal->devdata;
+       int max_trip = th_zone->sensor_conf->trip_data.trip_count;
+
+       if (trip < 0 || trip >= max_trip)
+               return -EINVAL;
+
+       *temp = th_zone->sensor_conf->trip_data.trip_val[trip];
+       /* convert the temperature into millicelsius */
+       *temp = *temp * MCELSIUS;
+
+       return 0;
+}
+
+/* Get critical temperature callback functions for thermal zone */
+static int exynos_get_crit_temp(struct thermal_zone_device *thermal,
+                               unsigned long *temp)
+{
+       struct exynos_thermal_zone *th_zone = thermal->devdata;
+       int max_trip = th_zone->sensor_conf->trip_data.trip_count;
+       /* Get the temp of highest trip*/
+       return exynos_get_trip_temp(thermal, max_trip - 1, temp);
+}
+
+/* Bind callback functions for thermal zone */
+static int exynos_bind(struct thermal_zone_device *thermal,
+                       struct thermal_cooling_device *cdev)
+{
+       int ret = 0, i, tab_size, level;
+       struct freq_clip_table *tab_ptr, *clip_data;
+       struct exynos_thermal_zone *th_zone = thermal->devdata;
+       struct thermal_sensor_conf *data = th_zone->sensor_conf;
+
+       tab_ptr = (struct freq_clip_table *)data->cooling_data.freq_data;
+       tab_size = data->cooling_data.freq_clip_count;
+
+       if (tab_ptr == NULL || tab_size == 0)
+               return 0;
+
+       /* find the cooling device registered*/
+       for (i = 0; i < th_zone->cool_dev_size; i++)
+               if (cdev == th_zone->cool_dev[i])
+                       break;
+
+       /* No matching cooling device */
+       if (i == th_zone->cool_dev_size)
+               return 0;
+
+       /* Bind the thermal zone to the cpufreq cooling device */
+       for (i = 0; i < tab_size; i++) {
+               clip_data = (struct freq_clip_table *)&(tab_ptr[i]);
+               level = cpufreq_cooling_get_level(0, clip_data->freq_clip_max);
+               if (level == THERMAL_CSTATE_INVALID)
+                       return 0;
+               switch (GET_ZONE(i)) {
+               case MONITOR_ZONE:
+               case WARN_ZONE:
+                       if (thermal_zone_bind_cooling_device(thermal, i, cdev,
+                                                               level, 0)) {
+                               dev_err(data->dev,
+                                       "error unbinding cdev inst=%d\n", i);
+                               ret = -EINVAL;
+                       }
+                       th_zone->bind = true;
+                       break;
+               default:
+                       ret = -EINVAL;
+               }
+       }
+
+       return ret;
+}
+
+/* Unbind callback functions for thermal zone */
+static int exynos_unbind(struct thermal_zone_device *thermal,
+                       struct thermal_cooling_device *cdev)
+{
+       int ret = 0, i, tab_size;
+       struct exynos_thermal_zone *th_zone = thermal->devdata;
+       struct thermal_sensor_conf *data = th_zone->sensor_conf;
+
+       if (th_zone->bind == false)
+               return 0;
+
+       tab_size = data->cooling_data.freq_clip_count;
+
+       if (tab_size == 0)
+               return 0;
+
+       /* find the cooling device registered*/
+       for (i = 0; i < th_zone->cool_dev_size; i++)
+               if (cdev == th_zone->cool_dev[i])
+                       break;
+
+       /* No matching cooling device */
+       if (i == th_zone->cool_dev_size)
+               return 0;
+
+       /* Bind the thermal zone to the cpufreq cooling device */
+       for (i = 0; i < tab_size; i++) {
+               switch (GET_ZONE(i)) {
+               case MONITOR_ZONE:
+               case WARN_ZONE:
+                       if (thermal_zone_unbind_cooling_device(thermal, i,
+                                                               cdev)) {
+                               dev_err(data->dev,
+                                       "error unbinding cdev inst=%d\n", i);
+                               ret = -EINVAL;
+                       }
+                       th_zone->bind = false;
+                       break;
+               default:
+                       ret = -EINVAL;
+               }
+       }
+       return ret;
+}
+
+/* Get temperature callback functions for thermal zone */
+static int exynos_get_temp(struct thermal_zone_device *thermal,
+                       unsigned long *temp)
+{
+       struct exynos_thermal_zone *th_zone = thermal->devdata;
+       void *data;
+
+       if (!th_zone->sensor_conf) {
+               dev_err(&thermal->device,
+                       "Temperature sensor not initialised\n");
+               return -EINVAL;
+       }
+       data = th_zone->sensor_conf->driver_data;
+       *temp = th_zone->sensor_conf->read_temperature(data);
+       /* convert the temperature into millicelsius */
+       *temp = *temp * MCELSIUS;
+       return 0;
+}
+
+/* Get temperature callback functions for thermal zone */
+static int exynos_set_emul_temp(struct thermal_zone_device *thermal,
+                                               unsigned long temp)
+{
+       void *data;
+       int ret = -EINVAL;
+       struct exynos_thermal_zone *th_zone = thermal->devdata;
+
+       if (!th_zone->sensor_conf) {
+               dev_err(&thermal->device,
+                       "Temperature sensor not initialised\n");
+               return -EINVAL;
+       }
+       data = th_zone->sensor_conf->driver_data;
+       if (th_zone->sensor_conf->write_emul_temp)
+               ret = th_zone->sensor_conf->write_emul_temp(data, temp);
+       return ret;
+}
+
+/* Get the temperature trend */
+static int exynos_get_trend(struct thermal_zone_device *thermal,
+                       int trip, enum thermal_trend *trend)
+{
+       int ret;
+       unsigned long trip_temp;
+
+       ret = exynos_get_trip_temp(thermal, trip, &trip_temp);
+       if (ret < 0)
+               return ret;
+
+       if (thermal->temperature >= trip_temp)
+               *trend = THERMAL_TREND_RAISE_FULL;
+       else
+               *trend = THERMAL_TREND_DROP_FULL;
+
+       return 0;
+}
+/* Operation callback functions for thermal zone */
+static struct thermal_zone_device_ops const exynos_dev_ops = {
+       .bind = exynos_bind,
+       .unbind = exynos_unbind,
+       .get_temp = exynos_get_temp,
+       .set_emul_temp = exynos_set_emul_temp,
+       .get_trend = exynos_get_trend,
+       .get_mode = exynos_get_mode,
+       .set_mode = exynos_set_mode,
+       .get_trip_type = exynos_get_trip_type,
+       .get_trip_temp = exynos_get_trip_temp,
+       .get_crit_temp = exynos_get_crit_temp,
+};
+
+/*
+ * This function may be called from interrupt based temperature sensor
+ * when threshold is changed.
+ */
+void exynos_report_trigger(struct thermal_sensor_conf *conf)
+{
+       unsigned int i;
+       char data[10];
+       char *envp[] = { data, NULL };
+       struct exynos_thermal_zone *th_zone;
+
+       if (!conf || !conf->pzone_data) {
+               pr_err("Invalid temperature sensor configuration data\n");
+               return;
+       }
+
+       th_zone = conf->pzone_data;
+       if (th_zone->therm_dev)
+               return;
+
+       if (th_zone->bind == false) {
+               for (i = 0; i < th_zone->cool_dev_size; i++) {
+                       if (!th_zone->cool_dev[i])
+                               continue;
+                       exynos_bind(th_zone->therm_dev,
+                                       th_zone->cool_dev[i]);
+               }
+       }
+
+       thermal_zone_device_update(th_zone->therm_dev);
+
+       mutex_lock(&th_zone->therm_dev->lock);
+       /* Find the level for which trip happened */
+       for (i = 0; i < th_zone->sensor_conf->trip_data.trip_count; i++) {
+               if (th_zone->therm_dev->last_temperature <
+                       th_zone->sensor_conf->trip_data.trip_val[i] * MCELSIUS)
+                       break;
+       }
+
+       if (th_zone->mode == THERMAL_DEVICE_ENABLED &&
+               !th_zone->sensor_conf->trip_data.trigger_falling) {
+               if (i > 0)
+                       th_zone->therm_dev->polling_delay = ACTIVE_INTERVAL;
+               else
+                       th_zone->therm_dev->polling_delay = IDLE_INTERVAL;
+       }
+
+       snprintf(data, sizeof(data), "%u", i);
+       kobject_uevent_env(&th_zone->therm_dev->device.kobj, KOBJ_CHANGE, envp);
+       mutex_unlock(&th_zone->therm_dev->lock);
+}
+
+/* Register with the in-kernel thermal management */
+int exynos_register_thermal(struct thermal_sensor_conf *sensor_conf)
+{
+       int ret;
+       struct cpumask mask_val;
+       struct exynos_thermal_zone *th_zone;
+
+       if (!sensor_conf || !sensor_conf->read_temperature) {
+               pr_err("Temperature sensor not initialised\n");
+               return -EINVAL;
+       }
+
+       th_zone = devm_kzalloc(sensor_conf->dev,
+                               sizeof(struct exynos_thermal_zone), GFP_KERNEL);
+       if (!th_zone)
+               return -ENOMEM;
+
+       th_zone->sensor_conf = sensor_conf;
+       /*
+        * TODO: 1) Handle multiple cooling devices in a thermal zone
+        *       2) Add a flag/name in cooling info to map to specific
+        *       sensor
+        */
+       if (sensor_conf->cooling_data.freq_clip_count > 0) {
+               cpumask_set_cpu(0, &mask_val);
+               th_zone->cool_dev[th_zone->cool_dev_size] =
+                                       cpufreq_cooling_register(&mask_val);
+               if (IS_ERR(th_zone->cool_dev[th_zone->cool_dev_size])) {
+                       dev_err(sensor_conf->dev,
+                               "Failed to register cpufreq cooling device\n");
+                       ret = -EINVAL;
+                       goto err_unregister;
+               }
+               th_zone->cool_dev_size++;
+       }
+
+       th_zone->therm_dev = thermal_zone_device_register(
+                       sensor_conf->name, sensor_conf->trip_data.trip_count,
+                       0, th_zone, &exynos_dev_ops, NULL, 0,
+                       sensor_conf->trip_data.trigger_falling ? 0 :
+                       IDLE_INTERVAL);
+
+       if (IS_ERR(th_zone->therm_dev)) {
+               dev_err(sensor_conf->dev,
+                       "Failed to register thermal zone device\n");
+               ret = PTR_ERR(th_zone->therm_dev);
+               goto err_unregister;
+       }
+       th_zone->mode = THERMAL_DEVICE_ENABLED;
+       sensor_conf->pzone_data = th_zone;
+
+       dev_info(sensor_conf->dev,
+               "Exynos: Thermal zone(%s) registered\n", sensor_conf->name);
+
+       return 0;
+
+err_unregister:
+       exynos_unregister_thermal(sensor_conf);
+       return ret;
+}
+
+/* Un-Register with the in-kernel thermal management */
+void exynos_unregister_thermal(struct thermal_sensor_conf *sensor_conf)
+{
+       int i;
+       struct exynos_thermal_zone *th_zone;
+
+       if (!sensor_conf || !sensor_conf->pzone_data) {
+               pr_err("Invalid temperature sensor configuration data\n");
+               return;
+       }
+
+       th_zone = sensor_conf->pzone_data;
+
+       if (th_zone->therm_dev)
+               thermal_zone_device_unregister(th_zone->therm_dev);
+
+       for (i = 0; i < th_zone->cool_dev_size; i++) {
+               if (th_zone->cool_dev[i])
+                       cpufreq_cooling_unregister(th_zone->cool_dev[i]);
+       }
+
+       dev_info(sensor_conf->dev,
+               "Exynos: Kernel Thermal management unregistered\n");
+}
diff --git a/drivers/thermal/samsung/exynos_thermal_common.h b/drivers/thermal/samsung/exynos_thermal_common.h
new file mode 100644 (file)
index 0000000..3eb2ed9
--- /dev/null
@@ -0,0 +1,107 @@
+/*
+ * exynos_thermal_common.h - Samsung EXYNOS common header file
+ *
+ *  Copyright (C) 2013 Samsung Electronics
+ *  Amit Daniel Kachhap <amit.daniel@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#ifndef _EXYNOS_THERMAL_COMMON_H
+#define _EXYNOS_THERMAL_COMMON_H
+
+/* In-kernel thermal framework related macros & definations */
+#define SENSOR_NAME_LEN        16
+#define MAX_TRIP_COUNT 8
+#define MAX_COOLING_DEVICE 4
+#define MAX_THRESHOLD_LEVS 5
+
+/* Polling intervals (ms) passed to thermal_zone_device_register(). */
+#define ACTIVE_INTERVAL 500
+#define IDLE_INTERVAL 10000
+/* Conversion factor: degrees Celsius to millicelsius. */
+#define MCELSIUS       1000
+
+/* CPU Zone information */
+#define PANIC_ZONE      4
+#define WARN_ZONE       3
+#define MONITOR_ZONE    2
+#define SAFE_ZONE       1
+
+/* Map between a 0-based trip index and the zone constants above. */
+#define GET_ZONE(trip) (trip + 2)
+#define GET_TRIP(zone) (zone - 2)
+
+/* Action taken when a trip point fires (HW_TRIP is handled in hardware). */
+enum trigger_type {
+       THROTTLE_ACTIVE = 1,
+       THROTTLE_PASSIVE,
+       SW_TRIP,
+       HW_TRIP,
+};
+
+/**
+ * struct freq_clip_table
+ * @freq_clip_max: maximum frequency allowed for this cooling state.
+ * @temp_level: Temperature level at which the temperature clipping will
+ *     happen.
+ * @mask_val: cpumask of the allowed cpu's where the clipping will take place.
+ *
+ * This structure is required to be filled and passed to the
+ * cpufreq_cooling_unregister function.
+ */
+struct freq_clip_table {
+       unsigned int freq_clip_max;
+       unsigned int temp_level;
+       const struct cpumask *mask_val;
+};
+
+/* Per-sensor trip point configuration handed to the core thermal layer. */
+struct thermal_trip_point_conf {
+       int trip_val[MAX_TRIP_COUNT];
+       int trip_type[MAX_TRIP_COUNT];
+       int trip_count;
+       unsigned char trigger_falling;
+};
+
+/* Cooling (frequency clipping) table for one sensor. */
+struct thermal_cooling_conf {
+       struct freq_clip_table freq_data[MAX_TRIP_COUNT];
+       int freq_clip_count;
+};
+
+/*
+ * Registration record a TMU driver instance passes to
+ * exynos_register_thermal().  driver_data belongs to the sensor driver;
+ * pzone_data is owned by the common layer and set on registration.
+ */
+struct thermal_sensor_conf {
+       char name[SENSOR_NAME_LEN];
+       int (*read_temperature)(void *data);
+       int (*write_emul_temp)(void *drv_data, unsigned long temp);
+       struct thermal_trip_point_conf trip_data;
+       struct thermal_cooling_conf cooling_data;
+       void *driver_data;
+       void *pzone_data;
+       struct device *dev;
+};
+
+/*Functions used exynos based thermal sensor driver*/
+#ifdef CONFIG_EXYNOS_THERMAL_CORE
+void exynos_unregister_thermal(struct thermal_sensor_conf *sensor_conf);
+int exynos_register_thermal(struct thermal_sensor_conf *sensor_conf);
+void exynos_report_trigger(struct thermal_sensor_conf *sensor_conf);
+#else
+/* No-op stubs so sensor drivers build when the core is disabled. */
+static inline void
+exynos_unregister_thermal(struct thermal_sensor_conf *sensor_conf) { return; }
+
+static inline int
+exynos_register_thermal(struct thermal_sensor_conf *sensor_conf) { return 0; }
+
+static inline void
+exynos_report_trigger(struct thermal_sensor_conf *sensor_conf) { return; }
+
+#endif /* CONFIG_EXYNOS_THERMAL_CORE */
+#endif /* _EXYNOS_THERMAL_COMMON_H */
diff --git a/drivers/thermal/samsung/exynos_tmu.c b/drivers/thermal/samsung/exynos_tmu.c
new file mode 100644 (file)
index 0000000..b43afda
--- /dev/null
@@ -0,0 +1,762 @@
+/*
+ * exynos_tmu.c - Samsung EXYNOS TMU (Thermal Management Unit)
+ *
+ *  Copyright (C) 2011 Samsung Electronics
+ *  Donggeun Kim <dg77.kim@samsung.com>
+ *  Amit Daniel Kachhap <amit.kachhap@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include <linux/clk.h>
+#include <linux/io.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/platform_device.h>
+#include <linux/regulator/consumer.h>
+
+#include "exynos_thermal_common.h"
+#include "exynos_tmu.h"
+#include "exynos_tmu_data.h"
+
+/**
+ * struct exynos_tmu_data : A structure to hold the private data of the TMU
+       driver
+ * @id: identifier of the one instance of the TMU controller.
+ * @pdata: pointer to the tmu platform/configuration data
+ * @base: base address of the single instance of the TMU controller.
+ * @base_common: base address of the common registers of the TMU controller.
+ * @irq: irq number of the TMU controller.
+ * @soc: id of the SOC type.
+ * @irq_work: pointer to the irq work structure.
+ * @lock: lock to implement synchronization.
+ * @clk: pointer to the clock structure.
+ * @temp_error1: fused value of the first point trim.
+ * @temp_error2: fused value of the second point trim.
+ * @regulator: pointer to the TMU regulator structure.
+ * @reg_conf: pointer to structure to register with core thermal.
+ */
+struct exynos_tmu_data {
+       int id;
+       struct exynos_tmu_platform_data *pdata;
+       void __iomem *base;
+       /* Only mapped when TMU_SUPPORT_SHARED_MEMORY is set for the SoC. */
+       void __iomem *base_common;
+       int irq;
+       enum soc_type soc;
+       struct work_struct irq_work;
+       /* Serializes register access together with clk_enable/disable. */
+       struct mutex lock;
+       struct clk *clk;
+       u8 temp_error1, temp_error2;
+       struct regulator *regulator;
+       struct thermal_sensor_conf *reg_conf;
+};
+
+/*
+ * TMU treats temperature as a mapped temperature code.
+ * The temperature is converted differently depending on the calibration type.
+ *
+ * Returns the register code for @temp (degrees Celsius), or -EINVAL when
+ * @temp is outside the range the Exynos4210 TMU supports.
+ */
+static int temp_to_code(struct exynos_tmu_data *data, u8 temp)
+{
+       struct exynos_tmu_platform_data *pdata = data->pdata;
+       int temp_code;
+
+       /* In HW calibration mode the controller maps the value itself. */
+       if (pdata->cal_mode == HW_MODE)
+               return temp;
+
+       if (data->soc == SOC_ARCH_EXYNOS4210) {
+               /* temp should range between 25 and 125 */
+               if (temp < 25 || temp > 125) {
+                       temp_code = -EINVAL;
+                       goto out;
+               }
+       }
+
+       switch (pdata->cal_type) {
+       case TYPE_TWO_POINT_TRIMMING:
+               /* Linear interpolation between the two fused trim points. */
+               temp_code = (temp - pdata->first_point_trim) *
+                       (data->temp_error2 - data->temp_error1) /
+                       (pdata->second_point_trim - pdata->first_point_trim) +
+                       data->temp_error1;
+               break;
+       case TYPE_ONE_POINT_TRIMMING:
+               temp_code = temp + data->temp_error1 - pdata->first_point_trim;
+               break;
+       default:
+               temp_code = temp + pdata->default_temp_offset;
+               break;
+       }
+out:
+       return temp_code;
+}
+
+/*
+ * Calculate a temperature value from a temperature code.
+ * The unit of the temperature is degree Celsius.
+ *
+ * Returns the temperature for @temp_code, or -ENODATA when the code is
+ * outside the range the Exynos4210 TMU can produce.
+ */
+static int code_to_temp(struct exynos_tmu_data *data, u8 temp_code)
+{
+       struct exynos_tmu_platform_data *pdata = data->pdata;
+       int temp;
+
+       /* In HW calibration mode the controller maps the value itself. */
+       if (pdata->cal_mode == HW_MODE)
+               return temp_code;
+
+       if (data->soc == SOC_ARCH_EXYNOS4210) {
+               /* temp_code should range between 75 and 175 */
+               if (temp_code < 75 || temp_code > 175) {
+                       temp = -ENODATA;
+                       goto out;
+               }
+       }
+
+       switch (pdata->cal_type) {
+       case TYPE_TWO_POINT_TRIMMING:
+               /* Inverse of the interpolation done in temp_to_code(). */
+               temp = (temp_code - data->temp_error1) *
+                       (pdata->second_point_trim - pdata->first_point_trim) /
+                       (data->temp_error2 - data->temp_error1) +
+                       pdata->first_point_trim;
+               break;
+       case TYPE_ONE_POINT_TRIMMING:
+               temp = temp_code - data->temp_error1 + pdata->first_point_trim;
+               break;
+       default:
+               temp = temp_code - pdata->default_temp_offset;
+               break;
+       }
+out:
+       return temp;
+}
+
+/*
+ * Program the TMU: read the fused trim info for calibration and write the
+ * rising/falling trip thresholds.  Called from probe and resume with the
+ * TMU clock gated; takes data->lock for the duration.
+ *
+ * Returns 0 on success or a negative errno (-EBUSY when the controller is
+ * not ready, -EINVAL on bad trigger configuration).
+ */
+static int exynos_tmu_initialize(struct platform_device *pdev)
+{
+       struct exynos_tmu_data *data = platform_get_drvdata(pdev);
+       struct exynos_tmu_platform_data *pdata = data->pdata;
+       const struct exynos_tmu_registers *reg = pdata->registers;
+       unsigned int status, trim_info = 0, con;
+       unsigned int rising_threshold = 0, falling_threshold = 0;
+       int ret = 0, threshold_code, i, trigger_levs = 0;
+
+       mutex_lock(&data->lock);
+       clk_enable(data->clk);
+
+       if (TMU_SUPPORTS(pdata, READY_STATUS)) {
+               status = readb(data->base + reg->tmu_status);
+               if (!status) {
+                       ret = -EBUSY;
+                       goto out;
+               }
+       }
+
+       if (TMU_SUPPORTS(pdata, TRIM_RELOAD))
+               __raw_writel(1, data->base + reg->triminfo_ctrl);
+
+       /* HW calibration needs no trim data from software. */
+       if (pdata->cal_mode == HW_MODE)
+               goto skip_calib_data;
+
+       /* Save trimming info in order to perform calibration */
+       if (data->soc == SOC_ARCH_EXYNOS5440) {
+               /*
+                * For exynos5440 soc triminfo value is swapped between TMU0 and
+                * TMU2, so the below logic is needed.
+                */
+               switch (data->id) {
+               case 0:
+                       trim_info = readl(data->base +
+                       EXYNOS5440_EFUSE_SWAP_OFFSET + reg->triminfo_data);
+                       break;
+               case 1:
+                       trim_info = readl(data->base + reg->triminfo_data);
+                       break;
+               case 2:
+                       trim_info = readl(data->base -
+                       EXYNOS5440_EFUSE_SWAP_OFFSET + reg->triminfo_data);
+               }
+       } else {
+               trim_info = readl(data->base + reg->triminfo_data);
+       }
+       data->temp_error1 = trim_info & EXYNOS_TMU_TEMP_MASK;
+       data->temp_error2 = ((trim_info >> reg->triminfo_85_shift) &
+                               EXYNOS_TMU_TEMP_MASK);
+
+       /* Fall back to the platform efuse value when the fuse is unblown
+        * or outside the documented range. */
+       if (!data->temp_error1 ||
+               (pdata->min_efuse_value > data->temp_error1) ||
+               (data->temp_error1 > pdata->max_efuse_value))
+               data->temp_error1 = pdata->efuse_value & EXYNOS_TMU_TEMP_MASK;
+
+       if (!data->temp_error2)
+               data->temp_error2 =
+                       (pdata->efuse_value >> reg->triminfo_85_shift) &
+                       EXYNOS_TMU_TEMP_MASK;
+
+skip_calib_data:
+       if (pdata->max_trigger_level > MAX_THRESHOLD_LEVS) {
+               dev_err(&pdev->dev, "Invalid max trigger level\n");
+               /* Fix: previously fell through with ret == 0, reporting
+                * success on invalid platform data. */
+               ret = -EINVAL;
+               goto out;
+       }
+
+       for (i = 0; i < pdata->max_trigger_level; i++) {
+               if (!pdata->trigger_levels[i])
+                       continue;
+
+               if ((pdata->trigger_type[i] == HW_TRIP) &&
+               (!pdata->trigger_levels[pdata->max_trigger_level - 1])) {
+                       dev_err(&pdev->dev, "Invalid hw trigger level\n");
+                       ret = -EINVAL;
+                       goto out;
+               }
+
+               /* Count trigger levels except the HW trip*/
+               if (!(pdata->trigger_type[i] == HW_TRIP))
+                       trigger_levs++;
+       }
+
+       if (data->soc == SOC_ARCH_EXYNOS4210) {
+               /* Write temperature code for threshold */
+               threshold_code = temp_to_code(data, pdata->threshold);
+               if (threshold_code < 0) {
+                       ret = threshold_code;
+                       goto out;
+               }
+               writeb(threshold_code,
+                       data->base + reg->threshold_temp);
+               for (i = 0; i < trigger_levs; i++)
+                       writeb(pdata->trigger_levels[i], data->base +
+                       reg->threshold_th0 + i * sizeof(reg->threshold_th0));
+
+               writel(reg->inten_rise_mask, data->base + reg->tmu_intclear);
+       } else {
+               /* Write temperature code for rising and falling threshold */
+               for (i = 0;
+               i < trigger_levs && i < EXYNOS_MAX_TRIGGER_PER_REG; i++) {
+                       threshold_code = temp_to_code(data,
+                                               pdata->trigger_levels[i]);
+                       if (threshold_code < 0) {
+                               ret = threshold_code;
+                               goto out;
+                       }
+                       rising_threshold |= threshold_code << 8 * i;
+                       if (pdata->threshold_falling) {
+                               threshold_code = temp_to_code(data,
+                                               pdata->trigger_levels[i] -
+                                               pdata->threshold_falling);
+                               if (threshold_code > 0)
+                                       falling_threshold |=
+                                               threshold_code << 8 * i;
+                       }
+               }
+
+               writel(rising_threshold,
+                               data->base + reg->threshold_th0);
+               writel(falling_threshold,
+                               data->base + reg->threshold_th1);
+
+               writel((reg->inten_rise_mask << reg->inten_rise_shift) |
+                       (reg->inten_fall_mask << reg->inten_fall_shift),
+                               data->base + reg->tmu_intclear);
+
+               /* if last threshold limit is also present */
+               i = pdata->max_trigger_level - 1;
+               if (pdata->trigger_levels[i] &&
+                               (pdata->trigger_type[i] == HW_TRIP)) {
+                       threshold_code = temp_to_code(data,
+                                               pdata->trigger_levels[i]);
+                       if (threshold_code < 0) {
+                               ret = threshold_code;
+                               goto out;
+                       }
+                       if (i == EXYNOS_MAX_TRIGGER_PER_REG - 1) {
+                               /* 1-4 level to be assigned in th0 reg */
+                               rising_threshold |= threshold_code << 8 * i;
+                               writel(rising_threshold,
+                                       data->base + reg->threshold_th0);
+                       } else if (i == EXYNOS_MAX_TRIGGER_PER_REG) {
+                               /* 5th level to be assigned in th2 reg */
+                               rising_threshold =
+                               threshold_code << reg->threshold_th3_l0_shift;
+                               writel(rising_threshold,
+                                       data->base + reg->threshold_th2);
+                       }
+                       /* Enable the hardware thermal trip. */
+                       con = readl(data->base + reg->tmu_ctrl);
+                       con |= (1 << reg->therm_trip_en_shift);
+                       writel(con, data->base + reg->tmu_ctrl);
+               }
+       }
+       /*Clear the PMIN in the common TMU register*/
+       if (reg->tmu_pmin && !data->id)
+               writel(0, data->base_common + reg->tmu_pmin);
+out:
+       clk_disable(data->clk);
+       mutex_unlock(&data->lock);
+
+       return ret;
+}
+
+/*
+ * Enable or disable the TMU core and its trip interrupts.
+ * Rebuilds the control register from platform data every call; takes
+ * data->lock and gates the TMU clock for the register accesses.
+ */
+static void exynos_tmu_control(struct platform_device *pdev, bool on)
+{
+       struct exynos_tmu_data *data = platform_get_drvdata(pdev);
+       struct exynos_tmu_platform_data *pdata = data->pdata;
+       const struct exynos_tmu_registers *reg = pdata->registers;
+       unsigned int con, interrupt_en, cal_val;
+
+       mutex_lock(&data->lock);
+       clk_enable(data->clk);
+
+       con = readl(data->base + reg->tmu_ctrl);
+
+       /* Each field is only touched when the platform provides a value,
+        * leaving the reset defaults intact otherwise. */
+       if (pdata->reference_voltage) {
+               con &= ~(reg->buf_vref_sel_mask << reg->buf_vref_sel_shift);
+               con |= pdata->reference_voltage << reg->buf_vref_sel_shift;
+       }
+
+       if (pdata->gain) {
+               con &= ~(reg->buf_slope_sel_mask << reg->buf_slope_sel_shift);
+               con |= (pdata->gain << reg->buf_slope_sel_shift);
+       }
+
+       if (pdata->noise_cancel_mode) {
+               con &= ~(reg->therm_trip_mode_mask <<
+                                       reg->therm_trip_mode_shift);
+               con |= (pdata->noise_cancel_mode << reg->therm_trip_mode_shift);
+       }
+
+       /* Select the hardware calibration scheme in the control register. */
+       if (pdata->cal_mode == HW_MODE) {
+               con &= ~(reg->calib_mode_mask << reg->calib_mode_shift);
+               cal_val = 0;
+               switch (pdata->cal_type) {
+               case TYPE_TWO_POINT_TRIMMING:
+                       cal_val = 3;
+                       break;
+               case TYPE_ONE_POINT_TRIMMING_85:
+                       cal_val = 2;
+                       break;
+               case TYPE_ONE_POINT_TRIMMING_25:
+                       cal_val = 1;
+                       break;
+               case TYPE_NONE:
+                       break;
+               default:
+                       dev_err(&pdev->dev, "Invalid calibration type, using none\n");
+               }
+               con |= cal_val << reg->calib_mode_shift;
+       }
+
+       if (on) {
+               con |= (1 << reg->core_en_shift);
+               interrupt_en =
+                       pdata->trigger_enable[3] << reg->inten_rise3_shift |
+                       pdata->trigger_enable[2] << reg->inten_rise2_shift |
+                       pdata->trigger_enable[1] << reg->inten_rise1_shift |
+                       pdata->trigger_enable[0] << reg->inten_rise0_shift;
+               /* Mirror the rising-trip enables onto the falling-trip
+                * bits when the SoC supports falling interrupts. */
+               if (TMU_SUPPORTS(pdata, FALLING_TRIP))
+                       interrupt_en |=
+                               interrupt_en << reg->inten_fall0_shift;
+       } else {
+               con &= ~(1 << reg->core_en_shift);
+               interrupt_en = 0; /* Disable all interrupts */
+       }
+       writel(interrupt_en, data->base + reg->tmu_inten);
+       writel(con, data->base + reg->tmu_ctrl);
+
+       clk_disable(data->clk);
+       mutex_unlock(&data->lock);
+}
+
+/*
+ * Read the current temperature register and convert it to degrees
+ * Celsius.  Holds data->lock and enables the TMU clock around the
+ * register access.  Returns a temperature or a negative errno from
+ * code_to_temp().
+ */
+static int exynos_tmu_read(struct exynos_tmu_data *data)
+{
+       const struct exynos_tmu_registers *reg = data->pdata->registers;
+       int temp;
+
+       mutex_lock(&data->lock);
+       clk_enable(data->clk);
+
+       temp = code_to_temp(data, readb(data->base + reg->tmu_cur_temp));
+
+       clk_disable(data->clk);
+       mutex_unlock(&data->lock);
+
+       return temp;
+}
+
+#ifdef CONFIG_THERMAL_EMULATION
+/*
+ * Program the emulation register so the TMU reports a user-supplied
+ * temperature (@temp in millicelsius); @temp == 0 disables emulation.
+ * Returns 0 on success, -EINVAL when emulation is unsupported or the
+ * requested value is below 1 degree Celsius.
+ */
+static int exynos_tmu_set_emulation(void *drv_data, unsigned long temp)
+{
+       struct exynos_tmu_data *data = drv_data;
+       struct exynos_tmu_platform_data *pdata = data->pdata;
+       const struct exynos_tmu_registers *reg = pdata->registers;
+       unsigned int val;
+       int ret = -EINVAL;
+
+       if (!TMU_SUPPORTS(pdata, EMULATION))
+               goto out;
+
+       /* Non-zero values below 1000 mC cannot be represented. */
+       if (temp && temp < MCELSIUS)
+               goto out;
+
+       mutex_lock(&data->lock);
+       clk_enable(data->clk);
+
+       val = readl(data->base + reg->emul_con);
+
+       if (temp) {
+               temp /= MCELSIUS;
+
+               if (TMU_SUPPORTS(pdata, EMUL_TIME)) {
+                       val &= ~(EXYNOS_EMUL_TIME_MASK << reg->emul_time_shift);
+                       val |= (EXYNOS_EMUL_TIME << reg->emul_time_shift);
+               }
+               /* Replace the emulated temperature code and enable. */
+               val &= ~(EXYNOS_EMUL_DATA_MASK << reg->emul_temp_shift);
+               val |= (temp_to_code(data, temp) << reg->emul_temp_shift) |
+                       EXYNOS_EMUL_ENABLE;
+       } else {
+               val &= ~EXYNOS_EMUL_ENABLE;
+       }
+
+       writel(val, data->base + reg->emul_con);
+
+       clk_disable(data->clk);
+       mutex_unlock(&data->lock);
+       return 0;
+out:
+       return ret;
+}
+#else
+/* Stub when thermal emulation is compiled out. */
+static int exynos_tmu_set_emulation(void *drv_data,    unsigned long temp)
+       { return -EINVAL; }
+#endif/*CONFIG_THERMAL_EMULATION*/
+
+/*
+ * Bottom half of the TMU interrupt: report the trigger to the core
+ * thermal layer, then acknowledge and clear the interrupt bits.
+ * Re-enables the IRQ line that exynos_tmu_irq() disabled.
+ */
+static void exynos_tmu_work(struct work_struct *work)
+{
+       struct exynos_tmu_data *data = container_of(work,
+                       struct exynos_tmu_data, irq_work);
+       struct exynos_tmu_platform_data *pdata = data->pdata;
+       const struct exynos_tmu_registers *reg = pdata->registers;
+       unsigned int val_irq, val_type;
+
+       /* Find which sensor generated this interrupt */
+       if (reg->tmu_irqstatus) {
+               val_type = readl(data->base_common + reg->tmu_irqstatus);
+               /* Shared IRQ: bail out if this instance did not fire. */
+               if (!((val_type >> data->id) & 0x1))
+                       goto out;
+       }
+
+       exynos_report_trigger(data->reg_conf);
+       mutex_lock(&data->lock);
+       clk_enable(data->clk);
+
+       /* TODO: take action based on particular interrupt */
+       val_irq = readl(data->base + reg->tmu_intstat);
+       /* clear the interrupts */
+       writel(val_irq, data->base + reg->tmu_intclear);
+
+       clk_disable(data->clk);
+       mutex_unlock(&data->lock);
+out:
+       enable_irq(data->irq);
+}
+
+/*
+ * Hard IRQ handler: mask the line and defer all register work to
+ * exynos_tmu_work(), which re-enables the IRQ when done.
+ */
+static irqreturn_t exynos_tmu_irq(int irq, void *id)
+{
+       struct exynos_tmu_data *data = id;
+
+       /* Must disable before scheduling so the work runs unmasked once. */
+       disable_irq_nosync(irq);
+       schedule_work(&data->irq_work);
+
+       return IRQ_HANDLED;
+}
+
+/* DT match table; .data points at the per-SoC exynos_tmu_init_data. */
+static const struct of_device_id exynos_tmu_match[] = {
+       {
+               .compatible = "samsung,exynos4210-tmu",
+               .data = (void *)EXYNOS4210_TMU_DRV_DATA,
+       },
+       {
+               /* Exynos4412 deliberately reuses the 5250 TMU data. */
+               .compatible = "samsung,exynos4412-tmu",
+               .data = (void *)EXYNOS5250_TMU_DRV_DATA,
+       },
+       {
+               .compatible = "samsung,exynos5250-tmu",
+               .data = (void *)EXYNOS5250_TMU_DRV_DATA,
+       },
+       {
+               .compatible = "samsung,exynos5440-tmu",
+               .data = (void *)EXYNOS5440_TMU_DRV_DATA,
+       },
+       {},
+};
+MODULE_DEVICE_TABLE(of, exynos_tmu_match);
+
+/*
+ * Look up the platform data for TMU instance @id of the SoC matched
+ * from the device tree.  Returns NULL when the node has no match or
+ * @id is beyond the SoC's instance count.
+ */
+static inline struct exynos_tmu_platform_data *exynos_get_driver_data(
+                       struct platform_device *pdev, int id)
+{
+       const struct of_device_id *match;
+       struct exynos_tmu_init_data *data_table;
+
+       match = of_match_node(exynos_tmu_match, pdev->dev.of_node);
+       if (!match)
+               return NULL;
+
+       data_table = (struct exynos_tmu_init_data *)match->data;
+       if (!data_table || id >= data_table->tmu_count)
+               return NULL;
+
+       return &data_table->tmu_data[id];
+}
+
+/*
+ * Parse the device-tree node: enable the optional "vtmu" regulator,
+ * map the IRQ, the instance register bank and (when the SoC shares
+ * registers) the common register bank, and attach the per-SoC
+ * platform data.  All mappings are devm-managed.
+ */
+static int exynos_map_dt_data(struct platform_device *pdev)
+{
+       struct exynos_tmu_data *data = platform_get_drvdata(pdev);
+       struct exynos_tmu_platform_data *pdata;
+       struct resource res;
+       int ret;
+
+       if (!data || !pdev->dev.of_node)
+               return -ENODEV;
+
+       /*
+        * Try enabling the regulator if found
+        * TODO: Add regulator as an SOC feature, so that regulator enable
+        * is a compulsory call.
+        * NOTE(review): a devm_regulator_get() failure (including
+        * -EPROBE_DEFER) is only logged, never propagated — confirm this
+        * is intended for boards where the supply probes late.
+        */
+       data->regulator = devm_regulator_get(&pdev->dev, "vtmu");
+       if (!IS_ERR(data->regulator)) {
+               ret = regulator_enable(data->regulator);
+               if (ret) {
+                       dev_err(&pdev->dev, "failed to enable vtmu\n");
+                       return ret;
+               }
+       } else {
+               dev_info(&pdev->dev, "Regulator node (vtmu) not found\n");
+       }
+
+       /* Missing "tmuctrl" alias falls back to instance 0. */
+       data->id = of_alias_get_id(pdev->dev.of_node, "tmuctrl");
+       if (data->id < 0)
+               data->id = 0;
+
+       data->irq = irq_of_parse_and_map(pdev->dev.of_node, 0);
+       if (data->irq <= 0) {
+               dev_err(&pdev->dev, "failed to get IRQ\n");
+               return -ENODEV;
+       }
+
+       if (of_address_to_resource(pdev->dev.of_node, 0, &res)) {
+               dev_err(&pdev->dev, "failed to get Resource 0\n");
+               return -ENODEV;
+       }
+
+       data->base = devm_ioremap(&pdev->dev, res.start, resource_size(&res));
+       if (!data->base) {
+               dev_err(&pdev->dev, "Failed to ioremap memory\n");
+               return -EADDRNOTAVAIL;
+       }
+
+       pdata = exynos_get_driver_data(pdev, data->id);
+       if (!pdata) {
+               dev_err(&pdev->dev, "No platform init data supplied.\n");
+               return -ENODEV;
+       }
+       data->pdata = pdata;
+       /*
+        * Check if the TMU shares some registers and then try to map the
+        * memory of common registers.
+        */
+       if (!TMU_SUPPORTS(pdata, SHARED_MEMORY))
+               return 0;
+
+       if (of_address_to_resource(pdev->dev.of_node, 1, &res)) {
+               dev_err(&pdev->dev, "failed to get Resource 1\n");
+               return -ENODEV;
+       }
+
+       data->base_common = devm_ioremap(&pdev->dev, res.start,
+                                       resource_size(&res));
+       if (!data->base_common) {
+               dev_err(&pdev->dev, "Failed to ioremap memory\n");
+               return -ENOMEM;
+       }
+
+       return 0;
+}
+
+/*
+ * Probe: map DT resources, initialize and start the TMU, then build a
+ * thermal_sensor_conf from platform data and register it with the
+ * exynos core thermal layer before requesting the IRQ.
+ */
+static int exynos_tmu_probe(struct platform_device *pdev)
+{
+       struct exynos_tmu_data *data;
+       struct exynos_tmu_platform_data *pdata;
+       struct thermal_sensor_conf *sensor_conf;
+       int ret, i;
+
+       data = devm_kzalloc(&pdev->dev, sizeof(struct exynos_tmu_data),
+                                       GFP_KERNEL);
+       if (!data) {
+               dev_err(&pdev->dev, "Failed to allocate driver structure\n");
+               return -ENOMEM;
+       }
+
+       platform_set_drvdata(pdev, data);
+       mutex_init(&data->lock);
+
+       ret = exynos_map_dt_data(pdev);
+       if (ret)
+               return ret;
+
+       pdata = data->pdata;
+
+       INIT_WORK(&data->irq_work, exynos_tmu_work);
+
+       data->clk = devm_clk_get(&pdev->dev, "tmu_apbif");
+       if (IS_ERR(data->clk)) {
+               dev_err(&pdev->dev, "Failed to get clock\n");
+               return PTR_ERR(data->clk);
+       }
+
+       ret = clk_prepare(data->clk);
+       if (ret)
+               return ret;
+
+       if (pdata->type == SOC_ARCH_EXYNOS ||
+               pdata->type == SOC_ARCH_EXYNOS4210 ||
+                               pdata->type == SOC_ARCH_EXYNOS5440)
+               data->soc = pdata->type;
+       else {
+               ret = -EINVAL;
+               dev_err(&pdev->dev, "Platform not supported\n");
+               goto err_clk;
+       }
+
+       ret = exynos_tmu_initialize(pdev);
+       if (ret) {
+               dev_err(&pdev->dev, "Failed to initialize TMU\n");
+               goto err_clk;
+       }
+
+       exynos_tmu_control(pdev, true);
+
+       /* Allocate a structure to register with the exynos core thermal */
+       sensor_conf = devm_kzalloc(&pdev->dev,
+                               sizeof(struct thermal_sensor_conf), GFP_KERNEL);
+       if (!sensor_conf) {
+               dev_err(&pdev->dev, "Failed to allocate registration struct\n");
+               ret = -ENOMEM;
+               goto err_clk;
+       }
+       /* Bounded write: name is only SENSOR_NAME_LEN (16) bytes and
+        * data->id comes from a DT alias with no upper-range check. */
+       snprintf(sensor_conf->name, sizeof(sensor_conf->name),
+               "therm_zone%d", data->id);
+       sensor_conf->read_temperature = (int (*)(void *))exynos_tmu_read;
+       sensor_conf->write_emul_temp =
+               (int (*)(void *, unsigned long))exynos_tmu_set_emulation;
+       sensor_conf->driver_data = data;
+       sensor_conf->trip_data.trip_count = pdata->trigger_enable[0] +
+                       pdata->trigger_enable[1] + pdata->trigger_enable[2] +
+                       pdata->trigger_enable[3];
+
+       /* Trip temperatures are offsets from the base threshold. */
+       for (i = 0; i < sensor_conf->trip_data.trip_count; i++) {
+               sensor_conf->trip_data.trip_val[i] =
+                       pdata->threshold + pdata->trigger_levels[i];
+               sensor_conf->trip_data.trip_type[i] =
+                                       pdata->trigger_type[i];
+       }
+
+       sensor_conf->trip_data.trigger_falling = pdata->threshold_falling;
+
+       /* Copy the cpufreq clipping table for the cooling devices. */
+       sensor_conf->cooling_data.freq_clip_count = pdata->freq_tab_count;
+       for (i = 0; i < pdata->freq_tab_count; i++) {
+               sensor_conf->cooling_data.freq_data[i].freq_clip_max =
+                                       pdata->freq_tab[i].freq_clip_max;
+               sensor_conf->cooling_data.freq_data[i].temp_level =
+                                       pdata->freq_tab[i].temp_level;
+       }
+       sensor_conf->dev = &pdev->dev;
+       /* Register the sensor with thermal management interface */
+       ret = exynos_register_thermal(sensor_conf);
+       if (ret) {
+               dev_err(&pdev->dev, "Failed to register thermal interface\n");
+               goto err_clk;
+       }
+       data->reg_conf = sensor_conf;
+
+       ret = devm_request_irq(&pdev->dev, data->irq, exynos_tmu_irq,
+               IRQF_TRIGGER_RISING | IRQF_SHARED, dev_name(&pdev->dev), data);
+       if (ret) {
+               dev_err(&pdev->dev, "Failed to request irq: %d\n", data->irq);
+               goto err_clk;
+       }
+
+       return 0;
+err_clk:
+       clk_unprepare(data->clk);
+       return ret;
+}
+
+/*
+ * Remove: stop the TMU, unregister from the core thermal layer and
+ * release the clock and regulator.  devm frees the mappings and memory.
+ */
+static int exynos_tmu_remove(struct platform_device *pdev)
+{
+       struct exynos_tmu_data *data = platform_get_drvdata(pdev);
+
+       exynos_tmu_control(pdev, false);
+
+       exynos_unregister_thermal(data->reg_conf);
+
+       clk_unprepare(data->clk);
+
+       /* Regulator is optional; IS_ERR means it was never acquired. */
+       if (!IS_ERR(data->regulator))
+               regulator_disable(data->regulator);
+
+       return 0;
+}
+
+#ifdef CONFIG_PM_SLEEP
+/* Suspend: just stop the TMU core and its interrupts. */
+static int exynos_tmu_suspend(struct device *dev)
+{
+       exynos_tmu_control(to_platform_device(dev), false);
+
+       return 0;
+}
+
+/*
+ * Resume: reprogram thresholds/calibration and restart the TMU.
+ * NOTE(review): the exynos_tmu_initialize() return value is ignored
+ * here — confirm a failed re-init on resume is acceptable.
+ */
+static int exynos_tmu_resume(struct device *dev)
+{
+       struct platform_device *pdev = to_platform_device(dev);
+
+       exynos_tmu_initialize(pdev);
+       exynos_tmu_control(pdev, true);
+
+       return 0;
+}
+
+static SIMPLE_DEV_PM_OPS(exynos_tmu_pm,
+                        exynos_tmu_suspend, exynos_tmu_resume);
+#define EXYNOS_TMU_PM  (&exynos_tmu_pm)
+#else
+#define EXYNOS_TMU_PM  NULL
+#endif
+
+/* Platform driver glue; binds via the exynos_tmu_match DT table. */
+static struct platform_driver exynos_tmu_driver = {
+       .driver = {
+               .name   = "exynos-tmu",
+               .owner  = THIS_MODULE,
+               .pm     = EXYNOS_TMU_PM,
+               .of_match_table = exynos_tmu_match,
+       },
+       .probe = exynos_tmu_probe,
+       .remove = exynos_tmu_remove,
+};
+
+module_platform_driver(exynos_tmu_driver);
+
+MODULE_DESCRIPTION("EXYNOS TMU Driver");
+MODULE_AUTHOR("Donggeun Kim <dg77.kim@samsung.com>");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:exynos-tmu");
diff --git a/drivers/thermal/samsung/exynos_tmu.h b/drivers/thermal/samsung/exynos_tmu.h
new file mode 100644 (file)
index 0000000..b364c9e
--- /dev/null
@@ -0,0 +1,311 @@
+/*
+ * exynos_tmu.h - Samsung EXYNOS TMU (Thermal Management Unit)
+ *
+ *  Copyright (C) 2011 Samsung Electronics
+ *  Donggeun Kim <dg77.kim@samsung.com>
+ *  Amit Daniel Kachhap <amit.daniel@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef _EXYNOS_TMU_H
+#define _EXYNOS_TMU_H
+#include <linux/cpu_cooling.h>
+
+#include "exynos_thermal_common.h"
+
+enum calibration_type {
+       TYPE_ONE_POINT_TRIMMING,
+       TYPE_ONE_POINT_TRIMMING_25,
+       TYPE_ONE_POINT_TRIMMING_85,
+       TYPE_TWO_POINT_TRIMMING,
+       TYPE_NONE,
+};
+
+enum calibration_mode {
+       SW_MODE,
+       HW_MODE,
+};
+
+enum soc_type {
+       SOC_ARCH_EXYNOS4210 = 1,
+       SOC_ARCH_EXYNOS,
+       SOC_ARCH_EXYNOS5440,
+};
+
+/**
+ * EXYNOS TMU supported features.
+ * TMU_SUPPORT_EMULATION - This feature is used to set a user defined
+ *                     temperature to the TMU controller.
+ * TMU_SUPPORT_MULTI_INST - This feature denotes that the soc
+ *                     has many instances of TMU.
+ * TMU_SUPPORT_TRIM_RELOAD - This feature shows that trimming can
+ *                     be reloaded.
+ * TMU_SUPPORT_FALLING_TRIP - This feature shows that interrupt can
+ *                     be registered for falling trips also.
+ * TMU_SUPPORT_READY_STATUS - This feature tells that the TMU current
+ *                     state (active/idle) can be checked.
+ * TMU_SUPPORT_EMUL_TIME - This feature allows setting the next temp emulation
+ *                     sample time.
+ * TMU_SUPPORT_SHARED_MEMORY - This feature tells that the different TMU
+ *                     sensors share some common registers.
+ * TMU_SUPPORTS - macro to compare the above features with the supplied.
+ */
+#define TMU_SUPPORT_EMULATION                  BIT(0)
+#define TMU_SUPPORT_MULTI_INST                 BIT(1)
+#define TMU_SUPPORT_TRIM_RELOAD                        BIT(2)
+#define TMU_SUPPORT_FALLING_TRIP               BIT(3)
+#define TMU_SUPPORT_READY_STATUS               BIT(4)
+#define TMU_SUPPORT_EMUL_TIME                  BIT(5)
+#define TMU_SUPPORT_SHARED_MEMORY              BIT(6)
+
+#define TMU_SUPPORTS(a, b)     (a->features & TMU_SUPPORT_ ## b)
+
+/**
+ * struct exynos_tmu_register - register descriptors to access registers and
+ * bitfields. The register validity, offsets and bitfield values may vary
+ * slightly across different exynos SOC's.
+ * @triminfo_data: register containing 2 point trimming data
+ * @triminfo_25_shift: shift bit of the 25 C trim value in triminfo_data reg.
+ * @triminfo_85_shift: shift bit of the 85 C trim value in triminfo_data reg.
+ * @triminfo_ctrl: trim info controller register.
+ * @triminfo_reload_shift: shift of triminfo reload enable bit in triminfo_ctrl
+       reg.
+ * @tmu_ctrl: TMU main controller register.
+ * @buf_vref_sel_shift: shift bits of reference voltage in tmu_ctrl register.
+ * @buf_vref_sel_mask: mask bits of reference voltage in tmu_ctrl register.
+ * @therm_trip_mode_shift: shift bits of tripping mode in tmu_ctrl register.
+ * @therm_trip_mode_mask: mask bits of tripping mode in tmu_ctrl register.
+ * @therm_trip_en_shift: shift bits of tripping enable in tmu_ctrl register.
+ * @buf_slope_sel_shift: shift bits of amplifier gain value in tmu_ctrl
+       register.
+ * @buf_slope_sel_mask: mask bits of amplifier gain value in tmu_ctrl register.
+ * @calib_mode_shift: shift bits of calibration mode value in tmu_ctrl
+       register.
+ * @calib_mode_mask: mask bits of calibration mode value in tmu_ctrl
+       register.
+ * @therm_trip_tq_en_shift: shift bits of thermal trip enable by TQ pin in
+       tmu_ctrl register.
+ * @core_en_shift: shift bits of TMU core enable bit in tmu_ctrl register.
+ * @tmu_status: register describing the TMU status.
+ * @tmu_cur_temp: register containing the current temperature of the TMU.
+ * @tmu_cur_temp_shift: shift bits of current temp value in tmu_cur_temp
+       register.
+ * @threshold_temp: register containing the base threshold level.
+ * @threshold_th0: Register containing first set of rising levels.
+ * @threshold_th0_l0_shift: shift bits of level0 threshold temperature.
+ * @threshold_th0_l1_shift: shift bits of level1 threshold temperature.
+ * @threshold_th0_l2_shift: shift bits of level2 threshold temperature.
+ * @threshold_th0_l3_shift: shift bits of level3 threshold temperature.
+ * @threshold_th1: Register containing second set of rising levels.
+ * @threshold_th1_l0_shift: shift bits of level0 threshold temperature.
+ * @threshold_th1_l1_shift: shift bits of level1 threshold temperature.
+ * @threshold_th1_l2_shift: shift bits of level2 threshold temperature.
+ * @threshold_th1_l3_shift: shift bits of level3 threshold temperature.
+ * @threshold_th2: Register containing third set of rising levels.
+ * @threshold_th2_l0_shift: shift bits of level0 threshold temperature.
+ * @threshold_th3: Register containing fourth set of rising levels.
+ * @threshold_th3_l0_shift: shift bits of level0 threshold temperature.
+ * @tmu_inten: register containing the different threshold interrupt
+       enable bits.
+ * @inten_rise_shift: shift bits of all rising interrupt bits.
+ * @inten_rise_mask: mask bits of all rising interrupt bits.
+ * @inten_fall_shift: shift bits of all falling interrupt bits.
+ * @inten_fall_mask: mask bits of all falling interrupt bits.
+ * @inten_rise0_shift: shift bits of rising 0 interrupt bits.
+ * @inten_rise1_shift: shift bits of rising 1 interrupt bits.
+ * @inten_rise2_shift: shift bits of rising 2 interrupt bits.
+ * @inten_rise3_shift: shift bits of rising 3 interrupt bits.
+ * @inten_fall0_shift: shift bits of falling 0 interrupt bits.
+ * @inten_fall1_shift: shift bits of falling 1 interrupt bits.
+ * @inten_fall2_shift: shift bits of falling 2 interrupt bits.
+ * @inten_fall3_shift: shift bits of falling 3 interrupt bits.
+ * @tmu_intstat: Register containing the interrupt status values.
+ * @tmu_intclear: Register for clearing the raised interrupt status.
+ * @emul_con: TMU emulation controller register.
+ * @emul_temp_shift: shift bits of emulation temperature.
+ * @emul_time_shift: shift bits of emulation time.
+ * @emul_time_mask: mask bits of emulation time.
+ * @tmu_irqstatus: register to find which TMU generated interrupts.
+ * @tmu_pmin: register to get/set the Pmin value.
+ */
+struct exynos_tmu_registers {
+       u32     triminfo_data;
+       u32     triminfo_25_shift;
+       u32     triminfo_85_shift;
+
+       u32     triminfo_ctrl;
+       u32     triminfo_reload_shift;
+
+       u32     tmu_ctrl;
+       u32     buf_vref_sel_shift;
+       u32     buf_vref_sel_mask;
+       u32     therm_trip_mode_shift;
+       u32     therm_trip_mode_mask;
+       u32     therm_trip_en_shift;
+       u32     buf_slope_sel_shift;
+       u32     buf_slope_sel_mask;
+       u32     calib_mode_shift;
+       u32     calib_mode_mask;
+       u32     therm_trip_tq_en_shift;
+       u32     core_en_shift;
+
+       u32     tmu_status;
+
+       u32     tmu_cur_temp;
+       u32     tmu_cur_temp_shift;
+
+       u32     threshold_temp;
+
+       u32     threshold_th0;
+       u32     threshold_th0_l0_shift;
+       u32     threshold_th0_l1_shift;
+       u32     threshold_th0_l2_shift;
+       u32     threshold_th0_l3_shift;
+
+       u32     threshold_th1;
+       u32     threshold_th1_l0_shift;
+       u32     threshold_th1_l1_shift;
+       u32     threshold_th1_l2_shift;
+       u32     threshold_th1_l3_shift;
+
+       u32     threshold_th2;
+       u32     threshold_th2_l0_shift;
+
+       u32     threshold_th3;
+       u32     threshold_th3_l0_shift;
+
+       u32     tmu_inten;
+       u32     inten_rise_shift;
+       u32     inten_rise_mask;
+       u32     inten_fall_shift;
+       u32     inten_fall_mask;
+       u32     inten_rise0_shift;
+       u32     inten_rise1_shift;
+       u32     inten_rise2_shift;
+       u32     inten_rise3_shift;
+       u32     inten_fall0_shift;
+       u32     inten_fall1_shift;
+       u32     inten_fall2_shift;
+       u32     inten_fall3_shift;
+
+       u32     tmu_intstat;
+
+       u32     tmu_intclear;
+
+       u32     emul_con;
+       u32     emul_temp_shift;
+       u32     emul_time_shift;
+       u32     emul_time_mask;
+
+       u32     tmu_irqstatus;
+       u32     tmu_pmin;
+};
+
+/**
+ * struct exynos_tmu_platform_data
+ * @threshold: basic temperature for generating interrupt
+ *            25 <= threshold <= 125 [unit: degree Celsius]
+ * @threshold_falling: differential value for setting threshold
+ *                    of temperature falling interrupt.
+ * @trigger_levels: array for each interrupt levels
+ *     [unit: degree Celsius]
+ *     0: temperature for trigger_level0 interrupt
+ *        condition for trigger_level0 interrupt:
+ *             current temperature > threshold + trigger_levels[0]
+ *     1: temperature for trigger_level1 interrupt
+ *        condition for trigger_level1 interrupt:
+ *             current temperature > threshold + trigger_levels[1]
+ *     2: temperature for trigger_level2 interrupt
+ *        condition for trigger_level2 interrupt:
+ *             current temperature > threshold + trigger_levels[2]
+ *     3: temperature for trigger_level3 interrupt
+ *        condition for trigger_level3 interrupt:
+ *             current temperature > threshold + trigger_levels[3]
+ * @trigger_type: defines the type of trigger. Possible values are,
+ *     THROTTLE_ACTIVE trigger type
+ *     THROTTLE_PASSIVE trigger type
+ *     SW_TRIP trigger type
+ *     HW_TRIP
+ * @trigger_enable[]: array to denote which trigger levels are enabled.
+ *     1 = enable trigger_level[] interrupt,
+ *     0 = disable trigger_level[] interrupt
+ * @max_trigger_level: max trigger level supported by the TMU
+ * @gain: gain of amplifier in the positive-TC generator block
+ *     0 <= gain <= 15
+ * @reference_voltage: reference voltage of amplifier
+ *     in the positive-TC generator block
+ *     0 <= reference_voltage <= 31
+ * @noise_cancel_mode: noise cancellation mode
+ *     000, 100, 101, 110 and 111 can be different modes
+ * @type: determines the type of SOC
+ * @efuse_value: platform defined fuse value
+ * @min_efuse_value: minimum valid trimming data
+ * @max_efuse_value: maximum valid trimming data
+ * @first_point_trim: temp value of the first point trimming
+ * @second_point_trim: temp value of the second point trimming
+ * @default_temp_offset: default temperature offset in case of no trimming
+ * @cal_type: calibration type for temperature
+ * @cal_mode: calibration mode for temperature
+ * @freq_clip_table: Table representing frequency reduction percentage.
+ * @freq_tab_count: Count of the above table as frequency reduction may
+ *     be applicable to only some of the trigger levels.
+ * @registers: Pointer to structure containing all the TMU controller registers
+ *     and bitfields shifts and masks.
+ * @features: a bitfield value indicating the features supported in SOC like
+ *     emulation, multi instance etc
+ *
+ * This structure is required for configuration of exynos_tmu driver.
+ */
+struct exynos_tmu_platform_data {
+       u8 threshold;
+       u8 threshold_falling;
+       u8 trigger_levels[MAX_TRIP_COUNT];
+       enum trigger_type trigger_type[MAX_TRIP_COUNT];
+       bool trigger_enable[MAX_TRIP_COUNT];
+       u8 max_trigger_level;
+       u8 gain;
+       u8 reference_voltage;
+       u8 noise_cancel_mode;
+
+       u32 efuse_value;
+       u32 min_efuse_value;
+       u32 max_efuse_value;
+       u8 first_point_trim;
+       u8 second_point_trim;
+       u8 default_temp_offset;
+
+       enum calibration_type cal_type;
+       enum calibration_mode cal_mode;
+       enum soc_type type;
+       struct freq_clip_table freq_tab[4];
+       unsigned int freq_tab_count;
+       const struct exynos_tmu_registers *registers;
+       unsigned int features;
+};
+
+/**
+ * struct exynos_tmu_init_data
+ * @tmu_count: number of TMU instances.
+ * @tmu_data: platform data of all TMU instances.
+ * This structure is required to store data for multi-instance exynos tmu
+ * driver.
+ */
+struct exynos_tmu_init_data {
+       int tmu_count;
+       struct exynos_tmu_platform_data tmu_data[];
+};
+
+#endif /* _EXYNOS_TMU_H */
diff --git a/drivers/thermal/samsung/exynos_tmu_data.c b/drivers/thermal/samsung/exynos_tmu_data.c
new file mode 100644 (file)
index 0000000..9002499
--- /dev/null
@@ -0,0 +1,250 @@
+/*
+ * exynos_tmu_data.c - Samsung EXYNOS tmu data file
+ *
+ *  Copyright (C) 2013 Samsung Electronics
+ *  Amit Daniel Kachhap <amit.daniel@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#include "exynos_thermal_common.h"
+#include "exynos_tmu.h"
+#include "exynos_tmu_data.h"
+
+#if defined(CONFIG_CPU_EXYNOS4210)
+static const struct exynos_tmu_registers exynos4210_tmu_registers = {
+       .triminfo_data = EXYNOS_TMU_REG_TRIMINFO,
+       .triminfo_25_shift = EXYNOS_TRIMINFO_25_SHIFT,
+       .triminfo_85_shift = EXYNOS_TRIMINFO_85_SHIFT,
+       .tmu_ctrl = EXYNOS_TMU_REG_CONTROL,
+       .buf_vref_sel_shift = EXYNOS_TMU_REF_VOLTAGE_SHIFT,
+       .buf_vref_sel_mask = EXYNOS_TMU_REF_VOLTAGE_MASK,
+       .buf_slope_sel_shift = EXYNOS_TMU_BUF_SLOPE_SEL_SHIFT,
+       .buf_slope_sel_mask = EXYNOS_TMU_BUF_SLOPE_SEL_MASK,
+       .core_en_shift = EXYNOS_TMU_CORE_EN_SHIFT,
+       .tmu_status = EXYNOS_TMU_REG_STATUS,
+       .tmu_cur_temp = EXYNOS_TMU_REG_CURRENT_TEMP,
+       .threshold_temp = EXYNOS4210_TMU_REG_THRESHOLD_TEMP,
+       .threshold_th0 = EXYNOS4210_TMU_REG_TRIG_LEVEL0,
+       .tmu_inten = EXYNOS_TMU_REG_INTEN,
+       .inten_rise_mask = EXYNOS4210_TMU_TRIG_LEVEL_MASK,
+       .inten_rise0_shift = EXYNOS_TMU_INTEN_RISE0_SHIFT,
+       .inten_rise1_shift = EXYNOS_TMU_INTEN_RISE1_SHIFT,
+       .inten_rise2_shift = EXYNOS_TMU_INTEN_RISE2_SHIFT,
+       .inten_rise3_shift = EXYNOS_TMU_INTEN_RISE3_SHIFT,
+       .tmu_intstat = EXYNOS_TMU_REG_INTSTAT,
+       .tmu_intclear = EXYNOS_TMU_REG_INTCLEAR,
+};
+
+struct exynos_tmu_init_data const exynos4210_default_tmu_data = {
+       .tmu_data = {
+               {
+               .threshold = 80,
+               .trigger_levels[0] = 5,
+               .trigger_levels[1] = 20,
+               .trigger_levels[2] = 30,
+               .trigger_enable[0] = true,
+               .trigger_enable[1] = true,
+               .trigger_enable[2] = true,
+               .trigger_enable[3] = false,
+               .trigger_type[0] = THROTTLE_ACTIVE,
+               .trigger_type[1] = THROTTLE_ACTIVE,
+               .trigger_type[2] = SW_TRIP,
+               .max_trigger_level = 4,
+               .gain = 15,
+               .reference_voltage = 7,
+               .cal_type = TYPE_ONE_POINT_TRIMMING,
+               .min_efuse_value = 40,
+               .max_efuse_value = 100,
+               .first_point_trim = 25,
+               .second_point_trim = 85,
+               .default_temp_offset = 50,
+               .freq_tab[0] = {
+                       .freq_clip_max = 800 * 1000,
+                       .temp_level = 85,
+                       },
+               .freq_tab[1] = {
+                       .freq_clip_max = 200 * 1000,
+                       .temp_level = 100,
+               },
+               .freq_tab_count = 2,
+               .type = SOC_ARCH_EXYNOS4210,
+               .registers = &exynos4210_tmu_registers,
+               .features = TMU_SUPPORT_READY_STATUS,
+               },
+       },
+       .tmu_count = 1,
+};
+#endif
+
+#if defined(CONFIG_SOC_EXYNOS5250) || defined(CONFIG_SOC_EXYNOS4412)
+static const struct exynos_tmu_registers exynos5250_tmu_registers = {
+       .triminfo_data = EXYNOS_TMU_REG_TRIMINFO,
+       .triminfo_25_shift = EXYNOS_TRIMINFO_25_SHIFT,
+       .triminfo_85_shift = EXYNOS_TRIMINFO_85_SHIFT,
+       .triminfo_ctrl = EXYNOS_TMU_TRIMINFO_CON,
+       .triminfo_reload_shift = EXYNOS_TRIMINFO_RELOAD_SHIFT,
+       .tmu_ctrl = EXYNOS_TMU_REG_CONTROL,
+       .buf_vref_sel_shift = EXYNOS_TMU_REF_VOLTAGE_SHIFT,
+       .buf_vref_sel_mask = EXYNOS_TMU_REF_VOLTAGE_MASK,
+       .therm_trip_mode_shift = EXYNOS_TMU_TRIP_MODE_SHIFT,
+       .therm_trip_mode_mask = EXYNOS_TMU_TRIP_MODE_MASK,
+       .therm_trip_en_shift = EXYNOS_TMU_THERM_TRIP_EN_SHIFT,
+       .buf_slope_sel_shift = EXYNOS_TMU_BUF_SLOPE_SEL_SHIFT,
+       .buf_slope_sel_mask = EXYNOS_TMU_BUF_SLOPE_SEL_MASK,
+       .core_en_shift = EXYNOS_TMU_CORE_EN_SHIFT,
+       .tmu_status = EXYNOS_TMU_REG_STATUS,
+       .tmu_cur_temp = EXYNOS_TMU_REG_CURRENT_TEMP,
+       .threshold_th0 = EXYNOS_THD_TEMP_RISE,
+       .threshold_th1 = EXYNOS_THD_TEMP_FALL,
+       .tmu_inten = EXYNOS_TMU_REG_INTEN,
+       .inten_rise_mask = EXYNOS_TMU_RISE_INT_MASK,
+       .inten_rise_shift = EXYNOS_TMU_RISE_INT_SHIFT,
+       .inten_fall_mask = EXYNOS_TMU_FALL_INT_MASK,
+       .inten_fall_shift = EXYNOS_TMU_FALL_INT_SHIFT,
+       .inten_rise0_shift = EXYNOS_TMU_INTEN_RISE0_SHIFT,
+       .inten_rise1_shift = EXYNOS_TMU_INTEN_RISE1_SHIFT,
+       .inten_rise2_shift = EXYNOS_TMU_INTEN_RISE2_SHIFT,
+       .inten_rise3_shift = EXYNOS_TMU_INTEN_RISE3_SHIFT,
+       .inten_fall0_shift = EXYNOS_TMU_INTEN_FALL0_SHIFT,
+       .tmu_intstat = EXYNOS_TMU_REG_INTSTAT,
+       .tmu_intclear = EXYNOS_TMU_REG_INTCLEAR,
+       .emul_con = EXYNOS_EMUL_CON,
+       .emul_temp_shift = EXYNOS_EMUL_DATA_SHIFT,
+       .emul_time_shift = EXYNOS_EMUL_TIME_SHIFT,
+       .emul_time_mask = EXYNOS_EMUL_TIME_MASK,
+};
+
+#define EXYNOS5250_TMU_DATA \
+       .threshold_falling = 10, \
+       .trigger_levels[0] = 85, \
+       .trigger_levels[1] = 103, \
+       .trigger_levels[2] = 110, \
+       .trigger_levels[3] = 120, \
+       .trigger_enable[0] = true, \
+       .trigger_enable[1] = true, \
+       .trigger_enable[2] = true, \
+       .trigger_enable[3] = false, \
+       .trigger_type[0] = THROTTLE_ACTIVE, \
+       .trigger_type[1] = THROTTLE_ACTIVE, \
+       .trigger_type[2] = SW_TRIP, \
+       .trigger_type[3] = HW_TRIP, \
+       .max_trigger_level = 4, \
+       .gain = 8, \
+       .reference_voltage = 16, \
+       .noise_cancel_mode = 4, \
+       .cal_type = TYPE_ONE_POINT_TRIMMING, \
+       .efuse_value = 55, \
+       .min_efuse_value = 40, \
+       .max_efuse_value = 100, \
+       .first_point_trim = 25, \
+       .second_point_trim = 85, \
+       .default_temp_offset = 50, \
+       .freq_tab[0] = { \
+               .freq_clip_max = 800 * 1000, \
+               .temp_level = 85, \
+       }, \
+       .freq_tab[1] = { \
+               .freq_clip_max = 200 * 1000, \
+               .temp_level = 103, \
+       }, \
+       .freq_tab_count = 2, \
+       .type = SOC_ARCH_EXYNOS, \
+       .registers = &exynos5250_tmu_registers, \
+       .features = (TMU_SUPPORT_EMULATION | TMU_SUPPORT_TRIM_RELOAD | \
+                       TMU_SUPPORT_FALLING_TRIP | TMU_SUPPORT_READY_STATUS | \
+                       TMU_SUPPORT_EMUL_TIME)
+
+struct exynos_tmu_init_data const exynos5250_default_tmu_data = {
+       .tmu_data = {
+               { EXYNOS5250_TMU_DATA },
+       },
+       .tmu_count = 1,
+};
+#endif
+
+#if defined(CONFIG_SOC_EXYNOS5440)
+static const struct exynos_tmu_registers exynos5440_tmu_registers = {
+       .triminfo_data = EXYNOS5440_TMU_S0_7_TRIM,
+       .triminfo_25_shift = EXYNOS_TRIMINFO_25_SHIFT,
+       .triminfo_85_shift = EXYNOS_TRIMINFO_85_SHIFT,
+       .tmu_ctrl = EXYNOS5440_TMU_S0_7_CTRL,
+       .buf_vref_sel_shift = EXYNOS_TMU_REF_VOLTAGE_SHIFT,
+       .buf_vref_sel_mask = EXYNOS_TMU_REF_VOLTAGE_MASK,
+       .therm_trip_mode_shift = EXYNOS_TMU_TRIP_MODE_SHIFT,
+       .therm_trip_mode_mask = EXYNOS_TMU_TRIP_MODE_MASK,
+       .therm_trip_en_shift = EXYNOS_TMU_THERM_TRIP_EN_SHIFT,
+       .buf_slope_sel_shift = EXYNOS_TMU_BUF_SLOPE_SEL_SHIFT,
+       .buf_slope_sel_mask = EXYNOS_TMU_BUF_SLOPE_SEL_MASK,
+       .calib_mode_shift = EXYNOS_TMU_CALIB_MODE_SHIFT,
+       .calib_mode_mask = EXYNOS_TMU_CALIB_MODE_MASK,
+       .core_en_shift = EXYNOS_TMU_CORE_EN_SHIFT,
+       .tmu_status = EXYNOS5440_TMU_S0_7_STATUS,
+       .tmu_cur_temp = EXYNOS5440_TMU_S0_7_TEMP,
+       .threshold_th0 = EXYNOS5440_TMU_S0_7_TH0,
+       .threshold_th1 = EXYNOS5440_TMU_S0_7_TH1,
+       .threshold_th2 = EXYNOS5440_TMU_S0_7_TH2,
+       .threshold_th3_l0_shift = EXYNOS5440_TMU_TH_RISE4_SHIFT,
+       .tmu_inten = EXYNOS5440_TMU_S0_7_IRQEN,
+       .inten_rise_mask = EXYNOS5440_TMU_RISE_INT_MASK,
+       .inten_rise_shift = EXYNOS5440_TMU_RISE_INT_SHIFT,
+       .inten_fall_mask = EXYNOS5440_TMU_FALL_INT_MASK,
+       .inten_fall_shift = EXYNOS5440_TMU_FALL_INT_SHIFT,
+       .inten_rise0_shift = EXYNOS5440_TMU_INTEN_RISE0_SHIFT,
+       .inten_rise1_shift = EXYNOS5440_TMU_INTEN_RISE1_SHIFT,
+       .inten_rise2_shift = EXYNOS5440_TMU_INTEN_RISE2_SHIFT,
+       .inten_rise3_shift = EXYNOS5440_TMU_INTEN_RISE3_SHIFT,
+       .inten_fall0_shift = EXYNOS5440_TMU_INTEN_FALL0_SHIFT,
+       .tmu_intstat = EXYNOS5440_TMU_S0_7_IRQ,
+       .tmu_intclear = EXYNOS5440_TMU_S0_7_IRQ,
+       .tmu_irqstatus = EXYNOS5440_TMU_IRQ_STATUS,
+       .emul_con = EXYNOS5440_TMU_S0_7_DEBUG,
+       .emul_temp_shift = EXYNOS_EMUL_DATA_SHIFT,
+       .tmu_pmin = EXYNOS5440_TMU_PMIN,
+};
+
+#define EXYNOS5440_TMU_DATA \
+       .trigger_levels[0] = 100, \
+       .trigger_levels[4] = 105, \
+       .trigger_enable[0] = 1, \
+       .trigger_type[0] = SW_TRIP, \
+       .trigger_type[4] = HW_TRIP, \
+       .max_trigger_level = 5, \
+       .gain = 5, \
+       .reference_voltage = 16, \
+       .noise_cancel_mode = 4, \
+       .cal_type = TYPE_ONE_POINT_TRIMMING, \
+       .cal_mode = 0, \
+       .efuse_value = 0x5b2d, \
+       .min_efuse_value = 16, \
+       .max_efuse_value = 76, \
+       .first_point_trim = 25, \
+       .second_point_trim = 70, \
+       .default_temp_offset = 25, \
+       .type = SOC_ARCH_EXYNOS5440, \
+       .registers = &exynos5440_tmu_registers, \
+       .features = (TMU_SUPPORT_EMULATION | TMU_SUPPORT_FALLING_TRIP | \
+                       TMU_SUPPORT_MULTI_INST | TMU_SUPPORT_SHARED_MEMORY),
+
+struct exynos_tmu_init_data const exynos5440_default_tmu_data = {
+       .tmu_data = {
+               { EXYNOS5440_TMU_DATA } ,
+               { EXYNOS5440_TMU_DATA } ,
+               { EXYNOS5440_TMU_DATA } ,
+       },
+       .tmu_count = 3,
+};
+#endif
diff --git a/drivers/thermal/samsung/exynos_tmu_data.h b/drivers/thermal/samsung/exynos_tmu_data.h
new file mode 100644 (file)
index 0000000..dc7feb5
--- /dev/null
@@ -0,0 +1,155 @@
+/*
+ * exynos_tmu_data.h - Samsung EXYNOS tmu data header file
+ *
+ *  Copyright (C) 2013 Samsung Electronics
+ *  Amit Daniel Kachhap <amit.daniel@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ */
+
+#ifndef _EXYNOS_TMU_DATA_H
+#define _EXYNOS_TMU_DATA_H
+
+/* Exynos generic registers */
+#define EXYNOS_TMU_REG_TRIMINFO                0x0
+#define EXYNOS_TMU_REG_CONTROL         0x20
+#define EXYNOS_TMU_REG_STATUS          0x28
+#define EXYNOS_TMU_REG_CURRENT_TEMP    0x40
+#define EXYNOS_TMU_REG_INTEN           0x70
+#define EXYNOS_TMU_REG_INTSTAT         0x74
+#define EXYNOS_TMU_REG_INTCLEAR                0x78
+
+#define EXYNOS_TMU_TEMP_MASK           0xff
+#define EXYNOS_TMU_REF_VOLTAGE_SHIFT   24
+#define EXYNOS_TMU_REF_VOLTAGE_MASK    0x1f
+#define EXYNOS_TMU_BUF_SLOPE_SEL_MASK  0xf
+#define EXYNOS_TMU_BUF_SLOPE_SEL_SHIFT 8
+#define EXYNOS_TMU_CORE_EN_SHIFT       0
+
+/* Exynos4210 specific registers */
+#define EXYNOS4210_TMU_REG_THRESHOLD_TEMP      0x44
+#define EXYNOS4210_TMU_REG_TRIG_LEVEL0 0x50
+#define EXYNOS4210_TMU_REG_TRIG_LEVEL1 0x54
+#define EXYNOS4210_TMU_REG_TRIG_LEVEL2 0x58
+#define EXYNOS4210_TMU_REG_TRIG_LEVEL3 0x5C
+#define EXYNOS4210_TMU_REG_PAST_TEMP0  0x60
+#define EXYNOS4210_TMU_REG_PAST_TEMP1  0x64
+#define EXYNOS4210_TMU_REG_PAST_TEMP2  0x68
+#define EXYNOS4210_TMU_REG_PAST_TEMP3  0x6C
+
+#define EXYNOS4210_TMU_TRIG_LEVEL0_MASK        0x1
+#define EXYNOS4210_TMU_TRIG_LEVEL1_MASK        0x10
+#define EXYNOS4210_TMU_TRIG_LEVEL2_MASK        0x100
+#define EXYNOS4210_TMU_TRIG_LEVEL3_MASK        0x1000
+#define EXYNOS4210_TMU_TRIG_LEVEL_MASK 0x1111
+#define EXYNOS4210_TMU_INTCLEAR_VAL    0x1111
+
+/* Exynos5250 and Exynos4412 specific registers */
+#define EXYNOS_TMU_TRIMINFO_CON        0x14
+#define EXYNOS_THD_TEMP_RISE           0x50
+#define EXYNOS_THD_TEMP_FALL           0x54
+#define EXYNOS_EMUL_CON                0x80
+
+#define EXYNOS_TRIMINFO_RELOAD_SHIFT   1
+#define EXYNOS_TRIMINFO_25_SHIFT       0
+#define EXYNOS_TRIMINFO_85_SHIFT       8
+#define EXYNOS_TMU_RISE_INT_MASK       0x111
+#define EXYNOS_TMU_RISE_INT_SHIFT      0
+#define EXYNOS_TMU_FALL_INT_MASK       0x111
+#define EXYNOS_TMU_FALL_INT_SHIFT      12
+#define EXYNOS_TMU_CLEAR_RISE_INT      0x111
+#define EXYNOS_TMU_CLEAR_FALL_INT      (0x111 << 12)
+#define EXYNOS_TMU_TRIP_MODE_SHIFT     13
+#define EXYNOS_TMU_TRIP_MODE_MASK      0x7
+#define EXYNOS_TMU_THERM_TRIP_EN_SHIFT 12
+#define EXYNOS_TMU_CALIB_MODE_SHIFT    4
+#define EXYNOS_TMU_CALIB_MODE_MASK     0x3
+
+#define EXYNOS_TMU_INTEN_RISE0_SHIFT   0
+#define EXYNOS_TMU_INTEN_RISE1_SHIFT   4
+#define EXYNOS_TMU_INTEN_RISE2_SHIFT   8
+#define EXYNOS_TMU_INTEN_RISE3_SHIFT   12
+#define EXYNOS_TMU_INTEN_FALL0_SHIFT   16
+#define EXYNOS_TMU_INTEN_FALL1_SHIFT   20
+#define EXYNOS_TMU_INTEN_FALL2_SHIFT   24
+
+#define EXYNOS_EMUL_TIME       0x57F0
+#define EXYNOS_EMUL_TIME_MASK  0xffff
+#define EXYNOS_EMUL_TIME_SHIFT 16
+#define EXYNOS_EMUL_DATA_SHIFT 8
+#define EXYNOS_EMUL_DATA_MASK  0xFF
+#define EXYNOS_EMUL_ENABLE     0x1
+
+#define EXYNOS_MAX_TRIGGER_PER_REG     4
+
+/*exynos5440 specific registers*/
+#define EXYNOS5440_TMU_S0_7_TRIM               0x000
+#define EXYNOS5440_TMU_S0_7_CTRL               0x020
+#define EXYNOS5440_TMU_S0_7_DEBUG              0x040
+#define EXYNOS5440_TMU_S0_7_STATUS             0x060
+#define EXYNOS5440_TMU_S0_7_TEMP               0x0f0
+#define EXYNOS5440_TMU_S0_7_TH0                        0x110
+#define EXYNOS5440_TMU_S0_7_TH1                        0x130
+#define EXYNOS5440_TMU_S0_7_TH2                        0x150
+#define EXYNOS5440_TMU_S0_7_EVTEN              0x1F0
+#define EXYNOS5440_TMU_S0_7_IRQEN              0x210
+#define EXYNOS5440_TMU_S0_7_IRQ                        0x230
+/* exynos5440 common registers */
+#define EXYNOS5440_TMU_IRQ_STATUS              0x000
+#define EXYNOS5440_TMU_PMIN                    0x004
+#define EXYNOS5440_TMU_TEMP                    0x008
+
+#define EXYNOS5440_TMU_RISE_INT_MASK           0xf
+#define EXYNOS5440_TMU_RISE_INT_SHIFT          0
+#define EXYNOS5440_TMU_FALL_INT_MASK           0xf
+#define EXYNOS5440_TMU_FALL_INT_SHIFT          4
+#define EXYNOS5440_TMU_INTEN_RISE0_SHIFT       0
+#define EXYNOS5440_TMU_INTEN_RISE1_SHIFT       1
+#define EXYNOS5440_TMU_INTEN_RISE2_SHIFT       2
+#define EXYNOS5440_TMU_INTEN_RISE3_SHIFT       3
+#define EXYNOS5440_TMU_INTEN_FALL0_SHIFT       4
+#define EXYNOS5440_TMU_INTEN_FALL1_SHIFT       5
+#define EXYNOS5440_TMU_INTEN_FALL2_SHIFT       6
+#define EXYNOS5440_TMU_INTEN_FALL3_SHIFT       7
+#define EXYNOS5440_TMU_TH_RISE0_SHIFT          0
+#define EXYNOS5440_TMU_TH_RISE1_SHIFT          8
+#define EXYNOS5440_TMU_TH_RISE2_SHIFT          16
+#define EXYNOS5440_TMU_TH_RISE3_SHIFT          24
+#define EXYNOS5440_TMU_TH_RISE4_SHIFT          24
+#define EXYNOS5440_EFUSE_SWAP_OFFSET           8
+
+#if defined(CONFIG_CPU_EXYNOS4210)
+extern struct exynos_tmu_init_data const exynos4210_default_tmu_data;
+#define EXYNOS4210_TMU_DRV_DATA (&exynos4210_default_tmu_data)
+#else
+#define EXYNOS4210_TMU_DRV_DATA (NULL)
+#endif
+
+#if (defined(CONFIG_SOC_EXYNOS5250) || defined(CONFIG_SOC_EXYNOS4412))
+extern struct exynos_tmu_init_data const exynos5250_default_tmu_data;
+#define EXYNOS5250_TMU_DRV_DATA (&exynos5250_default_tmu_data)
+#else
+#define EXYNOS5250_TMU_DRV_DATA (NULL)
+#endif
+
+#if defined(CONFIG_SOC_EXYNOS5440)
+extern struct exynos_tmu_init_data const exynos5440_default_tmu_data;
+#define EXYNOS5440_TMU_DRV_DATA (&exynos5440_default_tmu_data)
+#else
+#define EXYNOS5440_TMU_DRV_DATA (NULL)
+#endif
+
+#endif /*_EXYNOS_TMU_DATA_H*/
index 4d4ddae..d89e781 100644 (file)
@@ -51,44 +51,51 @@ static unsigned long get_target_state(struct thermal_instance *instance,
 {
        struct thermal_cooling_device *cdev = instance->cdev;
        unsigned long cur_state;
+       unsigned long next_target;
 
+       /*
+        * We keep this instance the way it is by default.
+        * Otherwise, we use the current state of the
+        * cdev in use to determine the next_target.
+        */
        cdev->ops->get_cur_state(cdev, &cur_state);
+       next_target = instance->target;
 
        switch (trend) {
        case THERMAL_TREND_RAISING:
                if (throttle) {
-                       cur_state = cur_state < instance->upper ?
+                       next_target = cur_state < instance->upper ?
                                    (cur_state + 1) : instance->upper;
-                       if (cur_state < instance->lower)
-                               cur_state = instance->lower;
+                       if (next_target < instance->lower)
+                               next_target = instance->lower;
                }
                break;
        case THERMAL_TREND_RAISE_FULL:
                if (throttle)
-                       cur_state = instance->upper;
+                       next_target = instance->upper;
                break;
        case THERMAL_TREND_DROPPING:
                if (cur_state == instance->lower) {
                        if (!throttle)
-                               cur_state = -1;
+                               next_target = THERMAL_NO_TARGET;
                } else {
-                       cur_state -= 1;
-                       if (cur_state > instance->upper)
-                               cur_state = instance->upper;
+                       next_target = cur_state - 1;
+                       if (next_target > instance->upper)
+                               next_target = instance->upper;
                }
                break;
        case THERMAL_TREND_DROP_FULL:
                if (cur_state == instance->lower) {
                        if (!throttle)
-                               cur_state = -1;
+                               next_target = THERMAL_NO_TARGET;
                } else
-                       cur_state = instance->lower;
+                       next_target = instance->lower;
                break;
        default:
                break;
        }
 
-       return cur_state;
+       return next_target;
 }
 
 static void update_passive_instance(struct thermal_zone_device *tz,
@@ -133,6 +140,9 @@ static void thermal_zone_trip_update(struct thermal_zone_device *tz, int trip)
                old_target = instance->target;
                instance->target = get_target_state(instance, trend, throttle);
 
+               if (old_target == instance->target)
+                       continue;
+
                /* Activate a passive thermal instance */
                if (old_target == THERMAL_NO_TARGET &&
                        instance->target != THERMAL_NO_TARGET)
index 1f02e8e..4962a6a 100644 (file)
@@ -38,6 +38,7 @@
 #include <net/genetlink.h>
 
 #include "thermal_core.h"
+#include "thermal_hwmon.h"
 
 MODULE_AUTHOR("Zhang Rui");
 MODULE_DESCRIPTION("Generic thermal management sysfs support");
@@ -201,14 +202,23 @@ static void print_bind_err_msg(struct thermal_zone_device *tz,
 }
 
 static void __bind(struct thermal_zone_device *tz, int mask,
-                       struct thermal_cooling_device *cdev)
+                       struct thermal_cooling_device *cdev,
+                       unsigned long *limits)
 {
        int i, ret;
 
        for (i = 0; i < tz->trips; i++) {
                if (mask & (1 << i)) {
+                       unsigned long upper, lower;
+
+                       upper = THERMAL_NO_LIMIT;
+                       lower = THERMAL_NO_LIMIT;
+                       if (limits) {
+                               lower = limits[i * 2];
+                               upper = limits[i * 2 + 1];
+                       }
                        ret = thermal_zone_bind_cooling_device(tz, i, cdev,
-                                       THERMAL_NO_LIMIT, THERMAL_NO_LIMIT);
+                                                              upper, lower);
                        if (ret)
                                print_bind_err_msg(tz, cdev, ret);
                }
@@ -253,7 +263,8 @@ static void bind_cdev(struct thermal_cooling_device *cdev)
                        if (tzp->tbp[i].match(pos, cdev))
                                continue;
                        tzp->tbp[i].cdev = cdev;
-                       __bind(pos, tzp->tbp[i].trip_mask, cdev);
+                       __bind(pos, tzp->tbp[i].trip_mask, cdev,
+                              tzp->tbp[i].binding_limits);
                }
        }
 
@@ -291,7 +302,8 @@ static void bind_tz(struct thermal_zone_device *tz)
                        if (tzp->tbp[i].match(tz, pos))
                                continue;
                        tzp->tbp[i].cdev = pos;
-                       __bind(tz, tzp->tbp[i].trip_mask, pos);
+                       __bind(tz, tzp->tbp[i].trip_mask, pos,
+                              tzp->tbp[i].binding_limits);
                }
        }
 exit:
@@ -859,260 +871,6 @@ thermal_cooling_device_trip_point_show(struct device *dev,
 
 /* Device management */
 
-#if defined(CONFIG_THERMAL_HWMON)
-
-/* hwmon sys I/F */
-#include <linux/hwmon.h>
-
-/* thermal zone devices with the same type share one hwmon device */
-struct thermal_hwmon_device {
-       char type[THERMAL_NAME_LENGTH];
-       struct device *device;
-       int count;
-       struct list_head tz_list;
-       struct list_head node;
-};
-
-struct thermal_hwmon_attr {
-       struct device_attribute attr;
-       char name[16];
-};
-
-/* one temperature input for each thermal zone */
-struct thermal_hwmon_temp {
-       struct list_head hwmon_node;
-       struct thermal_zone_device *tz;
-       struct thermal_hwmon_attr temp_input;   /* hwmon sys attr */
-       struct thermal_hwmon_attr temp_crit;    /* hwmon sys attr */
-};
-
-static LIST_HEAD(thermal_hwmon_list);
-
-static ssize_t
-name_show(struct device *dev, struct device_attribute *attr, char *buf)
-{
-       struct thermal_hwmon_device *hwmon = dev_get_drvdata(dev);
-       return sprintf(buf, "%s\n", hwmon->type);
-}
-static DEVICE_ATTR(name, 0444, name_show, NULL);
-
-static ssize_t
-temp_input_show(struct device *dev, struct device_attribute *attr, char *buf)
-{
-       long temperature;
-       int ret;
-       struct thermal_hwmon_attr *hwmon_attr
-                       = container_of(attr, struct thermal_hwmon_attr, attr);
-       struct thermal_hwmon_temp *temp
-                       = container_of(hwmon_attr, struct thermal_hwmon_temp,
-                                      temp_input);
-       struct thermal_zone_device *tz = temp->tz;
-
-       ret = thermal_zone_get_temp(tz, &temperature);
-
-       if (ret)
-               return ret;
-
-       return sprintf(buf, "%ld\n", temperature);
-}
-
-static ssize_t
-temp_crit_show(struct device *dev, struct device_attribute *attr,
-               char *buf)
-{
-       struct thermal_hwmon_attr *hwmon_attr
-                       = container_of(attr, struct thermal_hwmon_attr, attr);
-       struct thermal_hwmon_temp *temp
-                       = container_of(hwmon_attr, struct thermal_hwmon_temp,
-                                      temp_crit);
-       struct thermal_zone_device *tz = temp->tz;
-       long temperature;
-       int ret;
-
-       ret = tz->ops->get_trip_temp(tz, 0, &temperature);
-       if (ret)
-               return ret;
-
-       return sprintf(buf, "%ld\n", temperature);
-}
-
-
-static struct thermal_hwmon_device *
-thermal_hwmon_lookup_by_type(const struct thermal_zone_device *tz)
-{
-       struct thermal_hwmon_device *hwmon;
-
-       mutex_lock(&thermal_list_lock);
-       list_for_each_entry(hwmon, &thermal_hwmon_list, node)
-               if (!strcmp(hwmon->type, tz->type)) {
-                       mutex_unlock(&thermal_list_lock);
-                       return hwmon;
-               }
-       mutex_unlock(&thermal_list_lock);
-
-       return NULL;
-}
-
-/* Find the temperature input matching a given thermal zone */
-static struct thermal_hwmon_temp *
-thermal_hwmon_lookup_temp(const struct thermal_hwmon_device *hwmon,
-                         const struct thermal_zone_device *tz)
-{
-       struct thermal_hwmon_temp *temp;
-
-       mutex_lock(&thermal_list_lock);
-       list_for_each_entry(temp, &hwmon->tz_list, hwmon_node)
-               if (temp->tz == tz) {
-                       mutex_unlock(&thermal_list_lock);
-                       return temp;
-               }
-       mutex_unlock(&thermal_list_lock);
-
-       return NULL;
-}
-
-static int
-thermal_add_hwmon_sysfs(struct thermal_zone_device *tz)
-{
-       struct thermal_hwmon_device *hwmon;
-       struct thermal_hwmon_temp *temp;
-       int new_hwmon_device = 1;
-       int result;
-
-       hwmon = thermal_hwmon_lookup_by_type(tz);
-       if (hwmon) {
-               new_hwmon_device = 0;
-               goto register_sys_interface;
-       }
-
-       hwmon = kzalloc(sizeof(struct thermal_hwmon_device), GFP_KERNEL);
-       if (!hwmon)
-               return -ENOMEM;
-
-       INIT_LIST_HEAD(&hwmon->tz_list);
-       strlcpy(hwmon->type, tz->type, THERMAL_NAME_LENGTH);
-       hwmon->device = hwmon_device_register(NULL);
-       if (IS_ERR(hwmon->device)) {
-               result = PTR_ERR(hwmon->device);
-               goto free_mem;
-       }
-       dev_set_drvdata(hwmon->device, hwmon);
-       result = device_create_file(hwmon->device, &dev_attr_name);
-       if (result)
-               goto free_mem;
-
- register_sys_interface:
-       temp = kzalloc(sizeof(struct thermal_hwmon_temp), GFP_KERNEL);
-       if (!temp) {
-               result = -ENOMEM;
-               goto unregister_name;
-       }
-
-       temp->tz = tz;
-       hwmon->count++;
-
-       snprintf(temp->temp_input.name, sizeof(temp->temp_input.name),
-                "temp%d_input", hwmon->count);
-       temp->temp_input.attr.attr.name = temp->temp_input.name;
-       temp->temp_input.attr.attr.mode = 0444;
-       temp->temp_input.attr.show = temp_input_show;
-       sysfs_attr_init(&temp->temp_input.attr.attr);
-       result = device_create_file(hwmon->device, &temp->temp_input.attr);
-       if (result)
-               goto free_temp_mem;
-
-       if (tz->ops->get_crit_temp) {
-               unsigned long temperature;
-               if (!tz->ops->get_crit_temp(tz, &temperature)) {
-                       snprintf(temp->temp_crit.name,
-                                sizeof(temp->temp_crit.name),
-                               "temp%d_crit", hwmon->count);
-                       temp->temp_crit.attr.attr.name = temp->temp_crit.name;
-                       temp->temp_crit.attr.attr.mode = 0444;
-                       temp->temp_crit.attr.show = temp_crit_show;
-                       sysfs_attr_init(&temp->temp_crit.attr.attr);
-                       result = device_create_file(hwmon->device,
-                                                   &temp->temp_crit.attr);
-                       if (result)
-                               goto unregister_input;
-               }
-       }
-
-       mutex_lock(&thermal_list_lock);
-       if (new_hwmon_device)
-               list_add_tail(&hwmon->node, &thermal_hwmon_list);
-       list_add_tail(&temp->hwmon_node, &hwmon->tz_list);
-       mutex_unlock(&thermal_list_lock);
-
-       return 0;
-
- unregister_input:
-       device_remove_file(hwmon->device, &temp->temp_input.attr);
- free_temp_mem:
-       kfree(temp);
- unregister_name:
-       if (new_hwmon_device) {
-               device_remove_file(hwmon->device, &dev_attr_name);
-               hwmon_device_unregister(hwmon->device);
-       }
- free_mem:
-       if (new_hwmon_device)
-               kfree(hwmon);
-
-       return result;
-}
-
-static void
-thermal_remove_hwmon_sysfs(struct thermal_zone_device *tz)
-{
-       struct thermal_hwmon_device *hwmon;
-       struct thermal_hwmon_temp *temp;
-
-       hwmon = thermal_hwmon_lookup_by_type(tz);
-       if (unlikely(!hwmon)) {
-               /* Should never happen... */
-               dev_dbg(&tz->device, "hwmon device lookup failed!\n");
-               return;
-       }
-
-       temp = thermal_hwmon_lookup_temp(hwmon, tz);
-       if (unlikely(!temp)) {
-               /* Should never happen... */
-               dev_dbg(&tz->device, "temperature input lookup failed!\n");
-               return;
-       }
-
-       device_remove_file(hwmon->device, &temp->temp_input.attr);
-       if (tz->ops->get_crit_temp)
-               device_remove_file(hwmon->device, &temp->temp_crit.attr);
-
-       mutex_lock(&thermal_list_lock);
-       list_del(&temp->hwmon_node);
-       kfree(temp);
-       if (!list_empty(&hwmon->tz_list)) {
-               mutex_unlock(&thermal_list_lock);
-               return;
-       }
-       list_del(&hwmon->node);
-       mutex_unlock(&thermal_list_lock);
-
-       device_remove_file(hwmon->device, &dev_attr_name);
-       hwmon_device_unregister(hwmon->device);
-       kfree(hwmon);
-}
-#else
-static int
-thermal_add_hwmon_sysfs(struct thermal_zone_device *tz)
-{
-       return 0;
-}
-
-static void
-thermal_remove_hwmon_sysfs(struct thermal_zone_device *tz)
-{
-}
-#endif
-
 /**
  * thermal_zone_bind_cooling_device() - bind a cooling device to a thermal zone
  * @tz:                pointer to struct thermal_zone_device
@@ -1715,9 +1473,11 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type,
 
        mutex_unlock(&thermal_governor_lock);
 
-       result = thermal_add_hwmon_sysfs(tz);
-       if (result)
-               goto unregister;
+       if (!tz->tzp || !tz->tzp->no_hwmon) {
+               result = thermal_add_hwmon_sysfs(tz);
+               if (result)
+                       goto unregister;
+       }
 
        mutex_lock(&thermal_list_lock);
        list_add_tail(&tz->node, &thermal_tz_list);
diff --git a/drivers/thermal/thermal_hwmon.c b/drivers/thermal/thermal_hwmon.c
new file mode 100644 (file)
index 0000000..eeef0e2
--- /dev/null
@@ -0,0 +1,269 @@
+/*
+ *  thermal_hwmon.c - Generic Thermal Management hwmon support.
+ *
+ *  Code based on Intel thermal_core.c. Copyrights of the original code:
+ *  Copyright (C) 2008 Intel Corp
+ *  Copyright (C) 2008 Zhang Rui <rui.zhang@intel.com>
+ *  Copyright (C) 2008 Sujith Thomas <sujith.thomas@intel.com>
+ *
+ *  Copyright (C) 2013 Texas Instruments
+ *  Copyright (C) 2013 Eduardo Valentin <eduardo.valentin@ti.com>
+ *  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; version 2 of the License.
+ *
+ *  This program is distributed in the hope that it will be useful, but
+ *  WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+#include <linux/hwmon.h>
+#include <linux/thermal.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include "thermal_hwmon.h"
+
+/* hwmon sys I/F */
+/* thermal zone devices with the same type share one hwmon device */
+struct thermal_hwmon_device {
+       char type[THERMAL_NAME_LENGTH];
+       struct device *device;
+       int count;
+       struct list_head tz_list;
+       struct list_head node;
+};
+
+struct thermal_hwmon_attr {
+       struct device_attribute attr;
+       char name[16];
+};
+
+/* one temperature input for each thermal zone */
+struct thermal_hwmon_temp {
+       struct list_head hwmon_node;
+       struct thermal_zone_device *tz;
+       struct thermal_hwmon_attr temp_input;   /* hwmon sys attr */
+       struct thermal_hwmon_attr temp_crit;    /* hwmon sys attr */
+};
+
+static LIST_HEAD(thermal_hwmon_list);
+
+static DEFINE_MUTEX(thermal_hwmon_list_lock);
+
+static ssize_t
+name_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+       struct thermal_hwmon_device *hwmon = dev_get_drvdata(dev);
+       return sprintf(buf, "%s\n", hwmon->type);
+}
+static DEVICE_ATTR(name, 0444, name_show, NULL);
+
+static ssize_t
+temp_input_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+       long temperature;
+       int ret;
+       struct thermal_hwmon_attr *hwmon_attr
+                       = container_of(attr, struct thermal_hwmon_attr, attr);
+       struct thermal_hwmon_temp *temp
+                       = container_of(hwmon_attr, struct thermal_hwmon_temp,
+                                      temp_input);
+       struct thermal_zone_device *tz = temp->tz;
+
+       ret = thermal_zone_get_temp(tz, &temperature);
+
+       if (ret)
+               return ret;
+
+       return sprintf(buf, "%ld\n", temperature);
+}
+
+static ssize_t
+temp_crit_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+       struct thermal_hwmon_attr *hwmon_attr
+                       = container_of(attr, struct thermal_hwmon_attr, attr);
+       struct thermal_hwmon_temp *temp
+                       = container_of(hwmon_attr, struct thermal_hwmon_temp,
+                                      temp_crit);
+       struct thermal_zone_device *tz = temp->tz;
+       long temperature;
+       int ret;
+
+       ret = tz->ops->get_trip_temp(tz, 0, &temperature);
+       if (ret)
+               return ret;
+
+       return sprintf(buf, "%ld\n", temperature);
+}
+
+
+static struct thermal_hwmon_device *
+thermal_hwmon_lookup_by_type(const struct thermal_zone_device *tz)
+{
+       struct thermal_hwmon_device *hwmon;
+
+       mutex_lock(&thermal_hwmon_list_lock);
+       list_for_each_entry(hwmon, &thermal_hwmon_list, node)
+               if (!strcmp(hwmon->type, tz->type)) {
+                       mutex_unlock(&thermal_hwmon_list_lock);
+                       return hwmon;
+               }
+       mutex_unlock(&thermal_hwmon_list_lock);
+
+       return NULL;
+}
+
+/* Find the temperature input matching a given thermal zone */
+static struct thermal_hwmon_temp *
+thermal_hwmon_lookup_temp(const struct thermal_hwmon_device *hwmon,
+                         const struct thermal_zone_device *tz)
+{
+       struct thermal_hwmon_temp *temp;
+
+       mutex_lock(&thermal_hwmon_list_lock);
+       list_for_each_entry(temp, &hwmon->tz_list, hwmon_node)
+               if (temp->tz == tz) {
+                       mutex_unlock(&thermal_hwmon_list_lock);
+                       return temp;
+               }
+       mutex_unlock(&thermal_hwmon_list_lock);
+
+       return NULL;
+}
+
+int thermal_add_hwmon_sysfs(struct thermal_zone_device *tz)
+{
+       struct thermal_hwmon_device *hwmon;
+       struct thermal_hwmon_temp *temp;
+       int new_hwmon_device = 1;
+       int result;
+
+       hwmon = thermal_hwmon_lookup_by_type(tz);
+       if (hwmon) {
+               new_hwmon_device = 0;
+               goto register_sys_interface;
+       }
+
+       hwmon = kzalloc(sizeof(*hwmon), GFP_KERNEL);
+       if (!hwmon)
+               return -ENOMEM;
+
+       INIT_LIST_HEAD(&hwmon->tz_list);
+       strlcpy(hwmon->type, tz->type, THERMAL_NAME_LENGTH);
+       hwmon->device = hwmon_device_register(&tz->device);
+       if (IS_ERR(hwmon->device)) {
+               result = PTR_ERR(hwmon->device);
+               goto free_mem;
+       }
+       dev_set_drvdata(hwmon->device, hwmon);
+       result = device_create_file(hwmon->device, &dev_attr_name);
+       if (result)
+               goto free_mem;
+
+ register_sys_interface:
+       temp = kzalloc(sizeof(*temp), GFP_KERNEL);
+       if (!temp) {
+               result = -ENOMEM;
+               goto unregister_name;
+       }
+
+       temp->tz = tz;
+       hwmon->count++;
+
+       snprintf(temp->temp_input.name, sizeof(temp->temp_input.name),
+                "temp%d_input", hwmon->count);
+       temp->temp_input.attr.attr.name = temp->temp_input.name;
+       temp->temp_input.attr.attr.mode = 0444;
+       temp->temp_input.attr.show = temp_input_show;
+       sysfs_attr_init(&temp->temp_input.attr.attr);
+       result = device_create_file(hwmon->device, &temp->temp_input.attr);
+       if (result)
+               goto free_temp_mem;
+
+       if (tz->ops->get_crit_temp) {
+               unsigned long temperature;
+               if (!tz->ops->get_crit_temp(tz, &temperature)) {
+                       snprintf(temp->temp_crit.name,
+                                sizeof(temp->temp_crit.name),
+                               "temp%d_crit", hwmon->count);
+                       temp->temp_crit.attr.attr.name = temp->temp_crit.name;
+                       temp->temp_crit.attr.attr.mode = 0444;
+                       temp->temp_crit.attr.show = temp_crit_show;
+                       sysfs_attr_init(&temp->temp_crit.attr.attr);
+                       result = device_create_file(hwmon->device,
+                                                   &temp->temp_crit.attr);
+                       if (result)
+                               goto unregister_input;
+               }
+       }
+
+       mutex_lock(&thermal_hwmon_list_lock);
+       if (new_hwmon_device)
+               list_add_tail(&hwmon->node, &thermal_hwmon_list);
+       list_add_tail(&temp->hwmon_node, &hwmon->tz_list);
+       mutex_unlock(&thermal_hwmon_list_lock);
+
+       return 0;
+
+ unregister_input:
+       device_remove_file(hwmon->device, &temp->temp_input.attr);
+ free_temp_mem:
+       kfree(temp);
+ unregister_name:
+       if (new_hwmon_device) {
+               device_remove_file(hwmon->device, &dev_attr_name);
+               hwmon_device_unregister(hwmon->device);
+       }
+ free_mem:
+       if (new_hwmon_device)
+               kfree(hwmon);
+
+       return result;
+}
+
+void thermal_remove_hwmon_sysfs(struct thermal_zone_device *tz)
+{
+       struct thermal_hwmon_device *hwmon;
+       struct thermal_hwmon_temp *temp;
+
+       hwmon = thermal_hwmon_lookup_by_type(tz);
+       if (unlikely(!hwmon)) {
+               /* Should never happen... */
+               dev_dbg(&tz->device, "hwmon device lookup failed!\n");
+               return;
+       }
+
+       temp = thermal_hwmon_lookup_temp(hwmon, tz);
+       if (unlikely(!temp)) {
+               /* Should never happen... */
+               dev_dbg(&tz->device, "temperature input lookup failed!\n");
+               return;
+       }
+
+       device_remove_file(hwmon->device, &temp->temp_input.attr);
+       if (tz->ops->get_crit_temp)
+               device_remove_file(hwmon->device, &temp->temp_crit.attr);
+
+       mutex_lock(&thermal_hwmon_list_lock);
+       list_del(&temp->hwmon_node);
+       kfree(temp);
+       if (!list_empty(&hwmon->tz_list)) {
+               mutex_unlock(&thermal_hwmon_list_lock);
+               return;
+       }
+       list_del(&hwmon->node);
+       mutex_unlock(&thermal_hwmon_list_lock);
+
+       device_remove_file(hwmon->device, &dev_attr_name);
+       hwmon_device_unregister(hwmon->device);
+       kfree(hwmon);
+}
diff --git a/drivers/thermal/thermal_hwmon.h b/drivers/thermal/thermal_hwmon.h
new file mode 100644 (file)
index 0000000..c798fdb
--- /dev/null
@@ -0,0 +1,49 @@
+/*
+ *  thermal_hwmon.h - Generic Thermal Management hwmon support.
+ *
+ *  Code based on Intel thermal_core.c. Copyrights of the original code:
+ *  Copyright (C) 2008 Intel Corp
+ *  Copyright (C) 2008 Zhang Rui <rui.zhang@intel.com>
+ *  Copyright (C) 2008 Sujith Thomas <sujith.thomas@intel.com>
+ *
+ *  Copyright (C) 2013 Texas Instruments
+ *  Copyright (C) 2013 Eduardo Valentin <eduardo.valentin@ti.com>
+ *  ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; version 2 of the License.
+ *
+ *  This program is distributed in the hope that it will be useful, but
+ *  WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with this program; if not, write to the Free Software Foundation, Inc.,
+ *  59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+#ifndef __THERMAL_HWMON_H__
+#define __THERMAL_HWMON_H__
+
+#include <linux/thermal.h>
+
+#ifdef CONFIG_THERMAL_HWMON
+int thermal_add_hwmon_sysfs(struct thermal_zone_device *tz);
+void thermal_remove_hwmon_sysfs(struct thermal_zone_device *tz);
+#else
+static int
+thermal_add_hwmon_sysfs(struct thermal_zone_device *tz)
+{
+       return 0;
+}
+
+static void
+thermal_remove_hwmon_sysfs(struct thermal_zone_device *tz)
+{
+}
+#endif
+
+#endif /* __THERMAL_HWMON_H__ */
index e5d8326..a492927 100644 (file)
@@ -42,6 +42,7 @@ dra752_core_temp_sensor_registers = {
        .mask_hot_mask = DRA752_BANDGAP_CTRL_1_MASK_HOT_CORE_MASK,
        .mask_cold_mask = DRA752_BANDGAP_CTRL_1_MASK_COLD_CORE_MASK,
        .mask_sidlemode_mask = DRA752_BANDGAP_CTRL_1_SIDLEMODE_MASK,
+       .mask_counter_delay_mask = DRA752_BANDGAP_CTRL_1_COUNTER_DELAY_MASK,
        .mask_freeze_mask = DRA752_BANDGAP_CTRL_1_FREEZE_CORE_MASK,
        .mask_clear_mask = DRA752_BANDGAP_CTRL_1_CLEAR_CORE_MASK,
        .mask_clear_accum_mask = DRA752_BANDGAP_CTRL_1_CLEAR_ACCUM_CORE_MASK,
@@ -77,6 +78,7 @@ dra752_iva_temp_sensor_registers = {
        .mask_hot_mask = DRA752_BANDGAP_CTRL_2_MASK_HOT_IVA_MASK,
        .mask_cold_mask = DRA752_BANDGAP_CTRL_2_MASK_COLD_IVA_MASK,
        .mask_sidlemode_mask = DRA752_BANDGAP_CTRL_1_SIDLEMODE_MASK,
+       .mask_counter_delay_mask = DRA752_BANDGAP_CTRL_1_COUNTER_DELAY_MASK,
        .mask_freeze_mask = DRA752_BANDGAP_CTRL_2_FREEZE_IVA_MASK,
        .mask_clear_mask = DRA752_BANDGAP_CTRL_2_CLEAR_IVA_MASK,
        .mask_clear_accum_mask = DRA752_BANDGAP_CTRL_2_CLEAR_ACCUM_IVA_MASK,
@@ -112,6 +114,7 @@ dra752_mpu_temp_sensor_registers = {
        .mask_hot_mask = DRA752_BANDGAP_CTRL_1_MASK_HOT_MPU_MASK,
        .mask_cold_mask = DRA752_BANDGAP_CTRL_1_MASK_COLD_MPU_MASK,
        .mask_sidlemode_mask = DRA752_BANDGAP_CTRL_1_SIDLEMODE_MASK,
+       .mask_counter_delay_mask = DRA752_BANDGAP_CTRL_1_COUNTER_DELAY_MASK,
        .mask_freeze_mask = DRA752_BANDGAP_CTRL_1_FREEZE_MPU_MASK,
        .mask_clear_mask = DRA752_BANDGAP_CTRL_1_CLEAR_MPU_MASK,
        .mask_clear_accum_mask = DRA752_BANDGAP_CTRL_1_CLEAR_ACCUM_MPU_MASK,
@@ -147,6 +150,7 @@ dra752_dspeve_temp_sensor_registers = {
        .mask_hot_mask = DRA752_BANDGAP_CTRL_2_MASK_HOT_DSPEVE_MASK,
        .mask_cold_mask = DRA752_BANDGAP_CTRL_2_MASK_COLD_DSPEVE_MASK,
        .mask_sidlemode_mask = DRA752_BANDGAP_CTRL_1_SIDLEMODE_MASK,
+       .mask_counter_delay_mask = DRA752_BANDGAP_CTRL_1_COUNTER_DELAY_MASK,
        .mask_freeze_mask = DRA752_BANDGAP_CTRL_2_FREEZE_DSPEVE_MASK,
        .mask_clear_mask = DRA752_BANDGAP_CTRL_2_CLEAR_DSPEVE_MASK,
        .mask_clear_accum_mask = DRA752_BANDGAP_CTRL_2_CLEAR_ACCUM_DSPEVE_MASK,
@@ -182,6 +186,7 @@ dra752_gpu_temp_sensor_registers = {
        .mask_hot_mask = DRA752_BANDGAP_CTRL_1_MASK_HOT_GPU_MASK,
        .mask_cold_mask = DRA752_BANDGAP_CTRL_1_MASK_COLD_GPU_MASK,
        .mask_sidlemode_mask = DRA752_BANDGAP_CTRL_1_SIDLEMODE_MASK,
+       .mask_counter_delay_mask = DRA752_BANDGAP_CTRL_1_COUNTER_DELAY_MASK,
        .mask_freeze_mask = DRA752_BANDGAP_CTRL_1_FREEZE_GPU_MASK,
        .mask_clear_mask = DRA752_BANDGAP_CTRL_1_CLEAR_GPU_MASK,
        .mask_clear_accum_mask = DRA752_BANDGAP_CTRL_1_CLEAR_ACCUM_GPU_MASK,
index 9dfd471..74c0e34 100644 (file)
@@ -1020,9 +1020,13 @@ int ti_bandgap_get_trend(struct ti_bandgap *bgp, int id, int *trend)
 
        /* Fetch the update interval */
        ret = ti_bandgap_read_update_interval(bgp, id, &interval);
-       if (ret || !interval)
+       if (ret)
                goto unfreeze;
 
+       /* Set the interval to 1 ms if bandgap counter delay is not set */
+       if (interval == 0)
+               interval = 1;
+
        *trend = (t1 - t2) / interval;
 
        dev_dbg(bgp->dev, "The temperatures are t1 = %d and t2 = %d and trend =%d\n",
index 4c5f55c..4f8b9af 100644 (file)
@@ -174,6 +174,9 @@ static int ti_thermal_set_mode(struct thermal_zone_device *thermal,
                               enum thermal_device_mode mode)
 {
        struct ti_thermal_data *data = thermal->devdata;
+       struct ti_bandgap *bgp;
+
+       bgp = data->bgp;
 
        if (!data->ti_thermal) {
                dev_notice(&thermal->device, "thermal zone not registered\n");
@@ -190,6 +193,8 @@ static int ti_thermal_set_mode(struct thermal_zone_device *thermal,
        mutex_unlock(&data->ti_thermal->lock);
 
        data->mode = mode;
+       ti_bandgap_write_update_interval(bgp, data->sensor_id,
+                                       data->ti_thermal->polling_delay);
        thermal_zone_device_update(data->ti_thermal);
        dev_dbg(&thermal->device, "thermal polling set for duration=%d msec\n",
                data->ti_thermal->polling_delay);
@@ -313,6 +318,8 @@ int ti_thermal_expose_sensor(struct ti_bandgap *bgp, int id,
        }
        data->ti_thermal->polling_delay = FAST_TEMP_MONITORING_RATE;
        ti_bandgap_set_sensor_data(bgp, id, data);
+       ti_bandgap_write_update_interval(bgp, data->sensor_id,
+                                       data->ti_thermal->polling_delay);
 
        return 0;
 }
index 47c6e7b..febd45c 100644 (file)
@@ -5,7 +5,7 @@
 if TTY
 
 menu "Serial drivers"
-       depends on HAS_IOMEM && GENERIC_HARDIRQS
+       depends on HAS_IOMEM
 
 source "drivers/tty/serial/8250/Kconfig"
 
index a9355ce..3a1a01a 100644 (file)
@@ -854,7 +854,8 @@ void disassociate_ctty(int on_exit)
                        struct pid *tty_pgrp = tty_get_pgrp(tty);
                        if (tty_pgrp) {
                                kill_pgrp(tty_pgrp, SIGHUP, on_exit);
-                               kill_pgrp(tty_pgrp, SIGCONT, on_exit);
+                               if (!on_exit)
+                                       kill_pgrp(tty_pgrp, SIGCONT, on_exit);
                                put_pid(tty_pgrp);
                        }
                }
index f969ea2..b870872 100644 (file)
@@ -1,6 +1,6 @@
 config USB_DWC3
        tristate "DesignWare USB3 DRD Core Support"
-       depends on (USB || USB_GADGET) && GENERIC_HARDIRQS && HAS_DMA
+       depends on (USB || USB_GADGET) && HAS_DMA
        depends on EXTCON
        select USB_XHCI_PLATFORM if USB_SUPPORT && USB_XHCI_HCD
        help
index 30e2dd8..48cddf3 100644 (file)
@@ -313,7 +313,7 @@ config USB_S3C_HSUDC
 
 config USB_MV_UDC
        tristate "Marvell USB2.0 Device Controller"
-       depends on GENERIC_HARDIRQS && HAS_DMA
+       depends on HAS_DMA
        help
          Marvell Socs (including PXA and MMP series) include a high speed
          USB2.0 OTG controller, which can be configured as high speed or
@@ -425,7 +425,7 @@ config USB_GOKU
 
 config USB_EG20T
        tristate "Intel EG20T PCH/LAPIS Semiconductor IOH(ML7213/ML7831) UDC"
-       depends on PCI && GENERIC_HARDIRQS
+       depends on PCI
        help
          This is a USB device driver for EG20T PCH.
          EG20T PCH is the platform controller hub that is used in Intel's
index 465ef8e..b94c049 100644 (file)
@@ -524,7 +524,7 @@ struct kiocb_priv {
        unsigned                actual;
 };
 
-static int ep_aio_cancel(struct kiocb *iocb, struct io_event *e)
+static int ep_aio_cancel(struct kiocb *iocb)
 {
        struct kiocb_priv       *priv = iocb->private;
        struct ep_data          *epdata;
@@ -540,7 +540,6 @@ static int ep_aio_cancel(struct kiocb *iocb, struct io_event *e)
        // spin_unlock(&epdata->dev->lock);
        local_irq_enable();
 
-       aio_put_req(iocb);
        return value;
 }
 
@@ -709,11 +708,11 @@ ep_aio_read(struct kiocb *iocb, const struct iovec *iov,
        if (unlikely(usb_endpoint_dir_in(&epdata->desc)))
                return -EINVAL;
 
-       buf = kmalloc(iocb->ki_left, GFP_KERNEL);
+       buf = kmalloc(iocb->ki_nbytes, GFP_KERNEL);
        if (unlikely(!buf))
                return -ENOMEM;
 
-       return ep_aio_rwtail(iocb, buf, iocb->ki_left, epdata, iov, nr_segs);
+       return ep_aio_rwtail(iocb, buf, iocb->ki_nbytes, epdata, iov, nr_segs);
 }
 
 static ssize_t
@@ -728,7 +727,7 @@ ep_aio_write(struct kiocb *iocb, const struct iovec *iov,
        if (unlikely(!usb_endpoint_dir_in(&epdata->desc)))
                return -EINVAL;
 
-       buf = kmalloc(iocb->ki_left, GFP_KERNEL);
+       buf = kmalloc(iocb->ki_nbytes, GFP_KERNEL);
        if (unlikely(!buf))
                return -ENOMEM;
 
index 5be0326..b3f20d7 100644 (file)
@@ -278,7 +278,6 @@ endif # USB_EHCI_HCD
 
 config USB_OXU210HP_HCD
        tristate "OXU210HP HCD support"
-       depends on GENERIC_HARDIRQS
        ---help---
          The OXU210HP is an USB host/OTG/device controller. Enable this
          option if your board has this chip. If unsure, say N.
index c64ee09..c258a97 100644 (file)
@@ -71,7 +71,6 @@ config USB_MUSB_DA8XX
 
 config USB_MUSB_TUSB6010
        tristate "TUSB6010"
-       depends on GENERIC_HARDIRQS
 
 config USB_MUSB_OMAP2PLUS
        tristate "OMAP2430 and onwards"
index 019bf7e..1c4195a 100644 (file)
@@ -4,7 +4,7 @@
 
 config USB_RENESAS_USBHS
        tristate 'Renesas USBHS controller'
-       depends on USB_GADGET && GENERIC_HARDIRQS
+       depends on USB_GADGET
        default n
        help
          Renesas USBHS is a discrete USB host and peripheral controller chip
index 0c27c7d..4b79a1f 100644 (file)
@@ -1,12 +1,12 @@
 /*******************************************************************************
  * Vhost kernel TCM fabric driver for virtio SCSI initiators
  *
- * (C) Copyright 2010-2012 RisingTide Systems LLC.
+ * (C) Copyright 2010-2013 Datera, Inc.
  * (C) Copyright 2010-2012 IBM Corp.
  *
  * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
  *
- * Authors: Nicholas A. Bellinger <nab@risingtidesystems.com>
+ * Authors: Nicholas A. Bellinger <nab@daterainc.com>
  *          Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
  *
  * This program is free software; you can redistribute it and/or modify
 #include <linux/virtio_scsi.h>
 #include <linux/llist.h>
 #include <linux/bitmap.h>
+#include <linux/percpu_ida.h>
 
 #include "vhost.h"
 
 #define TCM_VHOST_VERSION  "v0.1"
 #define TCM_VHOST_NAMELEN 256
 #define TCM_VHOST_MAX_CDB_SIZE 32
+#define TCM_VHOST_DEFAULT_TAGS 256
+#define TCM_VHOST_PREALLOC_SGLS 2048
+#define TCM_VHOST_PREALLOC_PAGES 2048
 
 struct vhost_scsi_inflight {
        /* Wait for the flush operation to finish */
@@ -79,6 +83,7 @@ struct tcm_vhost_cmd {
        u32 tvc_lun;
        /* Pointer to the SGL formatted memory from virtio-scsi */
        struct scatterlist *tvc_sgl;
+       struct page **tvc_upages;
        /* Pointer to response */
        struct virtio_scsi_cmd_resp __user *tvc_resp;
        /* Pointer to vhost_scsi for our device */
@@ -450,17 +455,16 @@ static void tcm_vhost_release_cmd(struct se_cmd *se_cmd)
 {
        struct tcm_vhost_cmd *tv_cmd = container_of(se_cmd,
                                struct tcm_vhost_cmd, tvc_se_cmd);
+       struct se_session *se_sess = se_cmd->se_sess;
 
        if (tv_cmd->tvc_sgl_count) {
                u32 i;
                for (i = 0; i < tv_cmd->tvc_sgl_count; i++)
                        put_page(sg_page(&tv_cmd->tvc_sgl[i]));
-
-               kfree(tv_cmd->tvc_sgl);
         }
 
        tcm_vhost_put_inflight(tv_cmd->inflight);
-       kfree(tv_cmd);
+       percpu_ida_free(&se_sess->sess_tag_pool, se_cmd->map_tag);
 }
 
 static int tcm_vhost_shutdown_session(struct se_session *se_sess)
@@ -704,7 +708,7 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work)
 }
 
 static struct tcm_vhost_cmd *
-vhost_scsi_allocate_cmd(struct vhost_virtqueue *vq,
+vhost_scsi_get_tag(struct vhost_virtqueue *vq,
                        struct tcm_vhost_tpg *tpg,
                        struct virtio_scsi_cmd_req *v_req,
                        u32 exp_data_len,
@@ -712,18 +716,27 @@ vhost_scsi_allocate_cmd(struct vhost_virtqueue *vq,
 {
        struct tcm_vhost_cmd *cmd;
        struct tcm_vhost_nexus *tv_nexus;
+       struct se_session *se_sess;
+       struct scatterlist *sg;
+       struct page **pages;
+       int tag;
 
        tv_nexus = tpg->tpg_nexus;
        if (!tv_nexus) {
                pr_err("Unable to locate active struct tcm_vhost_nexus\n");
                return ERR_PTR(-EIO);
        }
+       se_sess = tv_nexus->tvn_se_sess;
 
-       cmd = kzalloc(sizeof(struct tcm_vhost_cmd), GFP_ATOMIC);
-       if (!cmd) {
-               pr_err("Unable to allocate struct tcm_vhost_cmd\n");
-               return ERR_PTR(-ENOMEM);
-       }
+       tag = percpu_ida_alloc(&se_sess->sess_tag_pool, GFP_KERNEL);
+       cmd = &((struct tcm_vhost_cmd *)se_sess->sess_cmd_map)[tag];
+       sg = cmd->tvc_sgl;
+       pages = cmd->tvc_upages;
+       memset(cmd, 0, sizeof(struct tcm_vhost_cmd));
+
+       cmd->tvc_sgl = sg;
+       cmd->tvc_upages = pages;
+       cmd->tvc_se_cmd.map_tag = tag;
        cmd->tvc_tag = v_req->tag;
        cmd->tvc_task_attr = v_req->task_attr;
        cmd->tvc_exp_data_len = exp_data_len;
@@ -740,7 +753,8 @@ vhost_scsi_allocate_cmd(struct vhost_virtqueue *vq,
  * Returns the number of scatterlist entries used or -errno on error.
  */
 static int
-vhost_scsi_map_to_sgl(struct scatterlist *sgl,
+vhost_scsi_map_to_sgl(struct tcm_vhost_cmd *tv_cmd,
+                     struct scatterlist *sgl,
                      unsigned int sgl_count,
                      struct iovec *iov,
                      int write)
@@ -752,13 +766,25 @@ vhost_scsi_map_to_sgl(struct scatterlist *sgl,
        struct page **pages;
        int ret, i;
 
+       if (sgl_count > TCM_VHOST_PREALLOC_SGLS) {
+               pr_err("vhost_scsi_map_to_sgl() psgl_count: %u greater than"
+                      " preallocated TCM_VHOST_PREALLOC_SGLS: %u\n",
+                       sgl_count, TCM_VHOST_PREALLOC_SGLS);
+               return -ENOBUFS;
+       }
+
        pages_nr = iov_num_pages(iov);
        if (pages_nr > sgl_count)
                return -ENOBUFS;
 
-       pages = kmalloc(pages_nr * sizeof(struct page *), GFP_KERNEL);
-       if (!pages)
-               return -ENOMEM;
+       if (pages_nr > TCM_VHOST_PREALLOC_PAGES) {
+               pr_err("vhost_scsi_map_to_sgl() pages_nr: %u greater than"
+                      " preallocated TCM_VHOST_PREALLOC_PAGES: %u\n",
+                       pages_nr, TCM_VHOST_PREALLOC_PAGES);
+               return -ENOBUFS;
+       }
+
+       pages = tv_cmd->tvc_upages;
 
        ret = get_user_pages_fast((unsigned long)ptr, pages_nr, write, pages);
        /* No pages were pinned */
@@ -783,7 +809,6 @@ vhost_scsi_map_to_sgl(struct scatterlist *sgl,
        }
 
 out:
-       kfree(pages);
        return ret;
 }
 
@@ -807,24 +832,20 @@ vhost_scsi_map_iov_to_sgl(struct tcm_vhost_cmd *cmd,
 
        /* TODO overflow checking */
 
-       sg = kmalloc(sizeof(cmd->tvc_sgl[0]) * sgl_count, GFP_ATOMIC);
-       if (!sg)
-               return -ENOMEM;
-       pr_debug("%s sg %p sgl_count %u is_err %d\n", __func__,
-              sg, sgl_count, !sg);
+       sg = cmd->tvc_sgl;
+       pr_debug("%s sg %p sgl_count %u\n", __func__, sg, sgl_count);
        sg_init_table(sg, sgl_count);
 
-       cmd->tvc_sgl = sg;
        cmd->tvc_sgl_count = sgl_count;
 
        pr_debug("Mapping %u iovecs for %u pages\n", niov, sgl_count);
        for (i = 0; i < niov; i++) {
-               ret = vhost_scsi_map_to_sgl(sg, sgl_count, &iov[i], write);
+               ret = vhost_scsi_map_to_sgl(cmd, sg, sgl_count, &iov[i],
+                                           write);
                if (ret < 0) {
                        for (i = 0; i < cmd->tvc_sgl_count; i++)
                                put_page(sg_page(&cmd->tvc_sgl[i]));
-                       kfree(cmd->tvc_sgl);
-                       cmd->tvc_sgl = NULL;
+
                        cmd->tvc_sgl_count = 0;
                        return ret;
                }
@@ -989,10 +1010,10 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq)
                for (i = 0; i < data_num; i++)
                        exp_data_len += vq->iov[data_first + i].iov_len;
 
-               cmd = vhost_scsi_allocate_cmd(vq, tpg, &v_req,
-                                       exp_data_len, data_direction);
+               cmd = vhost_scsi_get_tag(vq, tpg, &v_req,
+                                        exp_data_len, data_direction);
                if (IS_ERR(cmd)) {
-                       vq_err(vq, "vhost_scsi_allocate_cmd failed %ld\n",
+                       vq_err(vq, "vhost_scsi_get_tag failed %ld\n",
                                        PTR_ERR(cmd));
                        goto err_cmd;
                }
@@ -1654,11 +1675,31 @@ static void tcm_vhost_drop_nodeacl(struct se_node_acl *se_acl)
        kfree(nacl);
 }
 
+static void tcm_vhost_free_cmd_map_res(struct tcm_vhost_nexus *nexus,
+                                      struct se_session *se_sess)
+{
+       struct tcm_vhost_cmd *tv_cmd;
+       unsigned int i;
+
+       if (!se_sess->sess_cmd_map)
+               return;
+
+       for (i = 0; i < TCM_VHOST_DEFAULT_TAGS; i++) {
+               tv_cmd = &((struct tcm_vhost_cmd *)se_sess->sess_cmd_map)[i];
+
+               kfree(tv_cmd->tvc_sgl);
+               kfree(tv_cmd->tvc_upages);
+       }
+}
+
 static int tcm_vhost_make_nexus(struct tcm_vhost_tpg *tpg,
                                const char *name)
 {
        struct se_portal_group *se_tpg;
+       struct se_session *se_sess;
        struct tcm_vhost_nexus *tv_nexus;
+       struct tcm_vhost_cmd *tv_cmd;
+       unsigned int i;
 
        mutex_lock(&tpg->tv_tpg_mutex);
        if (tpg->tpg_nexus) {
@@ -1675,14 +1716,37 @@ static int tcm_vhost_make_nexus(struct tcm_vhost_tpg *tpg,
                return -ENOMEM;
        }
        /*
-        *  Initialize the struct se_session pointer
+        *  Initialize the struct se_session pointer and setup tagpool
+        *  for struct tcm_vhost_cmd descriptors
         */
-       tv_nexus->tvn_se_sess = transport_init_session();
+       tv_nexus->tvn_se_sess = transport_init_session_tags(
+                                       TCM_VHOST_DEFAULT_TAGS,
+                                       sizeof(struct tcm_vhost_cmd));
        if (IS_ERR(tv_nexus->tvn_se_sess)) {
                mutex_unlock(&tpg->tv_tpg_mutex);
                kfree(tv_nexus);
                return -ENOMEM;
        }
+       se_sess = tv_nexus->tvn_se_sess;
+       for (i = 0; i < TCM_VHOST_DEFAULT_TAGS; i++) {
+               tv_cmd = &((struct tcm_vhost_cmd *)se_sess->sess_cmd_map)[i];
+
+               tv_cmd->tvc_sgl = kzalloc(sizeof(struct scatterlist) *
+                                       TCM_VHOST_PREALLOC_SGLS, GFP_KERNEL);
+               if (!tv_cmd->tvc_sgl) {
+                       mutex_unlock(&tpg->tv_tpg_mutex);
+                       pr_err("Unable to allocate tv_cmd->tvc_sgl\n");
+                       goto out;
+               }
+
+               tv_cmd->tvc_upages = kzalloc(sizeof(struct page *) *
+                                       TCM_VHOST_PREALLOC_PAGES, GFP_KERNEL);
+               if (!tv_cmd->tvc_upages) {
+                       mutex_unlock(&tpg->tv_tpg_mutex);
+                       pr_err("Unable to allocate tv_cmd->tvc_upages\n");
+                       goto out;
+               }
+       }
        /*
         * Since we are running in 'demo mode' this call with generate a
         * struct se_node_acl for the tcm_vhost struct se_portal_group with
@@ -1694,9 +1758,7 @@ static int tcm_vhost_make_nexus(struct tcm_vhost_tpg *tpg,
                mutex_unlock(&tpg->tv_tpg_mutex);
                pr_debug("core_tpg_check_initiator_node_acl() failed"
                                " for %s\n", name);
-               transport_free_session(tv_nexus->tvn_se_sess);
-               kfree(tv_nexus);
-               return -ENOMEM;
+               goto out;
        }
        /*
         * Now register the TCM vhost virtual I_T Nexus as active with the
@@ -1708,6 +1770,12 @@ static int tcm_vhost_make_nexus(struct tcm_vhost_tpg *tpg,
 
        mutex_unlock(&tpg->tv_tpg_mutex);
        return 0;
+
+out:
+       tcm_vhost_free_cmd_map_res(tv_nexus, se_sess);
+       transport_free_session(se_sess);
+       kfree(tv_nexus);
+       return -ENOMEM;
 }
 
 static int tcm_vhost_drop_nexus(struct tcm_vhost_tpg *tpg)
@@ -1747,6 +1815,8 @@ static int tcm_vhost_drop_nexus(struct tcm_vhost_tpg *tpg)
        pr_debug("TCM_vhost_ConfigFS: Removing I_T Nexus to emulated"
                " %s Initiator Port: %s\n", tcm_vhost_dump_proto_id(tpg->tport),
                tv_nexus->tvn_se_sess->se_node_acl->initiatorname);
+
+       tcm_vhost_free_cmd_map_res(tv_nexus, se_sess);
        /*
         * Release the SCSI I_T Nexus to the emulated vhost Target Port
         */
index 6488a73..7e8346e 100644 (file)
 
 #include "acornfb.h"
 
-/*
- * VIDC machines can't do 16 or 32BPP modes.
- */
-#ifdef HAS_VIDC
-#undef FBCON_HAS_CFB16
-#undef FBCON_HAS_CFB32
-#endif
-
 /*
  * Default resolution.
  * NOTE that it has to be supported in the table towards
@@ -106,238 +98,6 @@ static struct vidc_timing current_vidc;
 
 extern unsigned int vram_size; /* set by setup.c */
 
-#ifdef HAS_VIDC
-
-#define MAX_SIZE       480*1024
-
-/* CTL     VIDC        Actual
- * 24.000  0    8.000
- * 25.175  0    8.392
- * 36.000  0   12.000
- * 24.000  1   12.000
- * 25.175  1   12.588
- * 24.000  2   16.000
- * 25.175  2   16.783
- * 36.000  1   18.000
- * 24.000  3   24.000
- * 36.000  2   24.000
- * 25.175  3   25.175
- * 36.000  3   36.000
- */
-struct pixclock {
-       u_long  min_clock;
-       u_long  max_clock;
-       u_int   vidc_ctl;
-       u_int   vid_ctl;
-};
-
-static struct pixclock arc_clocks[] = {
-       /* we allow +/-1% on these */
-       { 123750, 126250, VIDC_CTRL_DIV3,   VID_CTL_24MHz },    /*  8.000MHz */
-       {  82500,  84167, VIDC_CTRL_DIV2,   VID_CTL_24MHz },    /* 12.000MHz */
-       {  61875,  63125, VIDC_CTRL_DIV1_5, VID_CTL_24MHz },    /* 16.000MHz */
-       {  41250,  42083, VIDC_CTRL_DIV1,   VID_CTL_24MHz },    /* 24.000MHz */
-};
-
-static struct pixclock *
-acornfb_valid_pixrate(struct fb_var_screeninfo *var)
-{
-       u_long pixclock = var->pixclock;
-       u_int i;
-
-       if (!var->pixclock)
-               return NULL;
-
-       for (i = 0; i < ARRAY_SIZE(arc_clocks); i++)
-               if (pixclock > arc_clocks[i].min_clock &&
-                   pixclock < arc_clocks[i].max_clock)
-                       return arc_clocks + i;
-
-       return NULL;
-}
-
-/* VIDC Rules:
- * hcr  : must be even (interlace, hcr/2 must be even)
- * hswr : must be even
- * hdsr : must be odd
- * hder : must be odd
- *
- * vcr  : must be odd
- * vswr : >= 1
- * vdsr : >= 1
- * vder : >= vdsr
- * if interlaced, then hcr/2 must be even
- */
-static void
-acornfb_set_timing(struct fb_var_screeninfo *var)
-{
-       struct pixclock *pclk;
-       struct vidc_timing vidc;
-       u_int horiz_correction;
-       u_int sync_len, display_start, display_end, cycle;
-       u_int is_interlaced;
-       u_int vid_ctl, vidc_ctl;
-       u_int bandwidth;
-
-       memset(&vidc, 0, sizeof(vidc));
-
-       pclk = acornfb_valid_pixrate(var);
-       vidc_ctl = pclk->vidc_ctl;
-       vid_ctl  = pclk->vid_ctl;
-
-       bandwidth = var->pixclock * 8 / var->bits_per_pixel;
-       /* 25.175, 4bpp = 79.444ns per byte, 317.776ns per word: fifo = 2,6 */
-       if (bandwidth > 143500)
-               vidc_ctl |= VIDC_CTRL_FIFO_3_7;
-       else if (bandwidth > 71750)
-               vidc_ctl |= VIDC_CTRL_FIFO_2_6;
-       else if (bandwidth > 35875)
-               vidc_ctl |= VIDC_CTRL_FIFO_1_5;
-       else
-               vidc_ctl |= VIDC_CTRL_FIFO_0_4;
-
-       switch (var->bits_per_pixel) {
-       case 1:
-               horiz_correction = 19;
-               vidc_ctl |= VIDC_CTRL_1BPP;
-               break;
-
-       case 2:
-               horiz_correction = 11;
-               vidc_ctl |= VIDC_CTRL_2BPP;
-               break;
-
-       case 4:
-               horiz_correction = 7;
-               vidc_ctl |= VIDC_CTRL_4BPP;
-               break;
-
-       default:
-       case 8:
-               horiz_correction = 5;
-               vidc_ctl |= VIDC_CTRL_8BPP;
-               break;
-       }
-
-       if (var->sync & FB_SYNC_COMP_HIGH_ACT) /* should be FB_SYNC_COMP */
-               vidc_ctl |= VIDC_CTRL_CSYNC;
-       else {
-               if (!(var->sync & FB_SYNC_HOR_HIGH_ACT))
-                       vid_ctl |= VID_CTL_HS_NHSYNC;
-
-               if (!(var->sync & FB_SYNC_VERT_HIGH_ACT))
-                       vid_ctl |= VID_CTL_VS_NVSYNC;
-       }
-
-       sync_len        = var->hsync_len;
-       display_start   = sync_len + var->left_margin;
-       display_end     = display_start + var->xres;
-       cycle           = display_end + var->right_margin;
-
-       /* if interlaced, then hcr/2 must be even */
-       is_interlaced = (var->vmode & FB_VMODE_MASK) == FB_VMODE_INTERLACED;
-
-       if (is_interlaced) {
-               vidc_ctl |= VIDC_CTRL_INTERLACE;
-               if (cycle & 2) {
-                       cycle += 2;
-                       var->right_margin += 2;
-               }
-       }
-
-       vidc.h_cycle            = (cycle - 2) / 2;
-       vidc.h_sync_width       = (sync_len - 2) / 2;
-       vidc.h_border_start     = (display_start - 1) / 2;
-       vidc.h_display_start    = (display_start - horiz_correction) / 2;
-       vidc.h_display_end      = (display_end - horiz_correction) / 2;
-       vidc.h_border_end       = (display_end - 1) / 2;
-       vidc.h_interlace        = (vidc.h_cycle + 1) / 2;
-
-       sync_len        = var->vsync_len;
-       display_start   = sync_len + var->upper_margin;
-       display_end     = display_start + var->yres;
-       cycle           = display_end + var->lower_margin;
-
-       if (is_interlaced)
-               cycle = (cycle - 3) / 2;
-       else
-               cycle = cycle - 1;
-
-       vidc.v_cycle            = cycle;
-       vidc.v_sync_width       = sync_len - 1;
-       vidc.v_border_start     = display_start - 1;
-       vidc.v_display_start    = vidc.v_border_start;
-       vidc.v_display_end      = display_end - 1;
-       vidc.v_border_end       = vidc.v_display_end;
-
-       if (machine_is_a5k())
-               __raw_writeb(vid_ctl, IOEB_VID_CTL);
-
-       if (memcmp(&current_vidc, &vidc, sizeof(vidc))) {
-               current_vidc = vidc;
-
-               vidc_writel(0xe0000000 | vidc_ctl);
-               vidc_writel(0x80000000 | (vidc.h_cycle << 14));
-               vidc_writel(0x84000000 | (vidc.h_sync_width << 14));
-               vidc_writel(0x88000000 | (vidc.h_border_start << 14));
-               vidc_writel(0x8c000000 | (vidc.h_display_start << 14));
-               vidc_writel(0x90000000 | (vidc.h_display_end << 14));
-               vidc_writel(0x94000000 | (vidc.h_border_end << 14));
-               vidc_writel(0x98000000);
-               vidc_writel(0x9c000000 | (vidc.h_interlace << 14));
-               vidc_writel(0xa0000000 | (vidc.v_cycle << 14));
-               vidc_writel(0xa4000000 | (vidc.v_sync_width << 14));
-               vidc_writel(0xa8000000 | (vidc.v_border_start << 14));
-               vidc_writel(0xac000000 | (vidc.v_display_start << 14));
-               vidc_writel(0xb0000000 | (vidc.v_display_end << 14));
-               vidc_writel(0xb4000000 | (vidc.v_border_end << 14));
-               vidc_writel(0xb8000000);
-               vidc_writel(0xbc000000);
-       }
-#ifdef DEBUG_MODE_SELECTION
-       printk(KERN_DEBUG "VIDC registers for %dx%dx%d:\n", var->xres,
-              var->yres, var->bits_per_pixel);
-       printk(KERN_DEBUG " H-cycle          : %d\n", vidc.h_cycle);
-       printk(KERN_DEBUG " H-sync-width     : %d\n", vidc.h_sync_width);
-       printk(KERN_DEBUG " H-border-start   : %d\n", vidc.h_border_start);
-       printk(KERN_DEBUG " H-display-start  : %d\n", vidc.h_display_start);
-       printk(KERN_DEBUG " H-display-end    : %d\n", vidc.h_display_end);
-       printk(KERN_DEBUG " H-border-end     : %d\n", vidc.h_border_end);
-       printk(KERN_DEBUG " H-interlace      : %d\n", vidc.h_interlace);
-       printk(KERN_DEBUG " V-cycle          : %d\n", vidc.v_cycle);
-       printk(KERN_DEBUG " V-sync-width     : %d\n", vidc.v_sync_width);
-       printk(KERN_DEBUG " V-border-start   : %d\n", vidc.v_border_start);
-       printk(KERN_DEBUG " V-display-start  : %d\n", vidc.v_display_start);
-       printk(KERN_DEBUG " V-display-end    : %d\n", vidc.v_display_end);
-       printk(KERN_DEBUG " V-border-end     : %d\n", vidc.v_border_end);
-       printk(KERN_DEBUG " VIDC Ctrl (E)    : 0x%08X\n", vidc_ctl);
-       printk(KERN_DEBUG " IOEB Ctrl        : 0x%08X\n", vid_ctl);
-#endif
-}
-
-static int
-acornfb_setcolreg(u_int regno, u_int red, u_int green, u_int blue,
-                 u_int trans, struct fb_info *info)
-{
-       union palette pal;
-
-       if (regno >= current_par.palette_size)
-               return 1;
-
-       pal.p = 0;
-       pal.vidc.reg   = regno;
-       pal.vidc.red   = red >> 12;
-       pal.vidc.green = green >> 12;
-       pal.vidc.blue  = blue >> 12;
-
-       current_par.palette[regno] = pal;
-
-       vidc_writel(pal.p);
-
-       return 0;
-}
-#endif
-
 #ifdef HAS_VIDC20
 #include <mach/acornfb.h>
 
@@ -634,16 +394,7 @@ acornfb_adjust_timing(struct fb_info *info, struct fb_var_screeninfo *var, u_int
        /* hsync_len must be even */
        var->hsync_len = (var->hsync_len + 1) & ~1;
 
-#ifdef HAS_VIDC
-       /* left_margin must be odd */
-       if ((var->left_margin & 1) == 0) {
-               var->left_margin -= 1;
-               var->right_margin += 1;
-       }
-
-       /* right_margin must be odd */
-       var->right_margin |= 1;
-#elif defined(HAS_VIDC20)
+#if defined(HAS_VIDC20)
        /* left_margin must be even */
        if (var->left_margin & 1) {
                var->left_margin += 1;
@@ -787,11 +538,7 @@ static int acornfb_set_par(struct fb_info *info)
                break;
        case 8:
                current_par.palette_size = VIDC_PALETTE_SIZE;
-#ifdef HAS_VIDC
-               info->fix.visual = FB_VISUAL_STATIC_PSEUDOCOLOR;
-#else
                info->fix.visual = FB_VISUAL_PSEUDOCOLOR;
-#endif
                break;
 #ifdef HAS_VIDC20
        case 16:
@@ -971,9 +718,6 @@ static void acornfb_init_fbinfo(void)
 #if defined(HAS_VIDC20)
        fb_info.var.red.length     = 8;
        fb_info.var.transp.length  = 4;
-#elif defined(HAS_VIDC)
-       fb_info.var.red.length     = 4;
-       fb_info.var.transp.length  = 1;
 #endif
        fb_info.var.green          = fb_info.var.red;
        fb_info.var.blue           = fb_info.var.red;
@@ -1310,14 +1054,6 @@ static int acornfb_probe(struct platform_device *dev)
                fb_info.fix.smem_start = handle;
        }
 #endif
-#if defined(HAS_VIDC)
-       /*
-        * Archimedes/A5000 machines use a fixed address for their
-        * framebuffers.  Free unused pages
-        */
-       free_unused_pages(PAGE_OFFSET + size, PAGE_OFFSET + MAX_SIZE);
-#endif
-
        fb_info.fix.smem_len = size;
        current_par.palette_size   = VIDC_PALETTE_SIZE;
 
index fb2a7ff..175c8ff 100644 (file)
 #include <asm/hardware/iomd.h>
 #define VIDC_PALETTE_SIZE      256
 #define VIDC_NAME              "VIDC20"
-#elif defined(HAS_VIDC)
-#include <asm/hardware/memc.h>
-#define VIDC_PALETTE_SIZE      16
-#define VIDC_NAME              "VIDC"
 #endif
 
 #define EXTEND8(x) ((x)|(x)<<8)
@@ -101,31 +97,6 @@ struct modex_params {
        const struct modey_params *modey;
 };
 
-#ifdef HAS_VIDC
-
-#define VID_CTL_VS_NVSYNC      (1 << 3)
-#define VID_CTL_HS_NHSYNC      (1 << 2)
-#define VID_CTL_24MHz          (0)
-#define VID_CTL_25MHz          (1)
-#define VID_CTL_36MHz          (2)
-
-#define VIDC_CTRL_CSYNC                (1 << 7)
-#define VIDC_CTRL_INTERLACE    (1 << 6)
-#define VIDC_CTRL_FIFO_0_4     (0 << 4)
-#define VIDC_CTRL_FIFO_1_5     (1 << 4)
-#define VIDC_CTRL_FIFO_2_6     (2 << 4)
-#define VIDC_CTRL_FIFO_3_7     (3 << 4)
-#define VIDC_CTRL_1BPP         (0 << 2)
-#define VIDC_CTRL_2BPP         (1 << 2)
-#define VIDC_CTRL_4BPP         (2 << 2)
-#define VIDC_CTRL_8BPP         (3 << 2)
-#define VIDC_CTRL_DIV3         (0 << 0)
-#define VIDC_CTRL_DIV2         (1 << 0)
-#define VIDC_CTRL_DIV1_5       (2 << 0)
-#define VIDC_CTRL_DIV1         (3 << 0)
-
-#endif
-
 #ifdef HAS_VIDC20
 /*
  * VIDC20 registers
index 285d552..3c14e43 100644 (file)
 P3
+# Standard 224-color Linux logo
 80 80
 255
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 11 15 17 33 49 54 59 85 92 73 97 106 
-83 116 129 105 131 142 115 114 122 74 88 93 20 29 31 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 6 6 6 6 6 6 10 10 10 10 10 10 
-10 10 10 6 6 6 6 6 6 6 6 6 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 2 3 3 17 23 26 50 67 72 73 97 106 59 85 92 73 97 106 
-105 131 142 124 127 131 105 131 142 105 131 142 53 75 83 6 8 8 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 6 6 6 10 10 10 14 14 14 22 22 22 26 26 26 30 30 30 34 34 34 
-30 30 30 30 30 30 26 26 26 18 18 18 14 14 14 10 10 10 6 6 6 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 0 0 0 
-0 0 0 1 1 1 26 35 39 59 85 92 59 85 92 59 85 92 29 43 47 53 75 83 
-108 122 132 132 98 104 108 122 132 105 131 142 101 101 101 43 45 48 6 8 8 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-6 6 6 14 14 14 26 26 26 42 42 42 54 54 54 66 66 66 78 78 78 78 78 78 
-78 78 78 74 74 74 66 66 66 54 54 54 42 42 42 26 26 26 18 18 18 10 10 10 
-6 6 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 0 
-11 15 17 27 40 45 59 85 92 59 85 92 27 40 45 31 45 49 73 97 106 93 121 133 
-108 122 132 108 122 132 105 131 142 108 122 132 105 131 142 73 97 106 26 35 39 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 10 10 10 
-22 22 22 42 42 42 66 66 66 86 86 86 66 66 66 38 38 38 38 38 38 22 22 22 
-26 26 26 34 34 34 54 54 54 66 66 66 86 86 86 70 70 70 46 46 46 26 26 26 
-14 14 14 6 6 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 7 12 13 21 31 35 42 59 64 
-53 75 83 53 75 83 50 67 72 42 59 64 32 40 45 42 59 64 73 97 106 116 116 116 
-132 98 104 116 116 116 108 122 132 117 104 110 105 131 142 83 116 129 50 67 72 7 12 13 
-1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 10 10 10 26 26 26 
-50 50 50 82 82 82 58 58 58 6 6 6 2 2 6 2 2 6 2 2 6 2 2 6 
-2 2 6 2 2 6 2 2 6 2 2 6 6 6 6 54 54 54 86 86 86 66 66 66 
-38 38 38 18 18 18 6 6 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-0 0 0 1 1 1 6 8 8 15 22 25 26 35 39 36 54 60 53 75 83 59 85 92 
-59 85 92 48 63 69 15 22 25 12 17 20 52 67 79 94 94 94 132 98 104 132 98 104 
-117 104 110 108 122 132 108 122 132 115 114 122 105 131 142 77 105 114 59 85 92 36 54 60 
-7 12 13 0 0 0 0 0 0 0 0 0 0 0 0 6 6 6 22 22 22 50 50 50 
-78 78 78 34 34 34 2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 
-2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 6 6 6 70 70 70 
-78 78 78 46 46 46 22 22 22 6 6 6 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-0 0 0 15 22 25 29 43 47 36 54 60 42 59 64 42 59 64 48 63 69 21 31 35 
-6 8 8 29 43 47 36 50 56 43 45 48 79 78 84 132 98 104 165 78 79 132 98 104 
-108 122 132 117 104 110 117 104 110 108 122 132 77 105 114 73 97 106 95 131 149 78 102 129 
-36 50 56 0 0 0 0 0 0 0 0 0 6 6 6 18 18 18 42 42 42 82 82 82 
-26 26 26 2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 
-2 2 6 2 2 6 2 2 6 14 14 14 46 46 46 34 34 34 6 6 6 2 2 6 
-42 42 42 78 78 78 42 42 42 18 18 18 6 6 6 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-27 40 45 53 75 83 48 63 69 24 31 37 6 8 12 0 0 0 18 25 28 26 35 39 
-12 17 20 26 35 39 65 78 84 112 81 86 152 81 83 137 83 86 132 98 104 117 104 110 
-117 104 110 132 98 104 132 98 104 115 114 122 73 97 106 53 75 83 95 131 149 93 124 152 
-68 78 128 15 22 25 0 0 0 0 0 0 10 10 10 30 30 30 66 66 66 58 58 58 
-2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 
-2 2 6 2 2 6 2 2 6 26 26 26 86 86 86 101 101 101 46 46 46 10 10 10 
-2 2 6 58 58 58 70 70 70 34 34 34 10 10 10 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-36 50 56 21 30 33 4 7 7 0 0 0 1 1 1 17 12 12 69 31 31 68 59 64 
-57 59 63 21 31 35 32 40 45 86 73 69 152 81 83 152 81 83 117 104 110 132 98 104 
-152 81 83 132 98 104 108 122 132 77 105 114 77 105 114 93 121 133 95 131 149 93 124 152 
-95 131 149 53 75 83 11 15 17 0 0 0 14 14 14 42 42 42 86 86 86 10 10 10 
-2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 
-2 2 6 2 2 6 2 2 6 30 30 30 94 94 94 94 94 94 58 58 58 26 26 26 
-2 2 6 6 6 6 78 78 78 54 54 54 22 22 22 6 6 6 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-17 23 26 2 3 3 0 0 0 17 12 12 69 31 31 123 55 55 123 55 55 152 81 83 
-86 73 69 17 23 26 7 12 13 45 54 57 101 101 101 137 83 86 132 98 104 132 98 104 
-137 83 86 117 104 110 77 105 114 42 59 64 50 67 72 78 102 129 91 117 157 91 117 157 
-95 131 149 83 116 129 40 48 73 6 6 6 22 22 22 62 62 62 62 62 62 2 2 6 
-2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 
-2 2 6 2 2 6 2 2 6 26 26 26 54 54 54 38 38 38 18 18 18 10 10 10 
-2 2 6 2 2 6 34 34 34 82 82 82 38 38 38 14 14 14 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-1 1 1 1 2 2 2 3 3 28 12 12 123 55 55 174 79 79 174 79 79 174 79 79 
-152 81 83 68 59 64 26 35 39 27 40 45 79 78 84 137 83 86 165 78 79 137 83 86 
-94 94 94 48 63 69 36 50 56 50 67 72 73 97 106 93 121 133 93 124 152 93 124 152 
-95 131 149 91 118 149 78 102 129 27 40 45 30 30 30 78 78 78 30 30 30 2 2 6 
-2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 
-2 2 6 2 2 6 2 2 6 10 10 10 10 10 10 2 2 6 2 2 6 2 2 6 
-2 2 6 2 2 6 2 2 6 78 78 78 50 50 50 18 18 18 6 6 6 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-4 5 3 24 53 24 19 31 15 8 7 3 90 61 47 165 78 79 174 79 79 174 79 79 
-174 79 79 137 83 86 60 52 57 7 12 13 17 23 26 70 70 70 132 98 104 112 81 86 
-79 78 84 31 45 49 15 22 25 53 75 83 91 118 149 86 106 160 91 117 157 93 124 152 
-91 117 157 93 124 152 95 131 149 53 75 83 50 50 50 86 86 86 14 14 14 2 2 6 
-2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 
-2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 
-2 2 6 2 2 6 2 2 6 54 54 54 66 66 66 26 26 26 6 6 6 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-19 31 15 34 76 34 34 76 34 19 31 15 28 12 12 123 55 55 174 79 79 174 79 79 
-174 79 79 165 78 79 112 81 86 32 40 45 15 22 25 38 53 58 65 78 84 29 31 32 
-21 30 33 42 59 64 60 80 103 78 102 129 87 112 149 84 96 162 91 117 157 93 124 152 
-91 117 157 93 124 152 93 121 133 59 85 92 57 68 71 82 85 86 2 2 6 2 2 6 
-2 2 6 6 6 6 10 10 10 2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 
-2 2 6 2 2 6 2 2 6 6 6 6 14 14 14 10 10 10 2 2 6 2 2 6 
-2 2 6 2 2 6 2 2 6 18 18 18 82 82 82 34 34 34 10 10 10 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-34 76 34 40 89 40 40 89 40 34 76 34 8 15 6 48 26 18 123 55 55 174 79 79 
-174 79 79 174 79 79 137 83 86 68 59 64 32 40 45 21 30 33 31 45 49 21 31 35 
-12 17 20 48 63 69 78 102 129 81 88 166 84 96 162 91 117 157 93 124 152 91 117 157 
-93 124 152 95 131 149 83 116 129 59 85 92 57 68 71 86 86 86 2 2 6 2 2 6 
-6 6 6 6 6 6 22 22 22 34 34 34 6 6 6 2 2 6 2 2 6 2 2 6 
-2 2 6 2 2 6 18 18 18 34 34 34 10 10 10 50 50 50 22 22 22 2 2 6 
-2 2 6 2 2 6 2 2 6 10 10 10 86 86 86 42 42 42 14 14 14 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-40 89 40 40 89 40 40 89 40 40 89 40 24 53 24 6 6 6 69 31 31 123 55 55 
-123 55 55 90 61 47 69 31 31 36 32 33 21 31 35 7 12 13 18 25 28 48 63 69 
-60 80 103 68 78 128 84 101 153 84 96 162 84 96 162 91 117 157 91 117 157 84 96 162 
-91 117 157 73 97 106 48 63 69 50 67 72 57 59 63 86 86 86 2 2 6 2 2 6 
-38 38 38 116 116 116 94 94 94 22 22 22 22 22 22 2 2 6 2 2 6 2 2 6 
-14 14 14 86 86 86 124 131 137 170 170 170 151 151 151 38 38 38 26 26 26 6 6 6 
-2 2 6 2 2 6 2 2 6 2 2 6 86 86 86 46 46 46 14 14 14 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-34 76 34 40 89 40 40 89 40 40 89 40 34 76 34 19 31 15 17 12 12 48 26 18 
-48 26 18 8 7 3 10 10 22 23 29 47 51 61 92 42 59 64 21 30 33 34 45 54 
-68 78 128 81 88 166 81 82 173 86 106 160 86 106 160 84 96 162 86 106 160 87 112 149 
-91 118 149 77 105 114 52 67 79 32 40 45 50 50 50 86 86 86 2 2 6 14 14 14 
-124 131 137 198 198 198 195 195 195 116 116 116 10 10 10 2 2 6 2 2 6 6 6 6 
-101 98 89 187 187 187 210 210 210 218 218 218 214 214 214 124 131 137 14 14 14 6 6 6 
-2 2 6 2 2 6 2 2 6 2 2 6 86 86 86 50 50 50 18 18 18 6 6 6 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-19 31 15 34 76 34 40 89 40 40 89 40 40 89 40 24 53 24 8 7 3 0 0 0 
-6 8 12 28 32 52 51 61 92 54 54 122 74 77 160 68 78 128 26 35 39 6 8 8 
-34 45 54 68 78 128 84 96 162 86 106 160 86 106 160 81 88 166 84 96 162 87 112 149 
-73 97 106 36 50 56 33 49 54 18 18 18 46 46 46 86 86 86 2 2 6 54 54 54 
-218 218 218 195 195 195 226 226 226 246 246 246 58 58 58 2 2 6 2 2 6 30 30 30 
-210 210 210 253 253 253 170 170 170 124 127 131 221 221 221 234 234 234 74 74 74 2 2 6 
-2 2 6 2 2 6 2 2 6 2 2 6 70 70 70 58 58 58 22 22 22 6 6 6 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-4 5 3 24 53 24 40 89 40 40 89 40 34 76 34 12 22 15 4 5 3 4 5 3 
-13 17 26 54 54 122 78 78 174 78 78 174 78 78 174 74 77 160 51 61 92 21 31 35 
-26 35 39 53 75 83 84 101 153 81 82 173 81 88 166 84 101 153 60 80 103 60 80 103 
-53 75 83 38 53 58 42 59 64 22 22 22 46 46 46 82 82 82 2 2 6 106 106 106 
-170 170 170 26 26 26 86 86 86 226 226 226 124 127 131 10 10 10 14 14 14 46 46 46 
-231 231 231 190 190 190 6 6 6 70 70 70 90 90 90 238 238 238 151 151 151 2 2 6 
-2 2 6 2 2 6 2 2 6 2 2 6 70 70 70 58 58 58 22 22 22 6 6 6 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-1 2 2 8 15 6 24 53 24 34 76 34 19 31 15 8 15 6 63 55 20 63 55 20 
-18 18 18 40 48 73 74 77 160 78 78 174 78 78 174 81 82 173 74 77 160 52 67 79 
-17 23 26 21 31 35 60 80 103 81 88 166 74 77 160 78 102 129 36 54 60 12 17 20 
-42 59 64 48 63 69 21 31 35 18 18 18 42 42 42 86 86 86 6 6 6 116 116 116 
-106 106 106 6 6 6 70 70 70 151 151 151 124 127 131 18 18 18 38 38 38 54 54 54 
-221 221 221 106 106 106 2 2 6 14 14 14 46 46 46 190 190 190 198 198 198 2 2 6 
-2 2 6 2 2 6 2 2 6 2 2 6 74 74 74 62 62 62 22 22 22 6 6 6 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-11 15 17 0 0 0 12 22 15 19 31 15 8 15 6 63 55 20 149 139 69 149 139 69 
-63 55 20 10 10 22 54 54 122 78 78 174 78 78 174 78 78 174 81 82 173 68 78 128 
-24 31 37 6 6 6 36 50 56 60 80 103 51 61 92 42 59 64 36 50 56 31 45 49 
-29 43 47 27 40 45 6 8 8 14 14 14 42 42 42 94 94 94 14 14 14 101 101 101 
-124 127 131 2 2 6 18 18 18 116 116 116 106 107 48 121 92 8 121 92 8 98 70 6 
-170 170 170 106 106 106 2 2 6 2 2 6 2 2 6 195 195 195 195 195 195 6 6 6 
-2 2 6 2 2 6 2 2 6 2 2 6 74 74 74 62 62 62 22 22 22 6 6 6 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-26 35 39 3 5 6 1 1 1 2 3 3 35 31 12 133 118 54 175 176 80 175 176 80 
-133 118 54 35 31 12 23 29 47 54 54 122 78 78 174 78 78 174 74 77 160 68 78 128 
-51 61 92 31 45 49 26 35 39 36 50 56 29 43 47 7 12 13 21 30 33 42 59 64 
-18 25 28 7 12 13 1 1 1 10 10 10 38 38 38 90 90 90 14 14 14 58 58 58 
-210 210 210 26 26 26 62 42 6 154 114 10 226 170 11 237 188 10 220 174 15 184 138 11 
-220 174 15 174 140 55 35 31 12 2 2 6 70 70 70 246 246 246 124 131 137 2 2 6 
-2 2 6 2 2 6 2 2 6 2 2 6 70 70 70 66 66 66 26 26 26 6 6 6 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-27 40 45 17 23 26 2 3 3 1 1 1 56 77 35 165 152 80 175 176 80 175 176 80 
-175 176 80 106 107 48 22 22 22 28 32 52 54 54 122 54 54 122 51 61 92 28 32 52 
-20 27 34 31 45 49 11 15 17 7 12 13 36 50 56 31 45 49 29 43 47 36 50 56 
-6 8 8 0 0 0 0 0 0 10 10 10 38 38 38 86 86 86 14 14 14 10 10 10 
-195 195 195 198 179 130 192 133 9 220 174 15 239 182 13 237 188 10 232 195 16 239 207 25 
-237 201 50 241 208 19 232 195 16 184 138 11 198 179 130 208 206 196 42 42 42 2 2 6 
-2 2 6 2 2 6 2 2 6 2 2 6 50 50 50 74 74 74 30 30 30 6 6 6 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-15 22 25 26 35 39 15 22 25 0 0 0 35 31 12 133 118 54 175 176 80 175 176 80 
-175 176 80 165 152 80 56 77 35 6 8 12 23 29 47 13 17 26 2 2 6 0 0 0 
-1 2 2 26 35 39 26 35 39 26 35 39 42 59 64 42 59 64 20 29 31 6 8 8 
-0 0 0 0 0 0 0 0 0 10 10 10 34 34 34 86 86 86 14 14 14 2 2 6 
-121 92 8 192 133 9 219 162 10 239 182 13 237 188 10 232 195 16 241 208 19 237 201 50 
-237 201 50 239 207 25 241 208 19 241 208 19 241 208 19 230 187 11 121 92 8 2 2 6 
-2 2 6 2 2 6 2 2 6 2 2 6 50 50 50 82 82 82 34 34 34 10 10 10 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-1 2 2 15 22 25 31 45 49 6 8 12 4 5 3 63 55 20 149 139 69 175 176 80 
-175 176 80 175 176 80 106 107 48 20 16 6 1 1 1 0 0 0 2 3 3 11 15 17 
-21 30 33 36 50 56 36 50 56 24 31 37 15 22 25 6 8 8 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 10 10 10 34 34 34 82 82 82 30 30 30 62 42 6 
-180 123 7 206 145 10 230 174 11 239 182 13 237 188 10 238 202 15 241 208 19 237 201 50 
-239 207 25 241 208 19 241 208 19 241 208 19 230 187 11 220 174 15 184 138 11 6 6 6 
-2 2 6 2 2 6 2 2 6 2 2 6 26 26 26 94 94 94 42 42 42 14 14 14 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-0 0 0 1 2 2 29 43 47 26 35 39 3 5 6 8 7 3 106 107 48 165 152 80 
-175 176 80 149 139 69 63 55 20 4 5 3 2 3 3 12 17 20 26 35 39 26 35 39 
-17 23 26 7 12 13 6 8 8 3 5 6 1 2 2 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 10 10 10 30 30 30 78 78 78 50 50 50 104 69 6 
-192 133 9 216 158 10 236 178 12 237 188 10 232 195 16 241 208 19 237 201 50 237 201 50 
-241 208 19 241 208 19 241 208 19 204 160 10 200 144 11 216 158 10 156 118 10 2 2 6 
-2 2 6 2 2 6 2 2 6 2 2 6 6 6 6 90 90 90 54 54 54 18 18 18 
-6 6 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-0 0 0 0 0 0 12 17 20 27 40 45 18 25 28 1 1 1 35 31 12 106 107 48 
-149 139 69 56 77 35 8 7 3 1 2 2 12 17 20 26 35 39 21 31 35 11 15 17 
-3 5 6 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 10 10 10 30 30 30 78 78 78 46 46 46 22 22 22 
-137 92 6 204 160 10 239 182 13 237 188 10 238 202 15 241 208 19 241 208 19 241 208 19 
-241 208 19 204 160 10 184 138 11 210 150 10 216 158 10 210 150 10 98 70 6 2 2 6 
-6 6 6 54 54 54 14 14 14 2 2 6 2 2 6 62 62 62 74 74 74 30 30 30 
-10 10 10 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-0 0 0 0 0 0 1 1 1 15 22 25 33 49 54 12 17 20 2 3 3 35 31 12 
-56 77 35 20 16 6 1 1 1 18 25 28 21 31 35 11 15 17 1 1 1 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 10 10 10 34 34 34 78 78 78 50 50 50 6 6 6 
-88 55 22 139 102 15 190 146 13 230 187 11 239 207 25 232 195 16 220 174 15 190 146 13 
-171 120 8 192 133 9 210 150 10 213 154 11 185 146 40 165 152 80 101 98 89 2 2 6 
-2 2 6 78 78 78 116 116 116 58 58 58 2 2 6 22 22 22 90 90 90 46 46 46 
-18 18 18 6 6 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-0 0 0 0 0 0 0 0 0 1 1 1 27 40 45 29 43 47 3 5 6 2 3 3 
-8 7 3 1 1 1 17 23 26 31 45 49 15 22 25 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 10 10 10 38 38 38 86 86 86 50 50 50 6 6 6 
-124 127 131 168 158 138 156 107 11 171 120 8 204 160 10 184 138 11 197 138 11 200 144 11 
-206 145 10 206 145 10 197 138 11 198 179 130 195 195 195 198 198 198 170 170 170 14 14 14 
-2 2 6 22 22 22 116 116 116 116 116 116 22 22 22 2 2 6 74 74 74 70 70 70 
-30 30 30 10 10 10 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-0 0 0 0 0 0 0 0 0 0 0 0 11 15 17 31 45 49 26 35 39 3 5 6 
-0 0 0 7 12 13 27 40 45 18 25 28 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 6 6 6 18 18 18 50 50 50 101 101 101 26 26 26 10 10 10 
-124 131 137 190 190 190 168 158 138 156 107 11 197 138 11 200 144 11 197 138 11 192 133 9 
-180 123 7 185 146 40 198 179 130 187 187 187 202 202 202 221 221 221 214 214 214 66 66 66 
-2 2 6 2 2 6 50 50 50 62 62 62 6 6 6 2 2 6 10 10 10 90 90 90 
-50 50 50 18 18 18 6 6 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 15 22 25 36 54 60 18 25 28 
-0 0 0 21 30 33 27 40 45 2 3 3 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 10 10 10 34 34 34 74 74 74 74 74 74 2 2 6 6 6 6 
-151 151 151 198 198 198 190 190 190 168 158 138 148 132 55 156 107 11 156 107 11 169 125 40 
-168 158 138 187 187 187 190 190 190 210 210 210 246 246 246 253 253 253 253 253 253 180 180 180 
-6 6 6 2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 62 62 62 
-74 74 74 34 34 34 14 14 14 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 27 40 45 35 52 58 
-18 25 28 35 52 58 17 23 26 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 10 10 10 22 22 22 54 54 54 94 94 94 18 18 18 2 2 6 46 46 46 
-234 234 234 221 221 221 190 190 190 190 190 190 190 190 190 187 187 187 187 187 187 190 190 190 
-190 190 190 195 195 195 214 214 214 242 242 242 253 253 253 253 253 253 253 253 253 253 253 253 
-82 82 82 2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 14 14 14 
-86 86 86 54 54 54 22 22 22 6 6 6 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 7 12 13 33 49 54 
-52 72 81 36 54 60 6 8 8 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-6 6 6 18 18 18 46 46 46 90 90 90 46 46 46 18 18 18 6 6 6 180 180 180 
-253 253 253 246 246 246 202 202 202 190 190 190 190 190 190 190 190 190 190 190 190 190 190 190 
-202 202 202 231 231 231 250 250 250 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 
-202 202 202 14 14 14 2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 
-42 42 42 86 86 86 42 42 42 18 18 18 6 6 6 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 12 17 20 
-36 54 60 29 43 47 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 6 6 
-14 14 14 38 38 38 74 74 74 66 66 66 2 2 6 6 6 6 90 90 90 250 250 250 
-253 253 253 253 253 253 238 238 238 198 198 198 190 190 190 190 190 190 195 195 195 221 221 221 
-246 246 246 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 
-253 253 253 82 82 82 2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 
-2 2 6 78 78 78 70 70 70 34 34 34 14 14 14 6 6 6 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-21 30 33 35 52 58 6 8 12 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 14 14 14 
-34 34 34 66 66 66 78 78 78 6 6 6 2 2 6 18 18 18 218 218 218 253 253 253 
-253 253 253 253 253 253 253 253 253 246 246 246 226 226 226 231 231 231 246 246 246 253 253 253 
-253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 
-253 253 253 180 180 180 2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 
-2 2 6 18 18 18 90 90 90 62 62 62 30 30 30 10 10 10 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-12 17 20 36 54 60 29 43 47 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 10 10 10 26 26 26 
-58 58 58 90 90 90 18 18 18 2 2 6 2 2 6 106 106 106 253 253 253 253 253 253 
-253 253 253 253 253 253 253 253 253 253 253 253 250 250 250 253 253 253 253 253 253 253 253 253 
-253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 
-253 253 253 231 231 231 18 18 18 2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 
-2 2 6 2 2 6 18 18 18 94 94 94 54 54 54 26 26 26 10 10 10 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 21 30 33 35 52 58 6 8 12 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 6 6 22 22 22 50 50 50 
-90 90 90 26 26 26 2 2 6 2 2 6 14 14 14 195 195 195 250 250 250 253 253 253 
-253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 
-253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 
-250 250 250 242 242 242 54 54 54 2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 
-2 2 6 2 2 6 2 2 6 38 38 38 86 86 86 50 50 50 22 22 22 6 6 6 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 12 17 20 36 54 60 29 43 47 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 6 6 6 14 14 14 38 38 38 82 82 82 
-34 34 34 2 2 6 2 2 6 2 2 6 42 42 42 195 195 195 246 246 246 253 253 253 
-253 253 253 253 253 253 253 253 253 250 250 250 242 242 242 242 242 242 250 250 250 253 253 253 
-253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 250 250 250 246 246 246 238 238 238 
-226 226 226 231 231 231 101 101 101 6 6 6 2 2 6 2 2 6 2 2 6 2 2 6 
-2 2 6 2 2 6 2 2 6 2 2 6 38 38 38 82 82 82 42 42 42 14 14 14 
-6 6 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 21 30 33 35 52 58 6 8 12 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 10 10 10 26 26 26 62 62 62 66 66 66 
-2 2 6 2 2 6 2 2 6 6 6 6 70 70 70 170 170 170 202 202 202 234 234 234 
-246 246 246 250 250 250 250 250 250 238 238 238 226 226 226 231 231 231 238 238 238 250 250 250 
-250 250 250 250 250 250 246 246 246 231 231 231 214 214 214 202 202 202 202 202 202 202 202 202 
-198 198 198 202 202 202 180 180 180 18 18 18 2 2 6 2 2 6 2 2 6 2 2 6 
-2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 62 62 62 66 66 66 30 30 30 
-10 10 10 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 12 17 20 36 54 60 29 43 47 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 14 14 14 42 42 42 82 82 82 18 18 18 
-2 2 6 2 2 6 2 2 6 10 10 10 94 94 94 180 180 180 218 218 218 242 242 242 
-250 250 250 253 253 253 253 253 253 250 250 250 234 234 234 253 253 253 253 253 253 253 253 253 
-253 253 253 253 253 253 253 253 253 246 246 246 238 238 238 226 226 226 210 210 210 202 202 202 
-195 195 195 195 195 195 210 210 210 151 151 151 6 6 6 14 14 14 50 50 50 14 14 14 
-2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 6 6 6 86 86 86 46 46 46 
-18 18 18 6 6 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 21 30 33 35 52 58 6 8 12 0 0 0 
-0 0 0 0 0 0 0 0 0 6 6 6 22 22 22 54 54 54 70 70 70 2 2 6 
-2 2 6 10 10 10 2 2 6 22 22 22 170 170 170 231 231 231 250 250 250 253 253 253 
-253 253 253 253 253 253 253 253 253 250 250 250 242 242 242 253 253 253 253 253 253 253 253 253 
-253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 246 246 246 
-231 231 231 202 202 202 198 198 198 226 226 226 94 94 94 2 2 6 6 6 6 38 38 38 
-30 30 30 2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 62 62 62 66 66 66 
-26 26 26 10 10 10 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 6 8 8 33 49 54 29 43 47 6 8 12 
-0 0 0 0 0 0 0 0 0 10 10 10 30 30 30 74 74 74 50 50 50 2 2 6 
-26 26 26 26 26 26 2 2 6 106 106 106 238 238 238 253 253 253 253 253 253 253 253 253 
-253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 
-253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 
-253 253 253 246 246 246 218 218 218 202 202 202 210 210 210 14 14 14 2 2 6 2 2 6 
-30 30 30 22 22 22 2 2 6 2 2 6 2 2 6 2 2 6 18 18 18 86 86 86 
-42 42 42 14 14 14 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 12 17 20 33 49 54 17 23 26 
-0 0 0 0 0 0 0 0 0 14 14 14 42 42 42 90 90 90 22 22 22 2 2 6 
-42 42 42 2 2 6 18 18 18 218 218 218 253 253 253 253 253 253 253 253 253 253 253 253 
-253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 
-253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 
-253 253 253 253 253 253 250 250 250 221 221 221 218 218 218 101 101 101 2 2 6 14 14 14 
-18 18 18 38 38 38 10 10 10 2 2 6 2 2 6 2 2 6 2 2 6 78 78 78 
-58 58 58 22 22 22 6 6 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 15 22 25 36 54 60 
-0 0 0 0 0 0 0 0 0 18 18 18 54 54 54 82 82 82 2 2 6 26 26 26 
-22 22 22 2 2 6 124 127 131 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 
-253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 
-253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 
-253 253 253 253 253 253 253 253 253 250 250 250 238 238 238 198 198 198 6 6 6 38 38 38 
-58 58 58 26 26 26 38 38 38 2 2 6 2 2 6 2 2 6 2 2 6 46 46 46 
-78 78 78 30 30 30 10 10 10 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 21 30 33 
-36 54 60 0 0 0 0 0 0 30 30 30 74 74 74 58 58 58 2 2 6 42 42 42 
-2 2 6 22 22 22 231 231 231 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 
-253 253 253 253 253 253 253 253 253 250 250 250 253 253 253 253 253 253 253 253 253 253 253 253 
-253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 
-253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 246 246 246 46 46 46 38 38 38 
-42 42 42 14 14 14 38 38 38 14 14 14 2 2 6 2 2 6 2 2 6 6 6 6 
-86 86 86 46 46 46 14 14 14 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-36 54 60 0 0 0 0 0 0 42 42 42 90 90 90 18 18 18 18 18 18 26 26 26 
-2 2 6 116 116 116 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 
-253 253 253 253 253 253 250 250 250 238 238 238 253 253 253 253 253 253 253 253 253 253 253 253 
-253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 
-253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 94 94 94 6 6 6 
-2 2 6 2 2 6 10 10 10 34 34 34 2 2 6 2 2 6 2 2 6 2 2 6 
-74 74 74 58 58 58 22 22 22 6 6 6 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 36 54 60 26 26 26 66 66 66 82 82 82 2 2 6 38 38 38 6 6 6 
-14 14 14 210 210 210 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 
-253 253 253 253 253 253 246 246 246 242 242 242 253 253 253 253 253 253 253 253 253 253 253 253 
-253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 
-253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 151 151 151 2 2 6 
-2 2 6 2 2 6 2 2 6 46 46 46 2 2 6 2 2 6 2 2 6 2 2 6 
-42 42 42 74 74 74 30 30 30 10 10 10 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-6 6 6 36 54 60 21 30 33 90 90 90 26 26 26 6 6 6 42 42 42 2 2 6 
-74 74 74 250 250 250 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 
-253 253 253 253 253 253 242 242 242 242 242 242 253 253 253 253 253 253 253 253 253 253 253 253 
-253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 
-253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 180 180 180 2 2 6 
-2 2 6 2 2 6 2 2 6 46 46 46 2 2 6 2 2 6 2 2 6 2 2 6 
-10 10 10 86 86 86 38 38 38 10 10 10 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-10 10 10 26 26 26 36 54 60 82 82 82 2 2 6 22 22 22 18 18 18 2 2 6 
-151 151 151 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 
-253 253 253 253 253 253 234 234 234 242 242 242 253 253 253 253 253 253 253 253 253 253 253 253 
-253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 
-253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 202 202 202 2 2 6 
-2 2 6 2 2 6 2 2 6 38 38 38 2 2 6 2 2 6 2 2 6 2 2 6 
-6 6 6 86 86 86 46 46 46 14 14 14 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 6 6 
-18 18 18 46 46 46 86 86 86 36 54 60 2 2 6 34 34 34 10 10 10 6 6 6 
-210 210 210 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 
-253 253 253 253 253 253 234 234 234 242 242 242 253 253 253 253 253 253 253 253 253 253 253 253 
-253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 
-253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 221 221 221 6 6 6 
-2 2 6 2 2 6 6 6 6 30 30 30 2 2 6 2 2 6 2 2 6 2 2 6 
-2 2 6 82 82 82 54 54 54 18 18 18 6 6 6 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 10 10 10 
-26 26 26 66 66 66 62 62 62 2 2 6 2 2 6 38 38 38 10 10 10 26 26 26 
-238 238 238 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 
-253 253 253 253 253 253 231 231 231 238 238 238 253 253 253 253 253 253 253 253 253 253 253 253 
-253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 
-253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 231 231 231 6 6 6 
-2 2 6 2 2 6 10 10 10 30 30 30 2 2 6 2 2 6 2 2 6 2 2 6 
-2 2 6 66 66 66 58 58 58 22 22 22 6 6 6 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 10 10 10 
-38 38 38 78 78 78 6 6 6 2 2 6 2 2 6 46 46 46 14 14 14 42 42 42 
-246 246 246 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 
-253 253 253 253 253 253 231 231 231 242 242 242 253 253 253 253 253 253 253 253 253 253 253 253 
-253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 
-253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 234 234 234 10 10 10 
-2 2 6 2 2 6 22 22 22 14 14 14 2 2 6 2 2 6 2 2 6 2 2 6 
-2 2 6 66 66 66 62 62 62 22 22 22 6 6 6 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 6 6 18 18 18 
-50 50 50 74 74 74 2 2 6 2 2 6 14 14 14 70 70 70 34 34 34 62 62 62 
-250 250 250 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 
-253 253 253 253 253 253 231 231 231 246 246 246 253 253 253 253 253 253 253 253 253 253 253 253 
-253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 
-253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 234 234 234 14 14 14 
-2 2 6 2 2 6 30 30 30 2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 
-2 2 6 66 66 66 62 62 62 22 22 22 6 6 6 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 6 6 18 18 18 
-54 54 54 62 62 62 2 2 6 2 2 6 2 2 6 30 30 30 46 46 46 70 70 70 
-250 250 250 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 
-253 253 253 253 253 253 231 231 231 246 246 246 253 253 253 253 253 253 253 253 253 253 253 253 
-253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 
-253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 226 226 226 10 10 10 
-2 2 6 6 6 6 30 30 30 2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 
-2 2 6 66 66 66 58 58 58 22 22 22 6 6 6 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 6 6 22 22 22 
-58 58 58 62 62 62 2 2 6 2 2 6 2 2 6 2 2 6 30 30 30 78 78 78 
-250 250 250 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 
-253 253 253 253 253 253 231 231 231 246 246 246 253 253 253 253 253 253 253 253 253 253 253 253 
-253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 
-253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 202 202 202 2 2 6 
-22 22 22 34 34 34 20 16 6 22 22 22 26 26 26 18 18 18 6 6 6 2 2 6 
-2 2 6 82 82 82 54 54 54 18 18 18 6 6 6 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 6 6 26 26 26 
-62 62 62 106 106 106 63 55 20 184 138 11 204 160 10 121 92 8 6 6 6 62 62 62 
-238 238 238 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 
-253 253 253 253 253 253 231 231 231 246 246 246 253 253 253 253 253 253 253 253 253 253 253 253 
-253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 
-253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 151 151 151 18 18 18 
-14 14 14 2 2 6 2 2 6 2 2 6 6 6 6 18 18 18 66 66 66 38 38 38 
-6 6 6 94 94 94 50 50 50 18 18 18 6 6 6 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 6 6 6 10 10 10 10 10 10 18 18 18 38 38 38 
-78 78 78 138 132 106 216 158 10 242 186 14 246 190 14 246 190 14 156 118 10 10 10 10 
-90 90 90 238 238 238 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 
-253 253 253 253 253 253 231 231 231 250 250 250 253 253 253 253 253 253 253 253 253 253 253 253 
-253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 
-253 253 253 253 253 253 253 253 253 246 230 190 214 187 87 214 187 87 185 146 40 35 31 12 
-2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 38 38 38 46 46 46 
-26 26 26 106 106 106 54 54 54 18 18 18 6 6 6 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 6 6 6 14 14 14 22 22 22 30 30 30 38 38 38 50 50 50 70 70 70 
-106 106 106 185 146 40 226 170 11 242 186 14 246 190 14 246 190 14 246 190 14 154 114 10 
-6 6 6 74 74 74 226 226 226 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 
-253 253 253 253 253 253 231 231 231 250 250 250 253 253 253 253 253 253 253 253 253 253 253 253 
-253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 
-253 253 253 253 253 253 253 253 253 237 201 50 241 196 14 241 208 19 232 195 16 35 31 12 
-2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 6 6 6 30 30 30 26 26 26 
-204 160 10 165 152 80 66 66 66 26 26 26 6 6 6 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-6 6 6 18 18 18 38 38 38 58 58 58 78 78 78 86 86 86 101 101 101 124 127 131 
-174 140 55 210 150 10 234 174 13 246 186 14 246 190 14 246 190 14 246 190 14 237 188 10 
-98 70 6 2 2 6 46 46 46 198 198 198 253 253 253 253 253 253 253 253 253 253 253 253 
-253 253 253 253 253 253 234 234 234 242 242 242 253 253 253 253 253 253 253 253 253 253 253 253 
-253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 
-253 253 253 253 253 253 253 253 253 214 187 87 242 186 14 241 196 14 204 160 10 20 16 6 
-2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 6 6 6 121 92 8 
-238 202 15 232 195 16 82 82 82 34 34 34 10 10 10 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-14 14 14 38 38 38 70 70 70 148 132 55 185 146 40 200 144 11 197 138 11 197 138 11 
-213 154 11 226 170 11 242 186 14 246 190 14 246 190 14 246 190 14 246 190 14 246 190 14 
-220 174 15 35 31 12 2 2 6 22 22 22 151 151 151 250 250 250 253 253 253 253 253 253 
-253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 
-253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 
-253 253 253 250 250 250 242 242 242 214 187 87 239 182 13 237 188 10 213 154 11 35 31 12 
-2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 62 42 6 220 174 15 
-237 188 10 237 188 10 113 101 86 42 42 42 14 14 14 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 6 6 
-22 22 22 54 54 54 148 132 55 213 154 11 226 170 11 230 174 11 226 170 11 226 170 11 
-236 178 12 242 186 14 246 190 14 246 190 14 246 190 14 246 190 14 246 190 14 246 190 14 
-241 196 14 184 138 11 10 10 10 2 2 6 6 6 6 116 116 116 242 242 242 253 253 253 
-253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 
-253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 
-253 253 253 231 231 231 198 198 198 213 164 39 236 178 12 236 178 12 210 150 10 137 92 6 
-20 16 6 2 2 6 2 2 6 2 2 6 6 6 6 62 42 6 200 144 11 236 178 12 
-239 182 13 239 182 13 124 112 88 58 58 58 22 22 22 6 6 6 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 10 10 10 
-30 30 30 70 70 70 169 125 40 226 170 11 239 182 13 242 186 14 242 186 14 246 186 14 
-246 190 14 246 190 14 246 190 14 246 190 14 246 190 14 246 190 14 246 190 14 246 190 14 
-246 190 14 232 195 16 98 70 6 2 2 6 2 2 6 2 2 6 66 66 66 221 221 221 
-253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 
-253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 
-253 253 253 202 202 202 198 198 198 213 164 39 230 174 11 230 174 11 216 158 10 192 133 9 
-163 110 8 120 80 7 98 70 6 120 80 7 167 114 7 197 138 11 226 170 11 239 182 13 
-242 186 14 242 186 14 165 152 80 78 78 78 34 34 34 14 14 14 6 6 6 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 6 6 
-30 30 30 78 78 78 185 146 40 226 170 11 239 182 13 246 190 14 246 190 14 246 190 14 
-246 190 14 246 190 14 246 190 14 246 190 14 246 190 14 246 190 14 246 190 14 246 190 14 
-246 190 14 241 196 14 204 160 10 20 16 6 2 2 6 2 2 6 2 2 6 38 38 38 
-218 218 218 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 
-253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 
-250 250 250 202 202 202 198 198 198 213 164 39 226 170 11 236 178 12 224 166 10 210 150 10 
-200 144 11 197 138 11 192 133 9 197 138 11 210 150 10 226 170 11 242 186 14 246 190 14 
-246 190 14 246 186 14 220 174 15 124 112 88 62 62 62 30 30 30 14 14 14 6 6 6 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 10 10 10 
-30 30 30 78 78 78 174 140 55 224 166 10 239 182 13 246 190 14 246 190 14 246 190 14 
-246 190 14 246 190 14 246 190 14 246 190 14 246 190 14 246 190 14 246 190 14 246 190 14 
-246 190 14 246 190 14 241 196 14 139 102 15 2 2 6 2 2 6 2 2 6 2 2 6 
-78 78 78 250 250 250 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 
-253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 
-250 250 250 214 214 214 198 198 198 185 146 40 219 162 10 236 178 12 234 174 13 224 166 10 
-216 158 10 213 154 11 213 154 11 216 158 10 226 170 11 239 182 13 246 190 14 246 190 14 
-246 190 14 246 190 14 242 186 14 213 164 39 101 101 101 58 58 58 30 30 30 14 14 14 
-6 6 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 10 10 10 
-30 30 30 74 74 74 174 140 55 216 158 10 236 178 12 246 190 14 246 190 14 246 190 14 
-246 190 14 246 190 14 246 190 14 246 190 14 246 190 14 246 190 14 246 190 14 246 190 14 
-246 190 14 246 190 14 241 196 14 230 187 11 62 42 6 2 2 6 2 2 6 2 2 6 
-22 22 22 238 238 238 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 
-253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 
-253 253 253 226 226 226 187 187 187 169 125 40 216 158 10 236 178 12 239 182 13 236 178 12 
-230 174 11 226 170 11 226 170 11 230 174 11 236 178 12 242 186 14 246 190 14 246 190 14 
-246 190 14 246 190 14 246 186 14 239 182 13 213 164 39 106 106 106 66 66 66 34 34 34 
-14 14 14 6 6 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 6 6 
-26 26 26 70 70 70 149 139 69 213 154 11 236 178 12 246 190 14 246 190 14 246 190 14 
-246 190 14 246 190 14 246 190 14 246 190 14 246 190 14 246 190 14 246 190 14 246 190 14 
-246 190 14 246 190 14 246 190 14 241 196 14 190 146 13 20 16 6 2 2 6 2 2 6 
-46 46 46 246 246 246 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 
-253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 
-253 253 253 221 221 221 86 86 86 156 107 11 216 158 10 236 178 12 242 186 14 246 186 14 
-242 186 14 239 182 13 239 182 13 242 186 14 242 186 14 246 186 14 246 190 14 246 190 14 
-246 190 14 246 190 14 246 190 14 246 190 14 242 186 14 220 174 15 149 139 69 66 66 66 
-30 30 30 10 10 10 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 6 6 
-26 26 26 70 70 70 149 139 69 210 150 10 236 178 12 246 190 14 246 190 14 246 190 14 
-246 190 14 246 190 14 246 190 14 246 190 14 246 190 14 246 190 14 246 190 14 246 190 14 
-246 190 14 246 190 14 246 190 14 246 190 14 232 195 16 121 92 8 34 34 34 106 106 106 
-221 221 221 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 
-253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 
-242 242 242 82 82 82 20 16 6 163 110 8 216 158 10 236 178 12 242 186 14 246 190 14 
-246 190 14 246 190 14 246 190 14 246 190 14 246 190 14 246 190 14 246 190 14 246 190 14 
-246 190 14 246 190 14 246 190 14 246 190 14 246 190 14 246 190 14 242 186 14 149 139 69 
-46 46 46 18 18 18 6 6 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 10 10 10 
-30 30 30 78 78 78 149 139 69 210 150 10 236 178 12 246 186 14 246 190 14 246 190 14 
-246 190 14 246 190 14 246 190 14 246 190 14 246 190 14 246 190 14 246 190 14 246 190 14 
-246 190 14 246 190 14 246 190 14 246 190 14 241 196 14 220 174 15 198 179 130 253 253 253 
-253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 
-253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 218 218 218 
-58 58 58 2 2 6 20 16 6 167 114 7 216 158 10 236 178 12 246 186 14 246 190 14 
-246 190 14 246 190 14 246 190 14 246 190 14 246 190 14 246 190 14 246 190 14 246 190 14 
-246 190 14 246 190 14 246 190 14 246 190 14 246 190 14 246 186 14 242 186 14 185 146 40 
-54 54 54 22 22 22 6 6 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 14 14 14 
-38 38 38 86 86 86 169 125 40 213 154 11 236 178 12 246 186 14 246 190 14 246 190 14 
-246 190 14 246 190 14 246 190 14 246 190 14 246 190 14 246 190 14 246 190 14 246 190 14 
-246 190 14 246 190 14 246 190 14 246 190 14 246 190 14 232 195 16 190 146 13 214 214 214 
-253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 
-253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 250 250 250 170 170 170 26 26 26 
-2 2 6 2 2 6 35 31 12 163 110 8 219 162 10 239 182 13 246 186 14 246 190 14 
-246 190 14 246 190 14 246 190 14 246 190 14 246 190 14 246 190 14 246 190 14 246 190 14 
-246 190 14 246 190 14 246 190 14 246 190 14 246 186 14 236 178 12 224 166 10 149 139 69 
-46 46 46 18 18 18 6 6 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 6 6 18 18 18 
-50 50 50 113 101 86 192 133 9 224 166 10 242 186 14 246 190 14 246 190 14 246 190 14 
-246 190 14 246 190 14 246 190 14 246 190 14 246 190 14 246 190 14 246 190 14 246 190 14 
-246 190 14 246 190 14 246 190 14 246 190 14 242 186 14 230 187 11 204 160 10 133 118 54 
-226 226 226 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 253 
-253 253 253 253 253 253 253 253 253 253 253 253 198 198 198 66 66 66 2 2 6 2 2 6 
-2 2 6 2 2 6 62 42 6 156 107 11 219 162 10 239 182 13 246 186 14 246 190 14 
-246 190 14 246 190 14 246 190 14 246 190 14 246 190 14 246 190 14 246 190 14 246 190 14 
-246 190 14 246 190 14 246 190 14 242 186 14 234 174 13 213 154 11 148 132 55 66 66 66 
-30 30 30 10 10 10 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 6 6 22 22 22 
-58 58 58 148 132 55 206 145 10 234 174 13 242 186 14 246 186 14 246 190 14 246 190 14 
-246 190 14 246 190 14 246 190 14 246 190 14 246 190 14 246 190 14 246 190 14 246 190 14 
-246 190 14 246 190 14 246 190 14 246 190 14 246 186 14 236 178 12 204 160 10 163 110 8 
-62 42 6 124 131 137 218 218 218 250 250 250 253 253 253 253 253 253 253 253 253 250 250 250 
-242 242 242 210 210 210 151 151 151 66 66 66 6 6 6 2 2 6 2 2 6 2 2 6 
-2 2 6 2 2 6 62 42 6 163 110 8 216 158 10 236 178 12 246 190 14 246 190 14 
-246 190 14 246 190 14 246 190 14 246 190 14 246 190 14 246 190 14 246 190 14 246 190 14 
-246 190 14 239 182 13 230 174 11 216 158 10 185 146 40 124 112 88 70 70 70 38 38 38 
-18 18 18 6 6 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 6 6 22 22 22 
-62 62 62 169 125 40 206 145 10 224 166 10 236 178 12 239 182 13 242 186 14 242 186 14 
-246 186 14 246 190 14 246 190 14 246 190 14 246 190 14 246 190 14 246 190 14 246 190 14 
-246 190 14 246 190 14 246 190 14 246 190 14 246 190 14 236 178 12 216 158 10 171 120 8 
-85 57 6 2 2 6 6 6 6 30 30 30 54 54 54 62 62 62 50 50 50 38 38 38 
-14 14 14 2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 
-2 2 6 6 6 6 85 57 6 167 114 7 213 154 11 236 178 12 246 190 14 246 190 14 
-246 190 14 246 190 14 246 190 14 246 190 14 246 190 14 242 186 14 239 182 13 239 182 13 
-230 174 11 210 150 10 174 140 55 124 112 88 82 82 82 54 54 54 34 34 34 18 18 18 
-6 6 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 6 6 18 18 18 
-50 50 50 169 125 40 192 133 9 200 144 11 216 158 10 219 162 10 224 166 10 226 170 11 
-230 174 11 236 178 12 239 182 13 239 182 13 242 186 14 246 186 14 246 190 14 246 190 14 
-246 190 14 246 190 14 246 190 14 246 190 14 246 186 14 230 174 11 210 150 10 163 110 8 
-104 69 6 10 10 10 2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 
-2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 
-2 2 6 6 6 6 85 57 6 167 114 7 206 145 10 230 174 11 242 186 14 246 190 14 
-246 190 14 246 190 14 246 186 14 242 186 14 239 182 13 230 174 11 224 166 10 213 154 11 
-169 125 40 124 112 88 86 86 86 58 58 58 38 38 38 22 22 22 10 10 10 6 6 6 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 14 14 14 
-34 34 34 70 70 70 133 118 54 169 125 40 167 114 7 180 123 7 192 133 9 197 138 11 
-200 144 11 206 145 10 213 154 11 219 162 10 224 166 10 230 174 11 239 182 13 242 186 14 
-246 186 14 246 186 14 246 186 14 246 186 14 239 182 13 216 158 10 184 138 11 152 99 6 
-104 69 6 20 16 6 2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 
-2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 2 2 6 
-2 2 6 6 6 6 85 57 6 152 99 6 192 133 9 219 162 10 236 178 12 239 182 13 
-246 186 14 242 186 14 239 182 13 236 178 12 224 166 10 206 145 10 192 133 9 148 132 55 
-94 94 94 62 62 62 42 42 42 22 22 22 14 14 14 6 6 6 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 6 6 
-18 18 18 34 34 34 58 58 58 78 78 78 101 98 89 124 112 88 133 118 54 156 107 11 
-163 110 8 167 114 7 171 120 8 180 123 7 184 138 11 197 138 11 210 150 10 219 162 10 
-226 170 11 236 178 12 236 178 12 234 174 13 219 162 10 197 138 11 163 110 8 134 84 6 
-85 57 6 10 10 10 2 2 6 2 2 6 18 18 18 38 38 38 38 38 38 38 38 38 
-38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 38 26 26 26 2 2 6 
-2 2 6 6 6 6 62 42 6 137 92 6 171 120 8 200 144 11 219 162 10 230 174 11 
-234 174 13 230 174 11 219 162 10 210 150 10 192 133 9 163 110 8 124 112 88 82 82 82 
-50 50 50 30 30 30 14 14 14 6 6 6 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-6 6 6 14 14 14 22 22 22 34 34 34 42 42 42 58 58 58 74 74 74 86 86 86 
-101 98 89 113 101 86 133 118 54 121 92 8 137 92 6 152 99 6 163 110 8 180 123 7 
-184 138 11 197 138 11 206 145 10 200 144 11 180 123 7 156 107 11 134 84 6 104 69 6 
-62 42 6 54 54 54 106 106 106 101 98 89 86 86 86 82 82 82 78 78 78 78 78 78 
-78 78 78 78 78 78 78 78 78 78 78 78 78 78 78 82 82 82 86 86 86 94 94 94 
-106 106 106 101 101 101 90 61 47 120 80 7 156 107 11 180 123 7 192 133 9 200 144 11 
-206 145 10 200 144 11 192 133 9 171 120 8 139 102 15 113 101 86 70 70 70 42 42 42 
-22 22 22 10 10 10 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 6 6 6 10 10 10 14 14 14 22 22 22 30 30 30 38 38 38 
-50 50 50 62 62 62 74 74 74 90 90 90 101 98 89 113 101 86 121 92 8 120 80 7 
-137 92 6 152 99 6 152 99 6 152 99 6 134 84 6 120 80 7 98 70 6 88 55 22 
-101 98 89 82 82 82 58 58 58 46 46 46 38 38 38 34 34 34 34 34 34 34 34 34 
-34 34 34 34 34 34 34 34 34 34 34 34 34 34 34 34 34 34 38 38 38 42 42 42 
-54 54 54 82 82 82 94 86 71 85 57 6 134 84 6 156 107 11 167 114 7 171 120 8 
-171 120 8 167 114 7 152 99 6 121 92 8 101 98 89 62 62 62 34 34 34 18 18 18 
-6 6 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 6 6 6 6 6 10 10 10 
-18 18 18 22 22 22 30 30 30 42 42 42 50 50 50 66 66 66 86 86 86 101 98 89 
-94 86 71 98 70 6 104 69 6 104 69 6 104 69 6 85 57 6 88 55 22 90 90 90 
-62 62 62 38 38 38 22 22 22 14 14 14 10 10 10 10 10 10 10 10 10 10 10 10 
-10 10 10 10 10 10 6 6 6 10 10 10 10 10 10 10 10 10 10 10 10 14 14 14 
-22 22 22 42 42 42 70 70 70 94 86 71 85 57 6 104 69 6 120 80 7 137 92 6 
-134 84 6 120 80 7 94 86 71 86 86 86 58 58 58 30 30 30 14 14 14 6 6 6 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 6 6 6 10 10 10 14 14 14 18 18 18 26 26 26 38 38 38 54 54 54 
-70 70 70 86 86 86 94 86 71 94 86 71 94 86 71 86 86 86 74 74 74 50 50 50 
-30 30 30 14 14 14 6 6 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-6 6 6 18 18 18 34 34 34 58 58 58 82 82 82 94 86 71 94 86 71 94 86 71 
-94 86 71 94 86 71 74 74 74 50 50 50 26 26 26 14 14 14 6 6 6 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 6 6 6 6 6 6 14 14 14 18 18 18 
-30 30 30 38 38 38 46 46 46 54 54 54 50 50 50 42 42 42 30 30 30 18 18 18 
-10 10 10 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 6 6 6 14 14 14 26 26 26 38 38 38 50 50 50 58 58 58 58 58 58 
-54 54 54 42 42 42 30 30 30 18 18 18 10 10 10 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 6 6 
-6 6 6 10 10 10 14 14 14 18 18 18 18 18 18 14 14 14 10 10 10 6 6 6 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 6 6 6 14 14 14 18 18 18 22 22 22 22 22 22 
-18 18 18 14 14 14 10 10 10 6 6 6 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
-
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  6   6   6   6   6   6  10  10  10  10  10  10
+ 10  10  10   6   6   6   6   6   6   6   6   6
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   6   6   6  10  10  10  14  14  14
+ 22  22  22  26  26  26  30  30  30  34  34  34
+ 30  30  30  30  30  30  26  26  26  18  18  18
+ 14  14  14  10  10  10   6   6   6   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   1   0   0   1   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  6   6   6  14  14  14  26  26  26  42  42  42
+ 54  54  54  66  66  66  78  78  78  78  78  78
+ 78  78  78  74  74  74  66  66  66  54  54  54
+ 42  42  42  26  26  26  18  18  18  10  10  10
+  6   6   6   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   1   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0  10  10  10
+ 22  22  22  42  42  42  66  66  66  86  86  86
+ 66  66  66  38  38  38  38  38  38  22  22  22
+ 26  26  26  34  34  34  54  54  54  66  66  66
+ 86  86  86  70  70  70  46  46  46  26  26  26
+ 14  14  14   6   6   6   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   1   0   0   1   0   0   1   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0  10  10  10  26  26  26
+ 50  50  50  82  82  82  58  58  58   6   6   6
+  2   2   6   2   2   6   2   2   6   2   2   6
+  2   2   6   2   2   6   2   2   6   2   2   6
+  6   6   6  54  54  54  86  86  86  66  66  66
+ 38  38  38  18  18  18   6   6   6   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   6   6   6  22  22  22  50  50  50
+ 78  78  78  34  34  34   2   2   6   2   2   6
+  2   2   6   2   2   6   2   2   6   2   2   6
+  2   2   6   2   2   6   2   2   6   2   2   6
+  2   2   6   2   2   6   6   6   6  70  70  70
+ 78  78  78  46  46  46  22  22  22   6   6   6
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   1   0   0   1   0   0   1   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  6   6   6  18  18  18  42  42  42  82  82  82
+ 26  26  26   2   2   6   2   2   6   2   2   6
+  2   2   6   2   2   6   2   2   6   2   2   6
+  2   2   6   2   2   6   2   2   6  14  14  14
+ 46  46  46  34  34  34   6   6   6   2   2   6
+ 42  42  42  78  78  78  42  42  42  18  18  18
+  6   6   6   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   1   0   0   0   0   0   1   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+ 10  10  10  30  30  30  66  66  66  58  58  58
+  2   2   6   2   2   6   2   2   6   2   2   6
+  2   2   6   2   2   6   2   2   6   2   2   6
+  2   2   6   2   2   6   2   2   6  26  26  26
+ 86  86  86 101 101 101  46  46  46  10  10  10
+  2   2   6  58  58  58  70  70  70  34  34  34
+ 10  10  10   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   1   0   0   1   0   0   1   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+ 14  14  14  42  42  42  86  86  86  10  10  10
+  2   2   6   2   2   6   2   2   6   2   2   6
+  2   2   6   2   2   6   2   2   6   2   2   6
+  2   2   6   2   2   6   2   2   6  30  30  30
+ 94  94  94  94  94  94  58  58  58  26  26  26
+  2   2   6   6   6   6  78  78  78  54  54  54
+ 22  22  22   6   6   6   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   6   6   6
+ 22  22  22  62  62  62  62  62  62   2   2   6
+  2   2   6   2   2   6   2   2   6   2   2   6
+  2   2   6   2   2   6   2   2   6   2   2   6
+  2   2   6   2   2   6   2   2   6  26  26  26
+ 54  54  54  38  38  38  18  18  18  10  10  10
+  2   2   6   2   2   6  34  34  34  82  82  82
+ 38  38  38  14  14  14   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   1   0   0   1   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   6   6   6
+ 30  30  30  78  78  78  30  30  30   2   2   6
+  2   2   6   2   2   6   2   2   6   2   2   6
+  2   2   6   2   2   6   2   2   6   2   2   6
+  2   2   6   2   2   6   2   2   6  10  10  10
+ 10  10  10   2   2   6   2   2   6   2   2   6
+  2   2   6   2   2   6   2   2   6  78  78  78
+ 50  50  50  18  18  18   6   6   6   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   1   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0  10  10  10
+ 38  38  38  86  86  86  14  14  14   2   2   6
+  2   2   6   2   2   6   2   2   6   2   2   6
+  2   2   6   2   2   6   2   2   6   2   2   6
+  2   2   6   2   2   6   2   2   6   2   2   6
+  2   2   6   2   2   6   2   2   6   2   2   6
+  2   2   6   2   2   6   2   2   6  54  54  54
+ 66  66  66  26  26  26   6   6   6   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   1   0   0   1   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0  14  14  14
+ 42  42  42  82  82  82   2   2   6   2   2   6
+  2   2   6   6   6   6  10  10  10   2   2   6
+  2   2   6   2   2   6   2   2   6   2   2   6
+  2   2   6   2   2   6   2   2   6   6   6   6
+ 14  14  14  10  10  10   2   2   6   2   2   6
+  2   2   6   2   2   6   2   2   6  18  18  18
+ 82  82  82  34  34  34  10  10  10   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   1   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0  14  14  14
+ 46  46  46  86  86  86   2   2   6   2   2   6
+  6   6   6   6   6   6  22  22  22  34  34  34
+  6   6   6   2   2   6   2   2   6   2   2   6
+  2   2   6   2   2   6  18  18  18  34  34  34
+ 10  10  10  50  50  50  22  22  22   2   2   6
+  2   2   6   2   2   6   2   2   6  10  10  10
+ 86  86  86  42  42  42  14  14  14   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   1   0   0   1   0   0   1   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0  14  14  14
+ 46  46  46  86  86  86   2   2   6   2   2   6
+ 38  38  38 116 116 116  94  94  94  22  22  22
+ 22  22  22   2   2   6   2   2   6   2   2   6
+ 14  14  14  86  86  86 138 138 138 162 162 162
+154 154 154  38  38  38  26  26  26   6   6   6
+  2   2   6   2   2   6   2   2   6   2   2   6
+ 86  86  86  46  46  46  14  14  14   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0  14  14  14
+ 46  46  46  86  86  86   2   2   6  14  14  14
+134 134 134 198 198 198 195 195 195 116 116 116
+ 10  10  10   2   2   6   2   2   6   6   6   6
+101  98  89 187 187 187 210 210 210 218 218 218
+214 214 214 134 134 134  14  14  14   6   6   6
+  2   2   6   2   2   6   2   2   6   2   2   6
+ 86  86  86  50  50  50  18  18  18   6   6   6
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   1   0   0   0
+  0   0   1   0   0   1   0   0   1   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0  14  14  14
+ 46  46  46  86  86  86   2   2   6  54  54  54
+218 218 218 195 195 195 226 226 226 246 246 246
+ 58  58  58   2   2   6   2   2   6  30  30  30
+210 210 210 253 253 253 174 174 174 123 123 123
+221 221 221 234 234 234  74  74  74   2   2   6
+  2   2   6   2   2   6   2   2   6   2   2   6
+ 70  70  70  58  58  58  22  22  22   6   6   6
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0  14  14  14
+ 46  46  46  82  82  82   2   2   6 106 106 106
+170 170 170  26  26  26  86  86  86 226 226 226
+123 123 123  10  10  10  14  14  14  46  46  46
+231 231 231 190 190 190   6   6   6  70  70  70
+ 90  90  90 238 238 238 158 158 158   2   2   6
+  2   2   6   2   2   6   2   2   6   2   2   6
+ 70  70  70  58  58  58  22  22  22   6   6   6
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   1   0   0   0
+  0   0   1   0   0   1   0   0   1   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0  14  14  14
+ 42  42  42  86  86  86   6   6   6 116 116 116
+106 106 106   6   6   6  70  70  70 149 149 149
+128 128 128  18  18  18  38  38  38  54  54  54
+221 221 221 106 106 106   2   2   6  14  14  14
+ 46  46  46 190 190 190 198 198 198   2   2   6
+  2   2   6   2   2   6   2   2   6   2   2   6
+ 74  74  74  62  62  62  22  22  22   6   6   6
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   1   0   0   0
+  0   0   1   0   0   0   0   0   1   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0  14  14  14
+ 42  42  42  94  94  94  14  14  14 101 101 101
+128 128 128   2   2   6  18  18  18 116 116 116
+118  98  46 121  92   8 121  92   8  98  78  10
+162 162 162 106 106 106   2   2   6   2   2   6
+  2   2   6 195 195 195 195 195 195   6   6   6
+  2   2   6   2   2   6   2   2   6   2   2   6
+ 74  74  74  62  62  62  22  22  22   6   6   6
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   1   0   0   1
+  0   0   1   0   0   0   0   0   1   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0  10  10  10
+ 38  38  38  90  90  90  14  14  14  58  58  58
+210 210 210  26  26  26  54  38   6 154 114  10
+226 170  11 236 186  11 225 175  15 184 144  12
+215 174  15 175 146  61  37  26   9   2   2   6
+ 70  70  70 246 246 246 138 138 138   2   2   6
+  2   2   6   2   2   6   2   2   6   2   2   6
+ 70  70  70  66  66  66  26  26  26   6   6   6
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0  10  10  10
+ 38  38  38  86  86  86  14  14  14  10  10  10
+195 195 195 188 164 115 192 133   9 225 175  15
+239 182  13 234 190  10 232 195  16 232 200  30
+245 207  45 241 208  19 232 195  16 184 144  12
+218 194 134 211 206 186  42  42  42   2   2   6
+  2   2   6   2   2   6   2   2   6   2   2   6
+ 50  50  50  74  74  74  30  30  30   6   6   6
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0  10  10  10
+ 34  34  34  86  86  86  14  14  14   2   2   6
+121  87  25 192 133   9 219 162  10 239 182  13
+236 186  11 232 195  16 241 208  19 244 214  54
+246 218  60 246 218  38 246 215  20 241 208  19
+241 208  19 226 184  13 121  87  25   2   2   6
+  2   2   6   2   2   6   2   2   6   2   2   6
+ 50  50  50  82  82  82  34  34  34  10  10  10
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0  10  10  10
+ 34  34  34  82  82  82  30  30  30  61  42   6
+180 123   7 206 145  10 230 174  11 239 182  13
+234 190  10 238 202  15 241 208  19 246 218  74
+246 218  38 246 215  20 246 215  20 246 215  20
+226 184  13 215 174  15 184 144  12   6   6   6
+  2   2   6   2   2   6   2   2   6   2   2   6
+ 26  26  26  94  94  94  42  42  42  14  14  14
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0  10  10  10
+ 30  30  30  78  78  78  50  50  50 104  69   6
+192 133   9 216 158  10 236 178  12 236 186  11
+232 195  16 241 208  19 244 214  54 245 215  43
+246 215  20 246 215  20 241 208  19 198 155  10
+200 144  11 216 158  10 156 118  10   2   2   6
+  2   2   6   2   2   6   2   2   6   2   2   6
+  6   6   6  90  90  90  54  54  54  18  18  18
+  6   6   6   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0  10  10  10
+ 30  30  30  78  78  78  46  46  46  22  22  22
+137  92   6 210 162  10 239 182  13 238 190  10
+238 202  15 241 208  19 246 215  20 246 215  20
+241 208  19 203 166  17 185 133  11 210 150  10
+216 158  10 210 150  10 102  78  10   2   2   6
+  6   6   6  54  54  54  14  14  14   2   2   6
+  2   2   6  62  62  62  74  74  74  30  30  30
+ 10  10  10   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0  10  10  10
+ 34  34  34  78  78  78  50  50  50   6   6   6
+ 94  70  30 139 102  15 190 146  13 226 184  13
+232 200  30 232 195  16 215 174  15 190 146  13
+168 122  10 192 133   9 210 150  10 213 154  11
+202 150  34 182 157 106 101  98  89   2   2   6
+  2   2   6  78  78  78 116 116 116  58  58  58
+  2   2   6  22  22  22  90  90  90  46  46  46
+ 18  18  18   6   6   6   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0  10  10  10
+ 38  38  38  86  86  86  50  50  50   6   6   6
+128 128 128 174 154 114 156 107  11 168 122  10
+198 155  10 184 144  12 197 138  11 200 144  11
+206 145  10 206 145  10 197 138  11 188 164 115
+195 195 195 198 198 198 174 174 174  14  14  14
+  2   2   6  22  22  22 116 116 116 116 116 116
+ 22  22  22   2   2   6  74  74  74  70  70  70
+ 30  30  30  10  10  10   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   6   6   6  18  18  18
+ 50  50  50 101 101 101  26  26  26  10  10  10
+138 138 138 190 190 190 174 154 114 156 107  11
+197 138  11 200 144  11 197 138  11 192 133   9
+180 123   7 190 142  34 190 178 144 187 187 187
+202 202 202 221 221 221 214 214 214  66  66  66
+  2   2   6   2   2   6  50  50  50  62  62  62
+  6   6   6   2   2   6  10  10  10  90  90  90
+ 50  50  50  18  18  18   6   6   6   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0  10  10  10  34  34  34
+ 74  74  74  74  74  74   2   2   6   6   6   6
+144 144 144 198 198 198 190 190 190 178 166 146
+154 121  60 156 107  11 156 107  11 168 124  44
+174 154 114 187 187 187 190 190 190 210 210 210
+246 246 246 253 253 253 253 253 253 182 182 182
+  6   6   6   2   2   6   2   2   6   2   2   6
+  2   2   6   2   2   6   2   2   6  62  62  62
+ 74  74  74  34  34  34  14  14  14   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0  10  10  10  22  22  22  54  54  54
+ 94  94  94  18  18  18   2   2   6  46  46  46
+234 234 234 221 221 221 190 190 190 190 190 190
+190 190 190 187 187 187 187 187 187 190 190 190
+190 190 190 195 195 195 214 214 214 242 242 242
+253 253 253 253 253 253 253 253 253 253 253 253
+ 82  82  82   2   2   6   2   2   6   2   2   6
+  2   2   6   2   2   6   2   2   6  14  14  14
+ 86  86  86  54  54  54  22  22  22   6   6   6
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  6   6   6  18  18  18  46  46  46  90  90  90
+ 46  46  46  18  18  18   6   6   6 182 182 182
+253 253 253 246 246 246 206 206 206 190 190 190
+190 190 190 190 190 190 190 190 190 190 190 190
+206 206 206 231 231 231 250 250 250 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+202 202 202  14  14  14   2   2   6   2   2   6
+  2   2   6   2   2   6   2   2   6   2   2   6
+ 42  42  42  86  86  86  42  42  42  18  18  18
+  6   6   6   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   6   6   6
+ 14  14  14  38  38  38  74  74  74  66  66  66
+  2   2   6   6   6   6  90  90  90 250 250 250
+253 253 253 253 253 253 238 238 238 198 198 198
+190 190 190 190 190 190 195 195 195 221 221 221
+246 246 246 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253  82  82  82   2   2   6   2   2   6
+  2   2   6   2   2   6   2   2   6   2   2   6
+  2   2   6  78  78  78  70  70  70  34  34  34
+ 14  14  14   6   6   6   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0  14  14  14
+ 34  34  34  66  66  66  78  78  78   6   6   6
+  2   2   6  18  18  18 218 218 218 253 253 253
+253 253 253 253 253 253 253 253 253 246 246 246
+226 226 226 231 231 231 246 246 246 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 178 178 178   2   2   6   2   2   6
+  2   2   6   2   2   6   2   2   6   2   2   6
+  2   2   6  18  18  18  90  90  90  62  62  62
+ 30  30  30  10  10  10   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0  10  10  10  26  26  26
+ 58  58  58  90  90  90  18  18  18   2   2   6
+  2   2   6 110 110 110 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+250 250 250 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 231 231 231  18  18  18   2   2   6
+  2   2   6   2   2   6   2   2   6   2   2   6
+  2   2   6   2   2   6  18  18  18  94  94  94
+ 54  54  54  26  26  26  10  10  10   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   6   6   6  22  22  22  50  50  50
+ 90  90  90  26  26  26   2   2   6   2   2   6
+ 14  14  14 195 195 195 250 250 250 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+250 250 250 242 242 242  54  54  54   2   2   6
+  2   2   6   2   2   6   2   2   6   2   2   6
+  2   2   6   2   2   6   2   2   6  38  38  38
+ 86  86  86  50  50  50  22  22  22   6   6   6
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  6   6   6  14  14  14  38  38  38  82  82  82
+ 34  34  34   2   2   6   2   2   6   2   2   6
+ 42  42  42 195 195 195 246 246 246 253 253 253
+253 253 253 253 253 253 253 253 253 250 250 250
+242 242 242 242 242 242 250 250 250 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 250 250 250 246 246 246 238 238 238
+226 226 226 231 231 231 101 101 101   6   6   6
+  2   2   6   2   2   6   2   2   6   2   2   6
+  2   2   6   2   2   6   2   2   6   2   2   6
+ 38  38  38  82  82  82  42  42  42  14  14  14
+  6   6   6   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+ 10  10  10  26  26  26  62  62  62  66  66  66
+  2   2   6   2   2   6   2   2   6   6   6   6
+ 70  70  70 170 170 170 206 206 206 234 234 234
+246 246 246 250 250 250 250 250 250 238 238 238
+226 226 226 231 231 231 238 238 238 250 250 250
+250 250 250 250 250 250 246 246 246 231 231 231
+214 214 214 206 206 206 202 202 202 202 202 202
+198 198 198 202 202 202 182 182 182  18  18  18
+  2   2   6   2   2   6   2   2   6   2   2   6
+  2   2   6   2   2   6   2   2   6   2   2   6
+  2   2   6  62  62  62  66  66  66  30  30  30
+ 10  10  10   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+ 14  14  14  42  42  42  82  82  82  18  18  18
+  2   2   6   2   2   6   2   2   6  10  10  10
+ 94  94  94 182 182 182 218 218 218 242 242 242
+250 250 250 253 253 253 253 253 253 250 250 250
+234 234 234 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 246 246 246
+238 238 238 226 226 226 210 210 210 202 202 202
+195 195 195 195 195 195 210 210 210 158 158 158
+  6   6   6  14  14  14  50  50  50  14  14  14
+  2   2   6   2   2   6   2   2   6   2   2   6
+  2   2   6   6   6   6  86  86  86  46  46  46
+ 18  18  18   6   6   6   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   6   6   6
+ 22  22  22  54  54  54  70  70  70   2   2   6
+  2   2   6  10  10  10   2   2   6  22  22  22
+166 166 166 231 231 231 250 250 250 253 253 253
+253 253 253 253 253 253 253 253 253 250 250 250
+242 242 242 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 246 246 246
+231 231 231 206 206 206 198 198 198 226 226 226
+ 94  94  94   2   2   6   6   6   6  38  38  38
+ 30  30  30   2   2   6   2   2   6   2   2   6
+  2   2   6   2   2   6  62  62  62  66  66  66
+ 26  26  26  10  10  10   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0  10  10  10
+ 30  30  30  74  74  74  50  50  50   2   2   6
+ 26  26  26  26  26  26   2   2   6 106 106 106
+238 238 238 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 246 246 246 218 218 218 202 202 202
+210 210 210  14  14  14   2   2   6   2   2   6
+ 30  30  30  22  22  22   2   2   6   2   2   6
+  2   2   6   2   2   6  18  18  18  86  86  86
+ 42  42  42  14  14  14   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0  14  14  14
+ 42  42  42  90  90  90  22  22  22   2   2   6
+ 42  42  42   2   2   6  18  18  18 218 218 218
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 250 250 250 221 221 221
+218 218 218 101 101 101   2   2   6  14  14  14
+ 18  18  18  38  38  38  10  10  10   2   2   6
+  2   2   6   2   2   6   2   2   6  78  78  78
+ 58  58  58  22  22  22   6   6   6   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   6   6   6  18  18  18
+ 54  54  54  82  82  82   2   2   6  26  26  26
+ 22  22  22   2   2   6 123 123 123 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 250 250 250
+238 238 238 198 198 198   6   6   6  38  38  38
+ 58  58  58  26  26  26  38  38  38   2   2   6
+  2   2   6   2   2   6   2   2   6  46  46  46
+ 78  78  78  30  30  30  10  10  10   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0  10  10  10  30  30  30
+ 74  74  74  58  58  58   2   2   6  42  42  42
+  2   2   6  22  22  22 231 231 231 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 250 250 250
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 246 246 246  46  46  46  38  38  38
+ 42  42  42  14  14  14  38  38  38  14  14  14
+  2   2   6   2   2   6   2   2   6   6   6   6
+ 86  86  86  46  46  46  14  14  14   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   6   6   6  14  14  14  42  42  42
+ 90  90  90  18  18  18  18  18  18  26  26  26
+  2   2   6 116 116 116 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 250 250 250 238 238 238
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253  94  94  94   6   6   6
+  2   2   6   2   2   6  10  10  10  34  34  34
+  2   2   6   2   2   6   2   2   6   2   2   6
+ 74  74  74  58  58  58  22  22  22   6   6   6
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0  10  10  10  26  26  26  66  66  66
+ 82  82  82   2   2   6  38  38  38   6   6   6
+ 14  14  14 210 210 210 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 246 246 246 242 242 242
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 144 144 144   2   2   6
+  2   2   6   2   2   6   2   2   6  46  46  46
+  2   2   6   2   2   6   2   2   6   2   2   6
+ 42  42  42  74  74  74  30  30  30  10  10  10
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  6   6   6  14  14  14  42  42  42  90  90  90
+ 26  26  26   6   6   6  42  42  42   2   2   6
+ 74  74  74 250 250 250 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 242 242 242 242 242 242
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 182 182 182   2   2   6
+  2   2   6   2   2   6   2   2   6  46  46  46
+  2   2   6   2   2   6   2   2   6   2   2   6
+ 10  10  10  86  86  86  38  38  38  10  10  10
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+ 10  10  10  26  26  26  66  66  66  82  82  82
+  2   2   6  22  22  22  18  18  18   2   2   6
+149 149 149 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 234 234 234 242 242 242
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 206 206 206   2   2   6
+  2   2   6   2   2   6   2   2   6  38  38  38
+  2   2   6   2   2   6   2   2   6   2   2   6
+  6   6   6  86  86  86  46  46  46  14  14  14
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   6   6   6
+ 18  18  18  46  46  46  86  86  86  18  18  18
+  2   2   6  34  34  34  10  10  10   6   6   6
+210 210 210 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 234 234 234 242 242 242
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 221 221 221   6   6   6
+  2   2   6   2   2   6   6   6   6  30  30  30
+  2   2   6   2   2   6   2   2   6   2   2   6
+  2   2   6  82  82  82  54  54  54  18  18  18
+  6   6   6   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0  10  10  10
+ 26  26  26  66  66  66  62  62  62   2   2   6
+  2   2   6  38  38  38  10  10  10  26  26  26
+238 238 238 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 231 231 231 238 238 238
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 231 231 231   6   6   6
+  2   2   6   2   2   6  10  10  10  30  30  30
+  2   2   6   2   2   6   2   2   6   2   2   6
+  2   2   6  66  66  66  58  58  58  22  22  22
+  6   6   6   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0  10  10  10
+ 38  38  38  78  78  78   6   6   6   2   2   6
+  2   2   6  46  46  46  14  14  14  42  42  42
+246 246 246 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 231 231 231 242 242 242
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 234 234 234  10  10  10
+  2   2   6   2   2   6  22  22  22  14  14  14
+  2   2   6   2   2   6   2   2   6   2   2   6
+  2   2   6  66  66  66  62  62  62  22  22  22
+  6   6   6   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   6   6   6  18  18  18
+ 50  50  50  74  74  74   2   2   6   2   2   6
+ 14  14  14  70  70  70  34  34  34  62  62  62
+250 250 250 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 231 231 231 246 246 246
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 234 234 234  14  14  14
+  2   2   6   2   2   6  30  30  30   2   2   6
+  2   2   6   2   2   6   2   2   6   2   2   6
+  2   2   6  66  66  66  62  62  62  22  22  22
+  6   6   6   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   6   6   6  18  18  18
+ 54  54  54  62  62  62   2   2   6   2   2   6
+  2   2   6  30  30  30  46  46  46  70  70  70
+250 250 250 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 231 231 231 246 246 246
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 226 226 226  10  10  10
+  2   2   6   6   6   6  30  30  30   2   2   6
+  2   2   6   2   2   6   2   2   6   2   2   6
+  2   2   6  66  66  66  58  58  58  22  22  22
+  6   6   6   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   6   6   6  22  22  22
+ 58  58  58  62  62  62   2   2   6   2   2   6
+  2   2   6   2   2   6  30  30  30  78  78  78
+250 250 250 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 231 231 231 246 246 246
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 206 206 206   2   2   6
+ 22  22  22  34  34  34  18  14   6  22  22  22
+ 26  26  26  18  18  18   6   6   6   2   2   6
+  2   2   6  82  82  82  54  54  54  18  18  18
+  6   6   6   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   6   6   6  26  26  26
+ 62  62  62 106 106 106  74  54  14 185 133  11
+210 162  10 121  92   8   6   6   6  62  62  62
+238 238 238 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 231 231 231 246 246 246
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 158 158 158  18  18  18
+ 14  14  14   2   2   6   2   2   6   2   2   6
+  6   6   6  18  18  18  66  66  66  38  38  38
+  6   6   6  94  94  94  50  50  50  18  18  18
+  6   6   6   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   6   6   6
+ 10  10  10  10  10  10  18  18  18  38  38  38
+ 78  78  78 142 134 106 216 158  10 242 186  14
+246 190  14 246 190  14 156 118  10  10  10  10
+ 90  90  90 238 238 238 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 231 231 231 250 250 250
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 246 230 190
+238 204  91 238 204  91 181 142  44  37  26   9
+  2   2   6   2   2   6   2   2   6   2   2   6
+  2   2   6   2   2   6  38  38  38  46  46  46
+ 26  26  26 106 106 106  54  54  54  18  18  18
+  6   6   6   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   6   6   6  14  14  14  22  22  22
+ 30  30  30  38  38  38  50  50  50  70  70  70
+106 106 106 190 142  34 226 170  11 242 186  14
+246 190  14 246 190  14 246 190  14 154 114  10
+  6   6   6  74  74  74 226 226 226 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 231 231 231 250 250 250
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 228 184  62
+241 196  14 241 208  19 232 195  16  38  30  10
+  2   2   6   2   2   6   2   2   6   2   2   6
+  2   2   6   6   6   6  30  30  30  26  26  26
+203 166  17 154 142  90  66  66  66  26  26  26
+  6   6   6   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  6   6   6  18  18  18  38  38  38  58  58  58
+ 78  78  78  86  86  86 101 101 101 123 123 123
+175 146  61 210 150  10 234 174  13 246 186  14
+246 190  14 246 190  14 246 190  14 238 190  10
+102  78  10   2   2   6  46  46  46 198 198 198
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 234 234 234 242 242 242
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 224 178  62
+242 186  14 241 196  14 210 166  10  22  18   6
+  2   2   6   2   2   6   2   2   6   2   2   6
+  2   2   6   2   2   6   6   6   6 121  92   8
+238 202  15 232 195  16  82  82  82  34  34  34
+ 10  10  10   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+ 14  14  14  38  38  38  70  70  70 154 122  46
+190 142  34 200 144  11 197 138  11 197 138  11
+213 154  11 226 170  11 242 186  14 246 190  14
+246 190  14 246 190  14 246 190  14 246 190  14
+225 175  15  46  32   6   2   2   6  22  22  22
+158 158 158 250 250 250 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 250 250 250 242 242 242 224 178  62
+239 182  13 236 186  11 213 154  11  46  32   6
+  2   2   6   2   2   6   2   2   6   2   2   6
+  2   2   6   2   2   6  61  42   6 225 175  15
+238 190  10 236 186  11 112 100  78  42  42  42
+ 14  14  14   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   6   6   6
+ 22  22  22  54  54  54 154 122  46 213 154  11
+226 170  11 230 174  11 226 170  11 226 170  11
+236 178  12 242 186  14 246 190  14 246 190  14
+246 190  14 246 190  14 246 190  14 246 190  14
+241 196  14 184 144  12  10  10  10   2   2   6
+  6   6   6 116 116 116 242 242 242 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 231 231 231 198 198 198 214 170  54
+236 178  12 236 178  12 210 150  10 137  92   6
+ 18  14   6   2   2   6   2   2   6   2   2   6
+  6   6   6  70  47   6 200 144  11 236 178  12
+239 182  13 239 182  13 124 112  88  58  58  58
+ 22  22  22   6   6   6   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0  10  10  10
+ 30  30  30  70  70  70 180 133  36 226 170  11
+239 182  13 242 186  14 242 186  14 246 186  14
+246 190  14 246 190  14 246 190  14 246 190  14
+246 190  14 246 190  14 246 190  14 246 190  14
+246 190  14 232 195  16  98  70   6   2   2   6
+  2   2   6   2   2   6  66  66  66 221 221 221
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 206 206 206 198 198 198 214 166  58
+230 174  11 230 174  11 216 158  10 192 133   9
+163 110   8 116  81   8 102  78  10 116  81   8
+167 114   7 197 138  11 226 170  11 239 182  13
+242 186  14 242 186  14 162 146  94  78  78  78
+ 34  34  34  14  14  14   6   6   6   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   6   6   6
+ 30  30  30  78  78  78 190 142  34 226 170  11
+239 182  13 246 190  14 246 190  14 246 190  14
+246 190  14 246 190  14 246 190  14 246 190  14
+246 190  14 246 190  14 246 190  14 246 190  14
+246 190  14 241 196  14 203 166  17  22  18   6
+  2   2   6   2   2   6   2   2   6  38  38  38
+218 218 218 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+250 250 250 206 206 206 198 198 198 202 162  69
+226 170  11 236 178  12 224 166  10 210 150  10
+200 144  11 197 138  11 192 133   9 197 138  11
+210 150  10 226 170  11 242 186  14 246 190  14
+246 190  14 246 186  14 225 175  15 124 112  88
+ 62  62  62  30  30  30  14  14  14   6   6   6
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0  10  10  10
+ 30  30  30  78  78  78 174 135  50 224 166  10
+239 182  13 246 190  14 246 190  14 246 190  14
+246 190  14 246 190  14 246 190  14 246 190  14
+246 190  14 246 190  14 246 190  14 246 190  14
+246 190  14 246 190  14 241 196  14 139 102  15
+  2   2   6   2   2   6   2   2   6   2   2   6
+ 78  78  78 250 250 250 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+250 250 250 214 214 214 198 198 198 190 150  46
+219 162  10 236 178  12 234 174  13 224 166  10
+216 158  10 213 154  11 213 154  11 216 158  10
+226 170  11 239 182  13 246 190  14 246 190  14
+246 190  14 246 190  14 242 186  14 206 162  42
+101 101 101  58  58  58  30  30  30  14  14  14
+  6   6   6   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0  10  10  10
+ 30  30  30  74  74  74 174 135  50 216 158  10
+236 178  12 246 190  14 246 190  14 246 190  14
+246 190  14 246 190  14 246 190  14 246 190  14
+246 190  14 246 190  14 246 190  14 246 190  14
+246 190  14 246 190  14 241 196  14 226 184  13
+ 61  42   6   2   2   6   2   2   6   2   2   6
+ 22  22  22 238 238 238 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 226 226 226 187 187 187 180 133  36
+216 158  10 236 178  12 239 182  13 236 178  12
+230 174  11 226 170  11 226 170  11 230 174  11
+236 178  12 242 186  14 246 190  14 246 190  14
+246 190  14 246 190  14 246 186  14 239 182  13
+206 162  42 106 106 106  66  66  66  34  34  34
+ 14  14  14   6   6   6   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   6   6   6
+ 26  26  26  70  70  70 163 133  67 213 154  11
+236 178  12 246 190  14 246 190  14 246 190  14
+246 190  14 246 190  14 246 190  14 246 190  14
+246 190  14 246 190  14 246 190  14 246 190  14
+246 190  14 246 190  14 246 190  14 241 196  14
+190 146  13  18  14   6   2   2   6   2   2   6
+ 46  46  46 246 246 246 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 221 221 221  86  86  86 156 107  11
+216 158  10 236 178  12 242 186  14 246 186  14
+242 186  14 239 182  13 239 182  13 242 186  14
+242 186  14 246 186  14 246 190  14 246 190  14
+246 190  14 246 190  14 246 190  14 246 190  14
+242 186  14 225 175  15 142 122  72  66  66  66
+ 30  30  30  10  10  10   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   6   6   6
+ 26  26  26  70  70  70 163 133  67 210 150  10
+236 178  12 246 190  14 246 190  14 246 190  14
+246 190  14 246 190  14 246 190  14 246 190  14
+246 190  14 246 190  14 246 190  14 246 190  14
+246 190  14 246 190  14 246 190  14 246 190  14
+232 195  16 121  92   8  34  34  34 106 106 106
+221 221 221 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+242 242 242  82  82  82  18  14   6 163 110   8
+216 158  10 236 178  12 242 186  14 246 190  14
+246 190  14 246 190  14 246 190  14 246 190  14
+246 190  14 246 190  14 246 190  14 246 190  14
+246 190  14 246 190  14 246 190  14 246 190  14
+246 190  14 246 190  14 242 186  14 163 133  67
+ 46  46  46  18  18  18   6   6   6   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0  10  10  10
+ 30  30  30  78  78  78 163 133  67 210 150  10
+236 178  12 246 186  14 246 190  14 246 190  14
+246 190  14 246 190  14 246 190  14 246 190  14
+246 190  14 246 190  14 246 190  14 246 190  14
+246 190  14 246 190  14 246 190  14 246 190  14
+241 196  14 215 174  15 190 178 144 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 218 218 218
+ 58  58  58   2   2   6  22  18   6 167 114   7
+216 158  10 236 178  12 246 186  14 246 190  14
+246 190  14 246 190  14 246 190  14 246 190  14
+246 190  14 246 190  14 246 190  14 246 190  14
+246 190  14 246 190  14 246 190  14 246 190  14
+246 190  14 246 186  14 242 186  14 190 150  46
+ 54  54  54  22  22  22   6   6   6   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0  14  14  14
+ 38  38  38  86  86  86 180 133  36 213 154  11
+236 178  12 246 186  14 246 190  14 246 190  14
+246 190  14 246 190  14 246 190  14 246 190  14
+246 190  14 246 190  14 246 190  14 246 190  14
+246 190  14 246 190  14 246 190  14 246 190  14
+246 190  14 232 195  16 190 146  13 214 214 214
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 250 250 250 170 170 170  26  26  26
+  2   2   6   2   2   6  37  26   9 163 110   8
+219 162  10 239 182  13 246 186  14 246 190  14
+246 190  14 246 190  14 246 190  14 246 190  14
+246 190  14 246 190  14 246 190  14 246 190  14
+246 190  14 246 190  14 246 190  14 246 190  14
+246 186  14 236 178  12 224 166  10 142 122  72
+ 46  46  46  18  18  18   6   6   6   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   6   6   6  18  18  18
+ 50  50  50 109 106  95 192 133   9 224 166  10
+242 186  14 246 190  14 246 190  14 246 190  14
+246 190  14 246 190  14 246 190  14 246 190  14
+246 190  14 246 190  14 246 190  14 246 190  14
+246 190  14 246 190  14 246 190  14 246 190  14
+242 186  14 226 184  13 210 162  10 142 110  46
+226 226 226 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+253 253 253 253 253 253 253 253 253 253 253 253
+198 198 198  66  66  66   2   2   6   2   2   6
+  2   2   6   2   2   6  50  34   6 156 107  11
+219 162  10 239 182  13 246 186  14 246 190  14
+246 190  14 246 190  14 246 190  14 246 190  14
+246 190  14 246 190  14 246 190  14 246 190  14
+246 190  14 246 190  14 246 190  14 242 186  14
+234 174  13 213 154  11 154 122  46  66  66  66
+ 30  30  30  10  10  10   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   6   6   6  22  22  22
+ 58  58  58 154 121  60 206 145  10 234 174  13
+242 186  14 246 186  14 246 190  14 246 190  14
+246 190  14 246 190  14 246 190  14 246 190  14
+246 190  14 246 190  14 246 190  14 246 190  14
+246 190  14 246 190  14 246 190  14 246 190  14
+246 186  14 236 178  12 210 162  10 163 110   8
+ 61  42   6 138 138 138 218 218 218 250 250 250
+253 253 253 253 253 253 253 253 253 250 250 250
+242 242 242 210 210 210 144 144 144  66  66  66
+  6   6   6   2   2   6   2   2   6   2   2   6
+  2   2   6   2   2   6  61  42   6 163 110   8
+216 158  10 236 178  12 246 190  14 246 190  14
+246 190  14 246 190  14 246 190  14 246 190  14
+246 190  14 246 190  14 246 190  14 246 190  14
+246 190  14 239 182  13 230 174  11 216 158  10
+190 142  34 124 112  88  70  70  70  38  38  38
+ 18  18  18   6   6   6   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   6   6   6  22  22  22
+ 62  62  62 168 124  44 206 145  10 224 166  10
+236 178  12 239 182  13 242 186  14 242 186  14
+246 186  14 246 190  14 246 190  14 246 190  14
+246 190  14 246 190  14 246 190  14 246 190  14
+246 190  14 246 190  14 246 190  14 246 190  14
+246 190  14 236 178  12 216 158  10 175 118   6
+ 80  54   7   2   2   6   6   6   6  30  30  30
+ 54  54  54  62  62  62  50  50  50  38  38  38
+ 14  14  14   2   2   6   2   2   6   2   2   6
+  2   2   6   2   2   6   2   2   6   2   2   6
+  2   2   6   6   6   6  80  54   7 167 114   7
+213 154  11 236 178  12 246 190  14 246 190  14
+246 190  14 246 190  14 246 190  14 246 190  14
+246 190  14 242 186  14 239 182  13 239 182  13
+230 174  11 210 150  10 174 135  50 124 112  88
+ 82  82  82  54  54  54  34  34  34  18  18  18
+  6   6   6   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   6   6   6  18  18  18
+ 50  50  50 158 118  36 192 133   9 200 144  11
+216 158  10 219 162  10 224 166  10 226 170  11
+230 174  11 236 178  12 239 182  13 239 182  13
+242 186  14 246 186  14 246 190  14 246 190  14
+246 190  14 246 190  14 246 190  14 246 190  14
+246 186  14 230 174  11 210 150  10 163 110   8
+104  69   6  10  10  10   2   2   6   2   2   6
+  2   2   6   2   2   6   2   2   6   2   2   6
+  2   2   6   2   2   6   2   2   6   2   2   6
+  2   2   6   2   2   6   2   2   6   2   2   6
+  2   2   6   6   6   6  91  60   6 167 114   7
+206 145  10 230 174  11 242 186  14 246 190  14
+246 190  14 246 190  14 246 186  14 242 186  14
+239 182  13 230 174  11 224 166  10 213 154  11
+180 133  36 124 112  88  86  86  86  58  58  58
+ 38  38  38  22  22  22  10  10  10   6   6   6
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0  14  14  14
+ 34  34  34  70  70  70 138 110  50 158 118  36
+167 114   7 180 123   7 192 133   9 197 138  11
+200 144  11 206 145  10 213 154  11 219 162  10
+224 166  10 230 174  11 239 182  13 242 186  14
+246 186  14 246 186  14 246 186  14 246 186  14
+239 182  13 216 158  10 185 133  11 152  99   6
+104  69   6  18  14   6   2   2   6   2   2   6
+  2   2   6   2   2   6   2   2   6   2   2   6
+  2   2   6   2   2   6   2   2   6   2   2   6
+  2   2   6   2   2   6   2   2   6   2   2   6
+  2   2   6   6   6   6  80  54   7 152  99   6
+192 133   9 219 162  10 236 178  12 239 182  13
+246 186  14 242 186  14 239 182  13 236 178  12
+224 166  10 206 145  10 192 133   9 154 121  60
+ 94  94  94  62  62  62  42  42  42  22  22  22
+ 14  14  14   6   6   6   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   6   6   6
+ 18  18  18  34  34  34  58  58  58  78  78  78
+101  98  89 124 112  88 142 110  46 156 107  11
+163 110   8 167 114   7 175 118   6 180 123   7
+185 133  11 197 138  11 210 150  10 219 162  10
+226 170  11 236 178  12 236 178  12 234 174  13
+219 162  10 197 138  11 163 110   8 130  83   6
+ 91  60   6  10  10  10   2   2   6   2   2   6
+ 18  18  18  38  38  38  38  38  38  38  38  38
+ 38  38  38  38  38  38  38  38  38  38  38  38
+ 38  38  38  38  38  38  26  26  26   2   2   6
+  2   2   6   6   6   6  70  47   6 137  92   6
+175 118   6 200 144  11 219 162  10 230 174  11
+234 174  13 230 174  11 219 162  10 210 150  10
+192 133   9 163 110   8 124 112  88  82  82  82
+ 50  50  50  30  30  30  14  14  14   6   6   6
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  6   6   6  14  14  14  22  22  22  34  34  34
+ 42  42  42  58  58  58  74  74  74  86  86  86
+101  98  89 122 102  70 130  98  46 121  87  25
+137  92   6 152  99   6 163 110   8 180 123   7
+185 133  11 197 138  11 206 145  10 200 144  11
+180 123   7 156 107  11 130  83   6 104  69   6
+ 50  34   6  54  54  54 110 110 110 101  98  89
+ 86  86  86  82  82  82  78  78  78  78  78  78
+ 78  78  78  78  78  78  78  78  78  78  78  78
+ 78  78  78  82  82  82  86  86  86  94  94  94
+106 106 106 101 101 101  86  66  34 124  80   6
+156 107  11 180 123   7 192 133   9 200 144  11
+206 145  10 200 144  11 192 133   9 175 118   6
+139 102  15 109 106  95  70  70  70  42  42  42
+ 22  22  22  10  10  10   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   6   6   6  10  10  10
+ 14  14  14  22  22  22  30  30  30  38  38  38
+ 50  50  50  62  62  62  74  74  74  90  90  90
+101  98  89 112 100  78 121  87  25 124  80   6
+137  92   6 152  99   6 152  99   6 152  99   6
+138  86   6 124  80   6  98  70   6  86  66  30
+101  98  89  82  82  82  58  58  58  46  46  46
+ 38  38  38  34  34  34  34  34  34  34  34  34
+ 34  34  34  34  34  34  34  34  34  34  34  34
+ 34  34  34  34  34  34  38  38  38  42  42  42
+ 54  54  54  82  82  82  94  86  76  91  60   6
+134  86   6 156 107  11 167 114   7 175 118   6
+175 118   6 167 114   7 152  99   6 121  87  25
+101  98  89  62  62  62  34  34  34  18  18  18
+  6   6   6   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   6   6   6   6   6   6  10  10  10
+ 18  18  18  22  22  22  30  30  30  42  42  42
+ 50  50  50  66  66  66  86  86  86 101  98  89
+106  86  58  98  70   6 104  69   6 104  69   6
+104  69   6  91  60   6  82  62  34  90  90  90
+ 62  62  62  38  38  38  22  22  22  14  14  14
+ 10  10  10  10  10  10  10  10  10  10  10  10
+ 10  10  10  10  10  10   6   6   6  10  10  10
+ 10  10  10  10  10  10  10  10  10  14  14  14
+ 22  22  22  42  42  42  70  70  70  89  81  66
+ 80  54   7 104  69   6 124  80   6 137  92   6
+134  86   6 116  81   8 100  82  52  86  86  86
+ 58  58  58  30  30  30  14  14  14   6   6   6
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   6   6   6  10  10  10  14  14  14
+ 18  18  18  26  26  26  38  38  38  54  54  54
+ 70  70  70  86  86  86  94  86  76  89  81  66
+ 89  81  66  86  86  86  74  74  74  50  50  50
+ 30  30  30  14  14  14   6   6   6   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  6   6   6  18  18  18  34  34  34  58  58  58
+ 82  82  82  89  81  66  89  81  66  89  81  66
+ 94  86  66  94  86  76  74  74  74  50  50  50
+ 26  26  26  14  14  14   6   6   6   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  6   6   6   6   6   6  14  14  14  18  18  18
+ 30  30  30  38  38  38  46  46  46  54  54  54
+ 50  50  50  42  42  42  30  30  30  18  18  18
+ 10  10  10   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   6   6   6  14  14  14  26  26  26
+ 38  38  38  50  50  50  58  58  58  58  58  58
+ 54  54  54  42  42  42  30  30  30  18  18  18
+ 10  10  10   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   6   6   6
+  6   6   6  10  10  10  14  14  14  18  18  18
+ 18  18  18  14  14  14  10  10  10   6   6   6
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   6   6   6
+ 14  14  14  18  18  18  22  22  22  22  22  22
+ 18  18  18  14  14  14  10  10  10   6   6   6
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
+  0   0   0   0   0   0   0   0   0   0   0   0
index 2bd1257..efc7f07 100644 (file)
@@ -42,7 +42,7 @@ config W1_MASTER_MXC
 
 config W1_MASTER_DS1WM
        tristate "Maxim DS1WM 1-wire busmaster"
-       depends on W1 && GENERIC_HARDIRQS
+       depends on W1
        help
          Say Y here to enable the DS1WM 1-wire driver, such as that
          in HP iPAQ devices like h5xxx, h2200, and ASIC3-based like
index 47e12cf..15c7251 100644 (file)
@@ -152,8 +152,6 @@ static int mxc_w1_remove(struct platform_device *pdev)
 
        clk_disable_unprepare(mdev->clk);
 
-       platform_set_drvdata(pdev, NULL);
-
        return 0;
 }
 
index 22013ca..c7c64f1 100644 (file)
@@ -234,9 +234,11 @@ static ssize_t w1_master_attribute_store_search(struct device * dev,
 {
        long tmp;
        struct w1_master *md = dev_to_w1_master(dev);
+       int ret;
 
-       if (strict_strtol(buf, 0, &tmp) == -EINVAL)
-               return -EINVAL;
+       ret = kstrtol(buf, 0, &tmp);
+       if (ret)
+               return ret;
 
        mutex_lock(&md->mutex);
        md->search_count = tmp;
@@ -266,9 +268,11 @@ static ssize_t w1_master_attribute_store_pullup(struct device *dev,
 {
        long tmp;
        struct w1_master *md = dev_to_w1_master(dev);
+       int ret;
 
-       if (strict_strtol(buf, 0, &tmp) == -EINVAL)
-               return -EINVAL;
+       ret = kstrtol(buf, 0, &tmp);
+       if (ret)
+               return ret;
 
        mutex_lock(&md->mutex);
        md->enable_pullup = tmp;
index 362085d..d1d53f3 100644 (file)
@@ -290,6 +290,16 @@ config ORION_WATCHDOG
          To compile this driver as a module, choose M here: the
          module will be called orion_wdt.
 
+config SUNXI_WATCHDOG
+       tristate "Allwinner SoCs watchdog support"
+       depends on ARCH_SUNXI
+       select WATCHDOG_CORE
+       help
+         Say Y here to include support for the watchdog timer
+         in Allwinner SoCs.
+         To compile this driver as a module, choose M here: the
+         module will be called sunxi_wdt.
+
 config COH901327_WATCHDOG
        bool "ST-Ericsson COH 901 327 watchdog"
        depends on ARCH_U300
index 2f26a0b..6c5bb27 100644 (file)
@@ -46,6 +46,7 @@ obj-$(CONFIG_PNX4008_WATCHDOG) += pnx4008_wdt.o
 obj-$(CONFIG_IOP_WATCHDOG) += iop_wdt.o
 obj-$(CONFIG_DAVINCI_WATCHDOG) += davinci_wdt.o
 obj-$(CONFIG_ORION_WATCHDOG) += orion_wdt.o
+obj-$(CONFIG_SUNXI_WATCHDOG) += sunxi_wdt.o
 obj-$(CONFIG_COH901327_WATCHDOG) += coh901327_wdt.o
 obj-$(CONFIG_STMP3XXX_RTC_WATCHDOG) += stmp3xxx_rtc_wdt.o
 obj-$(CONFIG_NUC900_WATCHDOG) += nuc900_wdt.o
index 2f3cc8f..b3709f9 100644 (file)
@@ -280,11 +280,6 @@ static int ar7_wdt_probe(struct platform_device *pdev)
 
        ar7_regs_wdt =
                platform_get_resource_byname(pdev, IORESOURCE_MEM, "regs");
-       if (!ar7_regs_wdt) {
-               pr_err("could not get registers resource\n");
-               return -ENODEV;
-       }
-
        ar7_wdt = devm_ioremap_resource(&pdev->dev, ar7_regs_wdt);
        if (IS_ERR(ar7_wdt))
                return PTR_ERR(ar7_wdt);
index de7e4f4..5be5e3d 100644 (file)
@@ -162,7 +162,8 @@ extern asmlinkage void asminline_call(struct cmn_registers *pi86Regs,
 #define HPWDT_ARCH     32
 
 asm(".text                          \n\t"
-    ".align 4                       \n"
+    ".align 4                       \n\t"
+    ".globl asminline_call         \n"
     "asminline_call:                \n\t"
     "pushl       %ebp               \n\t"
     "movl        %esp, %ebp         \n\t"
@@ -352,7 +353,8 @@ static int detect_cru_service(void)
 #define HPWDT_ARCH     64
 
 asm(".text                      \n\t"
-    ".align 4                   \n"
+    ".align 4                   \n\t"
+    ".globl asminline_call     \n"
     "asminline_call:            \n\t"
     "pushq      %rbp            \n\t"
     "movq       %rsp, %rbp      \n\t"
index e2b6d2c..b15b6ef 100644 (file)
@@ -256,11 +256,6 @@ static int nuc900wdt_probe(struct platform_device *pdev)
        spin_lock_init(&nuc900_wdt->wdt_lock);
 
        res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       if (res == NULL) {
-               dev_err(&pdev->dev, "no memory resource specified\n");
-               return -ENOENT;
-       }
-
        nuc900_wdt->wdt_base = devm_ioremap_resource(&pdev->dev, res);
        if (IS_ERR(nuc900_wdt->wdt_base))
                return PTR_ERR(nuc900_wdt->wdt_base);
index 6a22cf5..23aad7c 100644 (file)
@@ -84,13 +84,17 @@ MODULE_PARM_DESC(soft_noboot, "Watchdog action, set to 1 to ignore reboots, "
                        "0 to reboot (default 0)");
 MODULE_PARM_DESC(debug, "Watchdog debug, set to >1 for debug (default 0)");
 
-static struct device    *wdt_dev;      /* platform device attached to */
-static struct resource *wdt_mem;
-static struct resource *wdt_irq;
-static struct clk      *wdt_clock;
-static void __iomem    *wdt_base;
-static unsigned int     wdt_count;
-static DEFINE_SPINLOCK(wdt_lock);
+struct s3c2410_wdt {
+       struct device           *dev;
+       struct clk              *clock;
+       void __iomem            *reg_base;
+       unsigned int            count;
+       spinlock_t              lock;
+       unsigned long           wtcon_save;
+       unsigned long           wtdat_save;
+       struct watchdog_device  wdt_device;
+       struct notifier_block   freq_transition;
+};
 
 /* watchdog control routines */
 
@@ -102,29 +106,38 @@ do {                                                      \
 
 /* functions */
 
+static inline struct s3c2410_wdt *freq_to_wdt(struct notifier_block *nb)
+{
+       return container_of(nb, struct s3c2410_wdt, freq_transition);
+}
+
 static int s3c2410wdt_keepalive(struct watchdog_device *wdd)
 {
-       spin_lock(&wdt_lock);
-       writel(wdt_count, wdt_base + S3C2410_WTCNT);
-       spin_unlock(&wdt_lock);
+       struct s3c2410_wdt *wdt = watchdog_get_drvdata(wdd);
+
+       spin_lock(&wdt->lock);
+       writel(wdt->count, wdt->reg_base + S3C2410_WTCNT);
+       spin_unlock(&wdt->lock);
 
        return 0;
 }
 
-static void __s3c2410wdt_stop(void)
+static void __s3c2410wdt_stop(struct s3c2410_wdt *wdt)
 {
        unsigned long wtcon;
 
-       wtcon = readl(wdt_base + S3C2410_WTCON);
+       wtcon = readl(wdt->reg_base + S3C2410_WTCON);
        wtcon &= ~(S3C2410_WTCON_ENABLE | S3C2410_WTCON_RSTEN);
-       writel(wtcon, wdt_base + S3C2410_WTCON);
+       writel(wtcon, wdt->reg_base + S3C2410_WTCON);
 }
 
 static int s3c2410wdt_stop(struct watchdog_device *wdd)
 {
-       spin_lock(&wdt_lock);
-       __s3c2410wdt_stop();
-       spin_unlock(&wdt_lock);
+       struct s3c2410_wdt *wdt = watchdog_get_drvdata(wdd);
+
+       spin_lock(&wdt->lock);
+       __s3c2410wdt_stop(wdt);
+       spin_unlock(&wdt->lock);
 
        return 0;
 }
@@ -132,12 +145,13 @@ static int s3c2410wdt_stop(struct watchdog_device *wdd)
 static int s3c2410wdt_start(struct watchdog_device *wdd)
 {
        unsigned long wtcon;
+       struct s3c2410_wdt *wdt = watchdog_get_drvdata(wdd);
 
-       spin_lock(&wdt_lock);
+       spin_lock(&wdt->lock);
 
-       __s3c2410wdt_stop();
+       __s3c2410wdt_stop(wdt);
 
-       wtcon = readl(wdt_base + S3C2410_WTCON);
+       wtcon = readl(wdt->reg_base + S3C2410_WTCON);
        wtcon |= S3C2410_WTCON_ENABLE | S3C2410_WTCON_DIV128;
 
        if (soft_noboot) {
@@ -148,25 +162,26 @@ static int s3c2410wdt_start(struct watchdog_device *wdd)
                wtcon |= S3C2410_WTCON_RSTEN;
        }
 
-       DBG("%s: wdt_count=0x%08x, wtcon=%08lx\n",
-           __func__, wdt_count, wtcon);
+       DBG("%s: count=0x%08x, wtcon=%08lx\n",
+           __func__, wdt->count, wtcon);
 
-       writel(wdt_count, wdt_base + S3C2410_WTDAT);
-       writel(wdt_count, wdt_base + S3C2410_WTCNT);
-       writel(wtcon, wdt_base + S3C2410_WTCON);
-       spin_unlock(&wdt_lock);
+       writel(wdt->count, wdt->reg_base + S3C2410_WTDAT);
+       writel(wdt->count, wdt->reg_base + S3C2410_WTCNT);
+       writel(wtcon, wdt->reg_base + S3C2410_WTCON);
+       spin_unlock(&wdt->lock);
 
        return 0;
 }
 
-static inline int s3c2410wdt_is_running(void)
+static inline int s3c2410wdt_is_running(struct s3c2410_wdt *wdt)
 {
-       return readl(wdt_base + S3C2410_WTCON) & S3C2410_WTCON_ENABLE;
+       return readl(wdt->reg_base + S3C2410_WTCON) & S3C2410_WTCON_ENABLE;
 }
 
 static int s3c2410wdt_set_heartbeat(struct watchdog_device *wdd, unsigned timeout)
 {
-       unsigned long freq = clk_get_rate(wdt_clock);
+       struct s3c2410_wdt *wdt = watchdog_get_drvdata(wdd);
+       unsigned long freq = clk_get_rate(wdt->clock);
        unsigned int count;
        unsigned int divisor = 1;
        unsigned long wtcon;
@@ -192,7 +207,7 @@ static int s3c2410wdt_set_heartbeat(struct watchdog_device *wdd, unsigned timeou
                }
 
                if ((count / divisor) >= 0x10000) {
-                       dev_err(wdt_dev, "timeout %d too big\n", timeout);
+                       dev_err(wdt->dev, "timeout %d too big\n", timeout);
                        return -EINVAL;
                }
        }
@@ -201,15 +216,15 @@ static int s3c2410wdt_set_heartbeat(struct watchdog_device *wdd, unsigned timeou
            __func__, timeout, divisor, count, count/divisor);
 
        count /= divisor;
-       wdt_count = count;
+       wdt->count = count;
 
        /* update the pre-scaler */
-       wtcon = readl(wdt_base + S3C2410_WTCON);
+       wtcon = readl(wdt->reg_base + S3C2410_WTCON);
        wtcon &= ~S3C2410_WTCON_PRESCALE_MASK;
        wtcon |= S3C2410_WTCON_PRESCALE(divisor-1);
 
-       writel(count, wdt_base + S3C2410_WTDAT);
-       writel(wtcon, wdt_base + S3C2410_WTCON);
+       writel(count, wdt->reg_base + S3C2410_WTDAT);
+       writel(wtcon, wdt->reg_base + S3C2410_WTCON);
 
        wdd->timeout = (count * divisor) / freq;
 
@@ -242,21 +257,23 @@ static struct watchdog_device s3c2410_wdd = {
 
 static irqreturn_t s3c2410wdt_irq(int irqno, void *param)
 {
-       dev_info(wdt_dev, "watchdog timer expired (irq)\n");
+       struct s3c2410_wdt *wdt = platform_get_drvdata(param);
+
+       dev_info(wdt->dev, "watchdog timer expired (irq)\n");
 
-       s3c2410wdt_keepalive(&s3c2410_wdd);
+       s3c2410wdt_keepalive(&wdt->wdt_device);
        return IRQ_HANDLED;
 }
 
-
 #ifdef CONFIG_CPU_FREQ
 
 static int s3c2410wdt_cpufreq_transition(struct notifier_block *nb,
                                          unsigned long val, void *data)
 {
        int ret;
+       struct s3c2410_wdt *wdt = freq_to_wdt(nb);
 
-       if (!s3c2410wdt_is_running())
+       if (!s3c2410wdt_is_running(wdt))
                goto done;
 
        if (val == CPUFREQ_PRECHANGE) {
@@ -265,14 +282,15 @@ static int s3c2410wdt_cpufreq_transition(struct notifier_block *nb,
                 * the watchdog is running.
                 */
 
-               s3c2410wdt_keepalive(&s3c2410_wdd);
+               s3c2410wdt_keepalive(&wdt->wdt_device);
        } else if (val == CPUFREQ_POSTCHANGE) {
-               s3c2410wdt_stop(&s3c2410_wdd);
+               s3c2410wdt_stop(&wdt->wdt_device);
 
-               ret = s3c2410wdt_set_heartbeat(&s3c2410_wdd, s3c2410_wdd.timeout);
+               ret = s3c2410wdt_set_heartbeat(&wdt->wdt_device,
+                                               wdt->wdt_device.timeout);
 
                if (ret >= 0)
-                       s3c2410wdt_start(&s3c2410_wdd);
+                       s3c2410wdt_start(&wdt->wdt_device);
                else
                        goto err;
        }
@@ -281,34 +299,35 @@ done:
        return 0;
 
  err:
-       dev_err(wdt_dev, "cannot set new value for timeout %d\n",
-                               s3c2410_wdd.timeout);
+       dev_err(wdt->dev, "cannot set new value for timeout %d\n",
+                               wdt->wdt_device.timeout);
        return ret;
 }
 
-static struct notifier_block s3c2410wdt_cpufreq_transition_nb = {
-       .notifier_call  = s3c2410wdt_cpufreq_transition,
-};
-
-static inline int s3c2410wdt_cpufreq_register(void)
+static inline int s3c2410wdt_cpufreq_register(struct s3c2410_wdt *wdt)
 {
-       return cpufreq_register_notifier(&s3c2410wdt_cpufreq_transition_nb,
+       wdt->freq_transition.notifier_call = s3c2410wdt_cpufreq_transition;
+
+       return cpufreq_register_notifier(&wdt->freq_transition,
                                         CPUFREQ_TRANSITION_NOTIFIER);
 }
 
-static inline void s3c2410wdt_cpufreq_deregister(void)
+static inline void s3c2410wdt_cpufreq_deregister(struct s3c2410_wdt *wdt)
 {
-       cpufreq_unregister_notifier(&s3c2410wdt_cpufreq_transition_nb,
+       wdt->freq_transition.notifier_call = s3c2410wdt_cpufreq_transition;
+
+       cpufreq_unregister_notifier(&wdt->freq_transition,
                                    CPUFREQ_TRANSITION_NOTIFIER);
 }
 
 #else
-static inline int s3c2410wdt_cpufreq_register(void)
+
+static inline int s3c2410wdt_cpufreq_register(struct s3c2410_wdt *wdt)
 {
        return 0;
 }
 
-static inline void s3c2410wdt_cpufreq_deregister(void)
+static inline void s3c2410wdt_cpufreq_deregister(struct s3c2410_wdt *wdt)
 {
 }
 #endif
@@ -316,6 +335,9 @@ static inline void s3c2410wdt_cpufreq_deregister(void)
 static int s3c2410wdt_probe(struct platform_device *pdev)
 {
        struct device *dev;
+       struct s3c2410_wdt *wdt;
+       struct resource *wdt_mem;
+       struct resource *wdt_irq;
        unsigned int wtcon;
        int started = 0;
        int ret;
@@ -323,13 +345,14 @@ static int s3c2410wdt_probe(struct platform_device *pdev)
        DBG("%s: probe=%p\n", __func__, pdev);
 
        dev = &pdev->dev;
-       wdt_dev = &pdev->dev;
 
-       wdt_mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       if (wdt_mem == NULL) {
-               dev_err(dev, "no memory resource specified\n");
-               return -ENOENT;
-       }
+       wdt = devm_kzalloc(dev, sizeof(*wdt), GFP_KERNEL);
+       if (!wdt)
+               return -ENOMEM;
+
+       wdt->dev = &pdev->dev;
+       spin_lock_init(&wdt->lock);
+       wdt->wdt_device = s3c2410_wdd;
 
        wdt_irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
        if (wdt_irq == NULL) {
@@ -339,35 +362,40 @@ static int s3c2410wdt_probe(struct platform_device *pdev)
        }
 
        /* get the memory region for the watchdog timer */
-       wdt_base = devm_ioremap_resource(dev, wdt_mem);
-       if (IS_ERR(wdt_base)) {
-               ret = PTR_ERR(wdt_base);
+       wdt_mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       wdt->reg_base = devm_ioremap_resource(dev, wdt_mem);
+       if (IS_ERR(wdt->reg_base)) {
+               ret = PTR_ERR(wdt->reg_base);
                goto err;
        }
 
-       DBG("probe: mapped wdt_base=%p\n", wdt_base);
+       DBG("probe: mapped reg_base=%p\n", wdt->reg_base);
 
-       wdt_clock = devm_clk_get(dev, "watchdog");
-       if (IS_ERR(wdt_clock)) {
+       wdt->clock = devm_clk_get(dev, "watchdog");
+       if (IS_ERR(wdt->clock)) {
                dev_err(dev, "failed to find watchdog clock source\n");
-               ret = PTR_ERR(wdt_clock);
+               ret = PTR_ERR(wdt->clock);
                goto err;
        }
 
-       clk_prepare_enable(wdt_clock);
+       clk_prepare_enable(wdt->clock);
 
-       ret = s3c2410wdt_cpufreq_register();
+       ret = s3c2410wdt_cpufreq_register(wdt);
        if (ret < 0) {
                dev_err(dev, "failed to register cpufreq\n");
                goto err_clk;
        }
 
+       watchdog_set_drvdata(&wdt->wdt_device, wdt);
+
        /* see if we can actually set the requested timer margin, and if
         * not, try the default value */
 
-       watchdog_init_timeout(&s3c2410_wdd, tmr_margin,  &pdev->dev);
-       if (s3c2410wdt_set_heartbeat(&s3c2410_wdd, s3c2410_wdd.timeout)) {
-               started = s3c2410wdt_set_heartbeat(&s3c2410_wdd,
+       watchdog_init_timeout(&wdt->wdt_device, tmr_margin, &pdev->dev);
+       ret = s3c2410wdt_set_heartbeat(&wdt->wdt_device,
+                                       wdt->wdt_device.timeout);
+       if (ret) {
+               started = s3c2410wdt_set_heartbeat(&wdt->wdt_device,
                                        CONFIG_S3C2410_WATCHDOG_DEFAULT_TIME);
 
                if (started == 0)
@@ -386,9 +414,9 @@ static int s3c2410wdt_probe(struct platform_device *pdev)
                goto err_cpufreq;
        }
 
-       watchdog_set_nowayout(&s3c2410_wdd, nowayout);
+       watchdog_set_nowayout(&wdt->wdt_device, nowayout);
 
-       ret = watchdog_register_device(&s3c2410_wdd);
+       ret = watchdog_register_device(&wdt->wdt_device);
        if (ret) {
                dev_err(dev, "cannot register watchdog (%d)\n", ret);
                goto err_cpufreq;
@@ -396,18 +424,20 @@ static int s3c2410wdt_probe(struct platform_device *pdev)
 
        if (tmr_atboot && started == 0) {
                dev_info(dev, "starting watchdog timer\n");
-               s3c2410wdt_start(&s3c2410_wdd);
+               s3c2410wdt_start(&wdt->wdt_device);
        } else if (!tmr_atboot) {
                /* if we're not enabling the watchdog, then ensure it is
                 * disabled if it has been left running from the bootloader
                 * or other source */
 
-               s3c2410wdt_stop(&s3c2410_wdd);
+               s3c2410wdt_stop(&wdt->wdt_device);
        }
 
+       platform_set_drvdata(pdev, wdt);
+
        /* print out a statement of readiness */
 
-       wtcon = readl(wdt_base + S3C2410_WTCON);
+       wtcon = readl(wdt->reg_base + S3C2410_WTCON);
 
        dev_info(dev, "watchdog %sactive, reset %sabled, irq %sabled\n",
                 (wtcon & S3C2410_WTCON_ENABLE) ?  "" : "in",
@@ -417,64 +447,64 @@ static int s3c2410wdt_probe(struct platform_device *pdev)
        return 0;
 
  err_cpufreq:
-       s3c2410wdt_cpufreq_deregister();
+       s3c2410wdt_cpufreq_deregister(wdt);
 
  err_clk:
-       clk_disable_unprepare(wdt_clock);
-       wdt_clock = NULL;
+       clk_disable_unprepare(wdt->clock);
+       wdt->clock = NULL;
 
  err:
-       wdt_irq = NULL;
-       wdt_mem = NULL;
        return ret;
 }
 
 static int s3c2410wdt_remove(struct platform_device *dev)
 {
-       watchdog_unregister_device(&s3c2410_wdd);
+       struct s3c2410_wdt *wdt = platform_get_drvdata(dev);
 
-       s3c2410wdt_cpufreq_deregister();
+       watchdog_unregister_device(&wdt->wdt_device);
 
-       clk_disable_unprepare(wdt_clock);
-       wdt_clock = NULL;
+       s3c2410wdt_cpufreq_deregister(wdt);
+
+       clk_disable_unprepare(wdt->clock);
+       wdt->clock = NULL;
 
-       wdt_irq = NULL;
-       wdt_mem = NULL;
        return 0;
 }
 
 static void s3c2410wdt_shutdown(struct platform_device *dev)
 {
-       s3c2410wdt_stop(&s3c2410_wdd);
+       struct s3c2410_wdt *wdt = platform_get_drvdata(dev);
+
+       s3c2410wdt_stop(&wdt->wdt_device);
 }
 
 #ifdef CONFIG_PM_SLEEP
 
-static unsigned long wtcon_save;
-static unsigned long wtdat_save;
-
 static int s3c2410wdt_suspend(struct device *dev)
 {
+       struct s3c2410_wdt *wdt = dev_get_drvdata(dev);
+
        /* Save watchdog state, and turn it off. */
-       wtcon_save = readl(wdt_base + S3C2410_WTCON);
-       wtdat_save = readl(wdt_base + S3C2410_WTDAT);
+       wdt->wtcon_save = readl(wdt->reg_base + S3C2410_WTCON);
+       wdt->wtdat_save = readl(wdt->reg_base + S3C2410_WTDAT);
 
        /* Note that WTCNT doesn't need to be saved. */
-       s3c2410wdt_stop(&s3c2410_wdd);
+       s3c2410wdt_stop(&wdt->wdt_device);
 
        return 0;
 }
 
 static int s3c2410wdt_resume(struct device *dev)
 {
-       /* Restore watchdog state. */
+       struct s3c2410_wdt *wdt = dev_get_drvdata(dev);
 
-       writel(wtdat_save, wdt_base + S3C2410_WTDAT);
-       writel(wtdat_save, wdt_base + S3C2410_WTCNT); /* Reset count */
-       writel(wtcon_save, wdt_base + S3C2410_WTCON);
+       /* Restore watchdog state. */
+       writel(wdt->wtdat_save, wdt->reg_base + S3C2410_WTDAT);
+       writel(wdt->wtdat_save, wdt->reg_base + S3C2410_WTCNT);/* Reset count */
+       writel(wdt->wtcon_save, wdt->reg_base + S3C2410_WTCON);
 
        dev_info(dev, "watchdog %sabled\n",
-               (wtcon_save & S3C2410_WTCON_ENABLE) ? "en" : "dis");
+               (wdt->wtcon_save & S3C2410_WTCON_ENABLE) ? "en" : "dis");
 
        return 0;
 }
diff --git a/drivers/watchdog/sunxi_wdt.c b/drivers/watchdog/sunxi_wdt.c
new file mode 100644 (file)
index 0000000..1f94b42
--- /dev/null
@@ -0,0 +1,237 @@
+/*
+ *      sunxi Watchdog Driver
+ *
+ *      Copyright (c) 2013 Carlo Caione
+ *                    2012 Henrik Nordstrom
+ *
+ *      This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ *
+ *      Based on xen_wdt.c
+ *      (c) Copyright 2010 Novell, Inc.
+ */
+
+#include <linux/clk.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/types.h>
+#include <linux/watchdog.h>
+
+#define WDT_MAX_TIMEOUT         16
+#define WDT_MIN_TIMEOUT         1
+#define WDT_MODE_TIMEOUT(n)     ((n) << 3)
+#define WDT_TIMEOUT_MASK        WDT_MODE_TIMEOUT(0x0F)
+
+#define WDT_CTRL                0x00
+#define WDT_CTRL_RELOAD         ((1 << 0) | (0x0a57 << 1))
+
+#define WDT_MODE                0x04
+#define WDT_MODE_EN             (1 << 0)
+#define WDT_MODE_RST_EN         (1 << 1)
+
+#define DRV_NAME               "sunxi-wdt"
+#define DRV_VERSION            "1.0"
+
+static bool nowayout = WATCHDOG_NOWAYOUT;
+static unsigned int timeout = WDT_MAX_TIMEOUT;
+
+struct sunxi_wdt_dev {
+       struct watchdog_device wdt_dev;
+       void __iomem *wdt_base;
+};
+
+/*
+ * wdt_timeout_map maps the watchdog timer interval value in seconds to
+ * the value of the register WDT_MODE bit 3:6
+ *
+ * [timeout seconds] = register value
+ *
+ */
+
+static const int wdt_timeout_map[] = {
+       [1] = 0b0001,  /* 1s  */
+       [2] = 0b0010,  /* 2s  */
+       [3] = 0b0011,  /* 3s  */
+       [4] = 0b0100,  /* 4s  */
+       [5] = 0b0101,  /* 5s  */
+       [6] = 0b0110,  /* 6s  */
+       [8] = 0b0111,  /* 8s  */
+       [10] = 0b1000, /* 10s */
+       [12] = 0b1001, /* 12s */
+       [14] = 0b1010, /* 14s */
+       [16] = 0b1011, /* 16s */
+};
+
+static int sunxi_wdt_ping(struct watchdog_device *wdt_dev)
+{
+       struct sunxi_wdt_dev *sunxi_wdt = watchdog_get_drvdata(wdt_dev);
+       void __iomem *wdt_base = sunxi_wdt->wdt_base;
+
+       iowrite32(WDT_CTRL_RELOAD, wdt_base + WDT_CTRL);
+
+       return 0;
+}
+
+static int sunxi_wdt_set_timeout(struct watchdog_device *wdt_dev,
+               unsigned int timeout)
+{
+       struct sunxi_wdt_dev *sunxi_wdt = watchdog_get_drvdata(wdt_dev);
+       void __iomem *wdt_base = sunxi_wdt->wdt_base;
+       u32 reg;
+
+       if (wdt_timeout_map[timeout] == 0)
+               timeout++;
+
+       sunxi_wdt->wdt_dev.timeout = timeout;
+
+       reg = ioread32(wdt_base + WDT_MODE);
+       reg &= ~WDT_TIMEOUT_MASK;
+       reg |= WDT_MODE_TIMEOUT(wdt_timeout_map[timeout]);
+       iowrite32(reg, wdt_base + WDT_MODE);
+
+       sunxi_wdt_ping(wdt_dev);
+
+       return 0;
+}
+
+static int sunxi_wdt_stop(struct watchdog_device *wdt_dev)
+{
+       struct sunxi_wdt_dev *sunxi_wdt = watchdog_get_drvdata(wdt_dev);
+       void __iomem *wdt_base = sunxi_wdt->wdt_base;
+
+       iowrite32(0, wdt_base + WDT_MODE);
+
+       return 0;
+}
+
+static int sunxi_wdt_start(struct watchdog_device *wdt_dev)
+{
+       u32 reg;
+       struct sunxi_wdt_dev *sunxi_wdt = watchdog_get_drvdata(wdt_dev);
+       void __iomem *wdt_base = sunxi_wdt->wdt_base;
+       int ret;
+
+       ret = sunxi_wdt_set_timeout(&sunxi_wdt->wdt_dev,
+                       sunxi_wdt->wdt_dev.timeout);
+       if (ret < 0)
+               return ret;
+
+       reg = ioread32(wdt_base + WDT_MODE);
+       reg |= (WDT_MODE_RST_EN | WDT_MODE_EN);
+       iowrite32(reg, wdt_base + WDT_MODE);
+
+       return 0;
+}
+
+static const struct watchdog_info sunxi_wdt_info = {
+       .identity       = DRV_NAME,
+       .options        = WDIOF_SETTIMEOUT |
+                         WDIOF_KEEPALIVEPING |
+                         WDIOF_MAGICCLOSE,
+};
+
+static const struct watchdog_ops sunxi_wdt_ops = {
+       .owner          = THIS_MODULE,
+       .start          = sunxi_wdt_start,
+       .stop           = sunxi_wdt_stop,
+       .ping           = sunxi_wdt_ping,
+       .set_timeout    = sunxi_wdt_set_timeout,
+};
+
+static int __init sunxi_wdt_probe(struct platform_device *pdev)
+{
+       struct sunxi_wdt_dev *sunxi_wdt;
+       struct resource *res;
+       int err;
+
+       sunxi_wdt = devm_kzalloc(&pdev->dev, sizeof(*sunxi_wdt), GFP_KERNEL);
+       if (!sunxi_wdt)
+               return -EINVAL;
+
+       platform_set_drvdata(pdev, sunxi_wdt);
+
+       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+       sunxi_wdt->wdt_base = devm_ioremap_resource(&pdev->dev, res);
+       if (IS_ERR(sunxi_wdt->wdt_base))
+               return PTR_ERR(sunxi_wdt->wdt_base);
+
+       sunxi_wdt->wdt_dev.info = &sunxi_wdt_info;
+       sunxi_wdt->wdt_dev.ops = &sunxi_wdt_ops;
+       sunxi_wdt->wdt_dev.timeout = WDT_MAX_TIMEOUT;
+       sunxi_wdt->wdt_dev.max_timeout = WDT_MAX_TIMEOUT;
+       sunxi_wdt->wdt_dev.min_timeout = WDT_MIN_TIMEOUT;
+       sunxi_wdt->wdt_dev.parent = &pdev->dev;
+
+       watchdog_init_timeout(&sunxi_wdt->wdt_dev, timeout, &pdev->dev);
+       watchdog_set_nowayout(&sunxi_wdt->wdt_dev, nowayout);
+
+       watchdog_set_drvdata(&sunxi_wdt->wdt_dev, sunxi_wdt);
+
+       sunxi_wdt_stop(&sunxi_wdt->wdt_dev);
+
+       err = watchdog_register_device(&sunxi_wdt->wdt_dev);
+       if (unlikely(err))
+               return err;
+
+       dev_info(&pdev->dev, "Watchdog enabled (timeout=%d sec, nowayout=%d)",
+                       sunxi_wdt->wdt_dev.timeout, nowayout);
+
+       return 0;
+}
+
+static int __exit sunxi_wdt_remove(struct platform_device *pdev)
+{
+       struct sunxi_wdt_dev *sunxi_wdt = platform_get_drvdata(pdev);
+
+       watchdog_unregister_device(&sunxi_wdt->wdt_dev);
+       watchdog_set_drvdata(&sunxi_wdt->wdt_dev, NULL);
+
+       return 0;
+}
+
+static void sunxi_wdt_shutdown(struct platform_device *pdev)
+{
+       struct sunxi_wdt_dev *sunxi_wdt = platform_get_drvdata(pdev);
+
+       sunxi_wdt_stop(&sunxi_wdt->wdt_dev);
+}
+
+static const struct of_device_id sunxi_wdt_dt_ids[] = {
+       { .compatible = "allwinner,sun4i-wdt" },
+       { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, sunxi_wdt_dt_ids);
+
+static struct platform_driver sunxi_wdt_driver = {
+       .probe          = sunxi_wdt_probe,
+       .remove         = sunxi_wdt_remove,
+       .shutdown       = sunxi_wdt_shutdown,
+       .driver         = {
+               .owner          = THIS_MODULE,
+               .name           = DRV_NAME,
+               .of_match_table = of_match_ptr(sunxi_wdt_dt_ids)
+       },
+};
+
+module_platform_driver(sunxi_wdt_driver);
+
+module_param(timeout, uint, 0);
+MODULE_PARM_DESC(timeout, "Watchdog heartbeat in seconds");
+
+module_param(nowayout, bool, 0);
+MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started "
+               "(default=" __MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Carlo Caione <carlo.caione@gmail.com>");
+MODULE_AUTHOR("Henrik Nordstrom <henrik@henriknordstrom.net>");
+MODULE_DESCRIPTION("sunxi WatchDog Timer Driver");
+MODULE_VERSION(DRV_VERSION);
index 4da59b4..42913f1 100644 (file)
@@ -403,21 +403,11 @@ static int ts72xx_wdt_probe(struct platform_device *pdev)
        }
 
        r1 = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       if (!r1) {
-               dev_err(&pdev->dev, "failed to get memory resource\n");
-               return -ENODEV;
-       }
-
        wdt->control_reg = devm_ioremap_resource(&pdev->dev, r1);
        if (IS_ERR(wdt->control_reg))
                return PTR_ERR(wdt->control_reg);
 
        r2 = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-       if (!r2) {
-               dev_err(&pdev->dev, "failed to get memory resource\n");
-               return -ENODEV;
-       }
-
        wdt->feed_reg = devm_ioremap_resource(&pdev->dev, r2);
        if (IS_ERR(wdt->feed_reg))
                return PTR_ERR(wdt->feed_reg);
index 3101cf6..a50c6e3 100644 (file)
@@ -349,8 +349,6 @@ static enum bp_state increase_reservation(unsigned long nr_pages)
                BUG_ON(page == NULL);
 
                pfn = page_to_pfn(page);
-               BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap) &&
-                      phys_to_machine_mapping_valid(pfn));
 
                set_phys_to_machine(pfn, frame_list[i]);
 
@@ -380,6 +378,7 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
        enum bp_state state = BP_DONE;
        unsigned long  pfn, i;
        struct page   *page;
+       struct page   *scratch_page;
        int ret;
        struct xen_memory_reservation reservation = {
                .address_bits = 0,
@@ -399,6 +398,8 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
        if (nr_pages > ARRAY_SIZE(frame_list))
                nr_pages = ARRAY_SIZE(frame_list);
 
+       scratch_page = get_balloon_scratch_page();
+
        for (i = 0; i < nr_pages; i++) {
                page = alloc_page(gfp);
                if (page == NULL) {
@@ -416,7 +417,7 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
                if (xen_pv_domain() && !PageHighMem(page)) {
                        ret = HYPERVISOR_update_va_mapping(
                                (unsigned long)__va(pfn << PAGE_SHIFT),
-                               pfn_pte(page_to_pfn(__get_cpu_var(balloon_scratch_page)),
+                               pfn_pte(page_to_pfn(scratch_page),
                                        PAGE_KERNEL_RO), 0);
                        BUG_ON(ret);
                }
@@ -432,14 +433,14 @@ static enum bp_state decrease_reservation(unsigned long nr_pages, gfp_t gfp)
                pfn = mfn_to_pfn(frame_list[i]);
                if (!xen_feature(XENFEAT_auto_translated_physmap)) {
                        unsigned long p;
-                       struct page *pg;
-                       pg = __get_cpu_var(balloon_scratch_page);
-                       p = page_to_pfn(pg);
+                       p = page_to_pfn(scratch_page);
                        __set_phys_to_machine(pfn, pfn_to_mfn(p));
                }
                balloon_append(pfn_to_page(pfn));
        }
 
+       put_balloon_scratch_page();
+
        set_xen_guest_handle(reservation.extent_start, frame_list);
        reservation.nr_extents   = nr_pages;
        ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation);
index d384a8b..aa5ecf4 100644 (file)
@@ -183,7 +183,7 @@ static int v9fs_file_do_lock(struct file *filp, int cmd, struct file_lock *fl)
        else
                flock.length = fl->fl_end - fl->fl_start + 1;
        flock.proc_id = fl->fl_pid;
-       flock.client_id = utsname()->nodename;
+       flock.client_id = fid->clnt->name;
        if (IS_SETLKW(cmd))
                flock.flags = P9_LOCK_FLAGS_BLOCK;
 
@@ -260,7 +260,7 @@ static int v9fs_file_getlock(struct file *filp, struct file_lock *fl)
        else
                glock.length = fl->fl_end - fl->fl_start + 1;
        glock.proc_id = fl->fl_pid;
-       glock.client_id = utsname()->nodename;
+       glock.client_id = fid->clnt->name;
 
        res = p9_client_getlock_dotl(fid, &glock);
        if (res < 0)
index 25b018e..94de6d1 100644 (file)
@@ -146,7 +146,7 @@ static umode_t p9mode2unixmode(struct v9fs_session_info *v9ses,
                char type = 0, ext[32];
                int major = -1, minor = -1;
 
-               strncpy(ext, stat->extension, sizeof(ext));
+               strlcpy(ext, stat->extension, sizeof(ext));
                sscanf(ext, "%c %u %u", &type, &major, &minor);
                switch (type) {
                case 'c':
@@ -1186,7 +1186,7 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
                         * this even with .u extension. So check
                         * for non NULL stat->extension
                         */
-                       strncpy(ext, stat->extension, sizeof(ext));
+                       strlcpy(ext, stat->extension, sizeof(ext));
                        /* HARDLINKCOUNT %u */
                        sscanf(ext, "%13s %u", tag_name, &i_nlink);
                        if (!strncmp(tag_name, "HARDLINKCOUNT", 13))
index 5f95d1e..b9acada 100644 (file)
@@ -50,7 +50,7 @@ static void adfs_write_failed(struct address_space *mapping, loff_t to)
        struct inode *inode = mapping->host;
 
        if (to > inode->i_size)
-               truncate_pagecache(inode, to, inode->i_size);
+               truncate_pagecache(inode, inode->i_size);
 }
 
 static int adfs_write_begin(struct file *file, struct address_space *mapping,
index af3261b..8669b6e 100644 (file)
@@ -406,7 +406,7 @@ static void affs_write_failed(struct address_space *mapping, loff_t to)
        struct inode *inode = mapping->host;
 
        if (to > inode->i_size) {
-               truncate_pagecache(inode, to, inode->i_size);
+               truncate_pagecache(inode, inode->i_size);
                affs_truncate(inode);
        }
 }
@@ -836,7 +836,7 @@ affs_truncate(struct inode *inode)
                struct address_space *mapping = inode->i_mapping;
                struct page *page;
                void *fsdata;
-               u32 size = inode->i_size;
+               loff_t size = inode->i_size;
                int res;
 
                res = mapping->a_ops->write_begin(NULL, mapping, size, 0, 0, &page, &fsdata);
index 9b5ca11..6b868f0 100644 (file)
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -26,6 +26,7 @@
 #include <linux/mm.h>
 #include <linux/mman.h>
 #include <linux/mmu_context.h>
+#include <linux/percpu.h>
 #include <linux/slab.h>
 #include <linux/timer.h>
 #include <linux/aio.h>
 #include <linux/eventfd.h>
 #include <linux/blkdev.h>
 #include <linux/compat.h>
+#include <linux/anon_inodes.h>
+#include <linux/migrate.h>
+#include <linux/ramfs.h>
+#include <linux/percpu-refcount.h>
 
 #include <asm/kmap_types.h>
 #include <asm/uaccess.h>
@@ -61,14 +66,29 @@ struct aio_ring {
 
 #define AIO_RING_PAGES 8
 
+struct kioctx_table {
+       struct rcu_head rcu;
+       unsigned        nr;
+       struct kioctx   *table[];
+};
+
+struct kioctx_cpu {
+       unsigned                reqs_available;
+};
+
 struct kioctx {
-       atomic_t                users;
+       struct percpu_ref       users;
        atomic_t                dead;
 
-       /* This needs improving */
        unsigned long           user_id;
-       struct hlist_node       list;
 
+       struct __percpu kioctx_cpu *cpu;
+
+       /*
+        * For percpu reqs_available, number of slots we move to/from global
+        * counter at a time:
+        */
+       unsigned                req_batch;
        /*
         * This is what userspace passed to io_setup(), it's not used for
         * anything but counting against the global max_reqs quota.
@@ -88,10 +108,18 @@ struct kioctx {
        long                    nr_pages;
 
        struct rcu_head         rcu_head;
-       struct work_struct      rcu_work;
+       struct work_struct      free_work;
 
        struct {
-               atomic_t        reqs_active;
+               /*
+                * This counts the number of available slots in the ringbuffer,
+                * so we avoid overflowing it: it's decremented (if positive)
+                * when allocating a kiocb and incremented when the resulting
+                * io_event is pulled off the ringbuffer.
+                *
+                * We batch accesses to it with a percpu version.
+                */
+               atomic_t        reqs_available;
        } ____cacheline_aligned_in_smp;
 
        struct {
@@ -110,6 +138,9 @@ struct kioctx {
        } ____cacheline_aligned_in_smp;
 
        struct page             *internal_pages[AIO_RING_PAGES];
+       struct file             *aio_ring_file;
+
+       unsigned                id;
 };
 
 /*------ sysctl variables----*/
@@ -138,15 +169,77 @@ __initcall(aio_setup);
 
 static void aio_free_ring(struct kioctx *ctx)
 {
-       long i;
+       int i;
+       struct file *aio_ring_file = ctx->aio_ring_file;
 
-       for (i = 0; i < ctx->nr_pages; i++)
+       for (i = 0; i < ctx->nr_pages; i++) {
+               pr_debug("pid(%d) [%d] page->count=%d\n", current->pid, i,
+                               page_count(ctx->ring_pages[i]));
                put_page(ctx->ring_pages[i]);
+       }
 
        if (ctx->ring_pages && ctx->ring_pages != ctx->internal_pages)
                kfree(ctx->ring_pages);
+
+       if (aio_ring_file) {
+               truncate_setsize(aio_ring_file->f_inode, 0);
+               fput(aio_ring_file);
+               ctx->aio_ring_file = NULL;
+       }
+}
+
+static int aio_ring_mmap(struct file *file, struct vm_area_struct *vma)
+{
+       vma->vm_ops = &generic_file_vm_ops;
+       return 0;
 }
 
+static const struct file_operations aio_ring_fops = {
+       .mmap = aio_ring_mmap,
+};
+
+static int aio_set_page_dirty(struct page *page)
+{
+       return 0;
+}
+
+#if IS_ENABLED(CONFIG_MIGRATION)
+static int aio_migratepage(struct address_space *mapping, struct page *new,
+                       struct page *old, enum migrate_mode mode)
+{
+       struct kioctx *ctx = mapping->private_data;
+       unsigned long flags;
+       unsigned idx = old->index;
+       int rc;
+
+       /* Writeback must be complete */
+       BUG_ON(PageWriteback(old));
+       put_page(old);
+
+       rc = migrate_page_move_mapping(mapping, new, old, NULL, mode);
+       if (rc != MIGRATEPAGE_SUCCESS) {
+               get_page(old);
+               return rc;
+       }
+
+       get_page(new);
+
+       spin_lock_irqsave(&ctx->completion_lock, flags);
+       migrate_page_copy(new, old);
+       ctx->ring_pages[idx] = new;
+       spin_unlock_irqrestore(&ctx->completion_lock, flags);
+
+       return rc;
+}
+#endif
+
+static const struct address_space_operations aio_ctx_aops = {
+       .set_page_dirty = aio_set_page_dirty,
+#if IS_ENABLED(CONFIG_MIGRATION)
+       .migratepage    = aio_migratepage,
+#endif
+};
+
 static int aio_setup_ring(struct kioctx *ctx)
 {
        struct aio_ring *ring;
@@ -154,20 +247,45 @@ static int aio_setup_ring(struct kioctx *ctx)
        struct mm_struct *mm = current->mm;
        unsigned long size, populate;
        int nr_pages;
+       int i;
+       struct file *file;
 
        /* Compensate for the ring buffer's head/tail overlap entry */
        nr_events += 2; /* 1 is required, 2 for good luck */
 
        size = sizeof(struct aio_ring);
        size += sizeof(struct io_event) * nr_events;
-       nr_pages = (size + PAGE_SIZE-1) >> PAGE_SHIFT;
 
+       nr_pages = PFN_UP(size);
        if (nr_pages < 0)
                return -EINVAL;
 
-       nr_events = (PAGE_SIZE * nr_pages - sizeof(struct aio_ring)) / sizeof(struct io_event);
+       file = anon_inode_getfile_private("[aio]", &aio_ring_fops, ctx, O_RDWR);
+       if (IS_ERR(file)) {
+               ctx->aio_ring_file = NULL;
+               return -EAGAIN;
+       }
+
+       file->f_inode->i_mapping->a_ops = &aio_ctx_aops;
+       file->f_inode->i_mapping->private_data = ctx;
+       file->f_inode->i_size = PAGE_SIZE * (loff_t)nr_pages;
+
+       for (i = 0; i < nr_pages; i++) {
+               struct page *page;
+               page = find_or_create_page(file->f_inode->i_mapping,
+                                          i, GFP_HIGHUSER | __GFP_ZERO);
+               if (!page)
+                       break;
+               pr_debug("pid(%d) page[%d]->count=%d\n",
+                        current->pid, i, page_count(page));
+               SetPageUptodate(page);
+               SetPageDirty(page);
+               unlock_page(page);
+       }
+       ctx->aio_ring_file = file;
+       nr_events = (PAGE_SIZE * nr_pages - sizeof(struct aio_ring))
+                       / sizeof(struct io_event);
 
-       ctx->nr_events = 0;
        ctx->ring_pages = ctx->internal_pages;
        if (nr_pages > AIO_RING_PAGES) {
                ctx->ring_pages = kcalloc(nr_pages, sizeof(struct page *),
@@ -178,10 +296,11 @@ static int aio_setup_ring(struct kioctx *ctx)
 
        ctx->mmap_size = nr_pages * PAGE_SIZE;
        pr_debug("attempting mmap of %lu bytes\n", ctx->mmap_size);
+
        down_write(&mm->mmap_sem);
-       ctx->mmap_base = do_mmap_pgoff(NULL, 0, ctx->mmap_size,
-                                      PROT_READ|PROT_WRITE,
-                                      MAP_ANONYMOUS|MAP_PRIVATE, 0, &populate);
+       ctx->mmap_base = do_mmap_pgoff(ctx->aio_ring_file, 0, ctx->mmap_size,
+                                      PROT_READ | PROT_WRITE,
+                                      MAP_SHARED | MAP_POPULATE, 0, &populate);
        if (IS_ERR((void *)ctx->mmap_base)) {
                up_write(&mm->mmap_sem);
                ctx->mmap_size = 0;
@@ -190,23 +309,34 @@ static int aio_setup_ring(struct kioctx *ctx)
        }
 
        pr_debug("mmap address: 0x%08lx\n", ctx->mmap_base);
+
+       /* We must do this while still holding mmap_sem for write, as we
+        * need to be protected against userspace attempting to mremap()
+        * or munmap() the ring buffer.
+        */
        ctx->nr_pages = get_user_pages(current, mm, ctx->mmap_base, nr_pages,
                                       1, 0, ctx->ring_pages, NULL);
+
+       /* Dropping the reference here is safe as the page cache will hold
+        * onto the pages for us.  It is also required so that page migration
+        * can unmap the pages and get the right reference count.
+        */
+       for (i = 0; i < ctx->nr_pages; i++)
+               put_page(ctx->ring_pages[i]);
+
        up_write(&mm->mmap_sem);
 
        if (unlikely(ctx->nr_pages != nr_pages)) {
                aio_free_ring(ctx);
                return -EAGAIN;
        }
-       if (populate)
-               mm_populate(ctx->mmap_base, populate);
 
        ctx->user_id = ctx->mmap_base;
        ctx->nr_events = nr_events; /* trusted copy */
 
        ring = kmap_atomic(ctx->ring_pages[0]);
        ring->nr = nr_events;   /* user copy */
-       ring->id = ctx->user_id;
+       ring->id = ~0U;
        ring->head = ring->tail = 0;
        ring->magic = AIO_RING_MAGIC;
        ring->compat_features = AIO_RING_COMPAT_FEATURES;
@@ -238,11 +368,9 @@ void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel)
 }
 EXPORT_SYMBOL(kiocb_set_cancel_fn);
 
-static int kiocb_cancel(struct kioctx *ctx, struct kiocb *kiocb,
-                       struct io_event *res)
+static int kiocb_cancel(struct kioctx *ctx, struct kiocb *kiocb)
 {
        kiocb_cancel_fn *old, *cancel;
-       int ret = -EINVAL;
 
        /*
         * Don't want to set kiocb->ki_cancel = KIOCB_CANCELLED unless it
@@ -252,28 +380,20 @@ static int kiocb_cancel(struct kioctx *ctx, struct kiocb *kiocb,
        cancel = ACCESS_ONCE(kiocb->ki_cancel);
        do {
                if (!cancel || cancel == KIOCB_CANCELLED)
-                       return ret;
+                       return -EINVAL;
 
                old = cancel;
                cancel = cmpxchg(&kiocb->ki_cancel, old, KIOCB_CANCELLED);
        } while (cancel != old);
 
-       atomic_inc(&kiocb->ki_users);
-       spin_unlock_irq(&ctx->ctx_lock);
-
-       memset(res, 0, sizeof(*res));
-       res->obj = (u64)(unsigned long)kiocb->ki_obj.user;
-       res->data = kiocb->ki_user_data;
-       ret = cancel(kiocb, res);
-
-       spin_lock_irq(&ctx->ctx_lock);
-
-       return ret;
+       return cancel(kiocb);
 }
 
 static void free_ioctx_rcu(struct rcu_head *head)
 {
        struct kioctx *ctx = container_of(head, struct kioctx, rcu_head);
+
+       free_percpu(ctx->cpu);
        kmem_cache_free(kioctx_cachep, ctx);
 }
 
@@ -282,12 +402,13 @@ static void free_ioctx_rcu(struct rcu_head *head)
  * and ctx->users has dropped to 0, so we know no more kiocbs can be submitted -
  * now it's safe to cancel any that need to be.
  */
-static void free_ioctx(struct kioctx *ctx)
+static void free_ioctx(struct work_struct *work)
 {
+       struct kioctx *ctx = container_of(work, struct kioctx, free_work);
        struct aio_ring *ring;
-       struct io_event res;
        struct kiocb *req;
-       unsigned head, avail;
+       unsigned cpu, avail;
+       DEFINE_WAIT(wait);
 
        spin_lock_irq(&ctx->ctx_lock);
 
@@ -296,28 +417,38 @@ static void free_ioctx(struct kioctx *ctx)
                                       struct kiocb, ki_list);
 
                list_del_init(&req->ki_list);
-               kiocb_cancel(ctx, req, &res);
+               kiocb_cancel(ctx, req);
        }
 
        spin_unlock_irq(&ctx->ctx_lock);
 
-       ring = kmap_atomic(ctx->ring_pages[0]);
-       head = ring->head;
-       kunmap_atomic(ring);
+       for_each_possible_cpu(cpu) {
+               struct kioctx_cpu *kcpu = per_cpu_ptr(ctx->cpu, cpu);
 
-       while (atomic_read(&ctx->reqs_active) > 0) {
-               wait_event(ctx->wait,
-                               head != ctx->tail ||
-                               atomic_read(&ctx->reqs_active) <= 0);
+               atomic_add(kcpu->reqs_available, &ctx->reqs_available);
+               kcpu->reqs_available = 0;
+       }
 
-               avail = (head <= ctx->tail ? ctx->tail : ctx->nr_events) - head;
+       while (1) {
+               prepare_to_wait(&ctx->wait, &wait, TASK_UNINTERRUPTIBLE);
 
-               atomic_sub(avail, &ctx->reqs_active);
-               head += avail;
-               head %= ctx->nr_events;
+               ring = kmap_atomic(ctx->ring_pages[0]);
+               avail = (ring->head <= ring->tail)
+                        ? ring->tail - ring->head
+                        : ctx->nr_events - ring->head + ring->tail;
+
+               atomic_add(avail, &ctx->reqs_available);
+               ring->head = ring->tail;
+               kunmap_atomic(ring);
+
+               if (atomic_read(&ctx->reqs_available) >= ctx->nr_events - 1)
+                       break;
+
+               schedule();
        }
+       finish_wait(&ctx->wait, &wait);
 
-       WARN_ON(atomic_read(&ctx->reqs_active) < 0);
+       WARN_ON(atomic_read(&ctx->reqs_available) > ctx->nr_events - 1);
 
        aio_free_ring(ctx);
 
@@ -333,10 +464,68 @@ static void free_ioctx(struct kioctx *ctx)
        call_rcu(&ctx->rcu_head, free_ioctx_rcu);
 }
 
-static void put_ioctx(struct kioctx *ctx)
+static void free_ioctx_ref(struct percpu_ref *ref)
 {
-       if (unlikely(atomic_dec_and_test(&ctx->users)))
-               free_ioctx(ctx);
+       struct kioctx *ctx = container_of(ref, struct kioctx, users);
+
+       INIT_WORK(&ctx->free_work, free_ioctx);
+       schedule_work(&ctx->free_work);
+}
+
+static int ioctx_add_table(struct kioctx *ctx, struct mm_struct *mm)
+{
+       unsigned i, new_nr;
+       struct kioctx_table *table, *old;
+       struct aio_ring *ring;
+
+       spin_lock(&mm->ioctx_lock);
+       rcu_read_lock();
+       table = rcu_dereference(mm->ioctx_table);
+
+       while (1) {
+               if (table)
+                       for (i = 0; i < table->nr; i++)
+                               if (!table->table[i]) {
+                                       ctx->id = i;
+                                       table->table[i] = ctx;
+                                       rcu_read_unlock();
+                                       spin_unlock(&mm->ioctx_lock);
+
+                                       ring = kmap_atomic(ctx->ring_pages[0]);
+                                       ring->id = ctx->id;
+                                       kunmap_atomic(ring);
+                                       return 0;
+                               }
+
+               new_nr = (table ? table->nr : 1) * 4;
+
+               rcu_read_unlock();
+               spin_unlock(&mm->ioctx_lock);
+
+               table = kzalloc(sizeof(*table) + sizeof(struct kioctx *) *
+                               new_nr, GFP_KERNEL);
+               if (!table)
+                       return -ENOMEM;
+
+               table->nr = new_nr;
+
+               spin_lock(&mm->ioctx_lock);
+               rcu_read_lock();
+               old = rcu_dereference(mm->ioctx_table);
+
+               if (!old) {
+                       rcu_assign_pointer(mm->ioctx_table, table);
+               } else if (table->nr > old->nr) {
+                       memcpy(table->table, old->table,
+                              old->nr * sizeof(struct kioctx *));
+
+                       rcu_assign_pointer(mm->ioctx_table, table);
+                       kfree_rcu(old, rcu);
+               } else {
+                       kfree(table);
+                       table = old;
+               }
+       }
 }
 
 /* ioctx_alloc
@@ -348,6 +537,18 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
        struct kioctx *ctx;
        int err = -ENOMEM;
 
+       /*
+        * We keep track of the number of available ringbuffer slots, to prevent
+        * overflow (reqs_available), and we also use percpu counters for this.
+        *
+        * So since up to half the slots might be on other cpu's percpu counters
+        * and unavailable, double nr_events so userspace sees what they
+        * expected: additionally, we move req_batch slots to/from percpu
+        * counters at a time, so make sure that isn't 0:
+        */
+       nr_events = max(nr_events, num_possible_cpus() * 4);
+       nr_events *= 2;
+
        /* Prevent overflows */
        if ((nr_events > (0x10000000U / sizeof(struct io_event))) ||
            (nr_events > (0x10000000U / sizeof(struct kiocb)))) {
@@ -355,7 +556,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
                return ERR_PTR(-EINVAL);
        }
 
-       if (!nr_events || (unsigned long)nr_events > aio_max_nr)
+       if (!nr_events || (unsigned long)nr_events > (aio_max_nr * 2UL))
                return ERR_PTR(-EAGAIN);
 
        ctx = kmem_cache_zalloc(kioctx_cachep, GFP_KERNEL);
@@ -364,8 +565,9 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
 
        ctx->max_reqs = nr_events;
 
-       atomic_set(&ctx->users, 2);
-       atomic_set(&ctx->dead, 0);
+       if (percpu_ref_init(&ctx->users, free_ioctx_ref))
+               goto out_freectx;
+
        spin_lock_init(&ctx->ctx_lock);
        spin_lock_init(&ctx->completion_lock);
        mutex_init(&ctx->ring_lock);
@@ -373,12 +575,21 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
 
        INIT_LIST_HEAD(&ctx->active_reqs);
 
+       ctx->cpu = alloc_percpu(struct kioctx_cpu);
+       if (!ctx->cpu)
+               goto out_freeref;
+
        if (aio_setup_ring(ctx) < 0)
-               goto out_freectx;
+               goto out_freepcpu;
+
+       atomic_set(&ctx->reqs_available, ctx->nr_events - 1);
+       ctx->req_batch = (ctx->nr_events - 1) / (num_possible_cpus() * 4);
+       if (ctx->req_batch < 1)
+               ctx->req_batch = 1;
 
        /* limit the number of system wide aios */
        spin_lock(&aio_nr_lock);
-       if (aio_nr + nr_events > aio_max_nr ||
+       if (aio_nr + nr_events > (aio_max_nr * 2UL) ||
            aio_nr + nr_events < aio_nr) {
                spin_unlock(&aio_nr_lock);
                goto out_cleanup;
@@ -386,49 +597,54 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
        aio_nr += ctx->max_reqs;
        spin_unlock(&aio_nr_lock);
 
-       /* now link into global list. */
-       spin_lock(&mm->ioctx_lock);
-       hlist_add_head_rcu(&ctx->list, &mm->ioctx_list);
-       spin_unlock(&mm->ioctx_lock);
+       percpu_ref_get(&ctx->users); /* io_setup() will drop this ref */
+
+       err = ioctx_add_table(ctx, mm);
+       if (err)
+               goto out_cleanup_put;
 
        pr_debug("allocated ioctx %p[%ld]: mm=%p mask=0x%x\n",
                 ctx, ctx->user_id, mm, ctx->nr_events);
        return ctx;
 
+out_cleanup_put:
+       percpu_ref_put(&ctx->users);
 out_cleanup:
        err = -EAGAIN;
        aio_free_ring(ctx);
+out_freepcpu:
+       free_percpu(ctx->cpu);
+out_freeref:
+       free_percpu(ctx->users.pcpu_count);
 out_freectx:
+       if (ctx->aio_ring_file)
+               fput(ctx->aio_ring_file);
        kmem_cache_free(kioctx_cachep, ctx);
        pr_debug("error allocating ioctx %d\n", err);
        return ERR_PTR(err);
 }
 
-static void kill_ioctx_work(struct work_struct *work)
-{
-       struct kioctx *ctx = container_of(work, struct kioctx, rcu_work);
-
-       wake_up_all(&ctx->wait);
-       put_ioctx(ctx);
-}
-
-static void kill_ioctx_rcu(struct rcu_head *head)
-{
-       struct kioctx *ctx = container_of(head, struct kioctx, rcu_head);
-
-       INIT_WORK(&ctx->rcu_work, kill_ioctx_work);
-       schedule_work(&ctx->rcu_work);
-}
-
 /* kill_ioctx
  *     Cancels all outstanding aio requests on an aio context.  Used
  *     when the processes owning a context have all exited to encourage
  *     the rapid destruction of the kioctx.
  */
-static void kill_ioctx(struct kioctx *ctx)
+static void kill_ioctx(struct mm_struct *mm, struct kioctx *ctx)
 {
        if (!atomic_xchg(&ctx->dead, 1)) {
-               hlist_del_rcu(&ctx->list);
+               struct kioctx_table *table;
+
+               spin_lock(&mm->ioctx_lock);
+               rcu_read_lock();
+               table = rcu_dereference(mm->ioctx_table);
+
+               WARN_ON(ctx != table->table[ctx->id]);
+               table->table[ctx->id] = NULL;
+               rcu_read_unlock();
+               spin_unlock(&mm->ioctx_lock);
+
+               /* percpu_ref_kill() will do the necessary call_rcu() */
+               wake_up_all(&ctx->wait);
 
                /*
                 * It'd be more correct to do this in free_ioctx(), after all
@@ -445,24 +661,23 @@ static void kill_ioctx(struct kioctx *ctx)
                if (ctx->mmap_size)
                        vm_munmap(ctx->mmap_base, ctx->mmap_size);
 
-               /* Between hlist_del_rcu() and dropping the initial ref */
-               call_rcu(&ctx->rcu_head, kill_ioctx_rcu);
+               percpu_ref_kill(&ctx->users);
        }
 }
 
 /* wait_on_sync_kiocb:
  *     Waits on the given sync kiocb to complete.
  */
-ssize_t wait_on_sync_kiocb(struct kiocb *iocb)
+ssize_t wait_on_sync_kiocb(struct kiocb *req)
 {
-       while (atomic_read(&iocb->ki_users)) {
+       while (!req->ki_ctx) {
                set_current_state(TASK_UNINTERRUPTIBLE);
-               if (!atomic_read(&iocb->ki_users))
+               if (req->ki_ctx)
                        break;
                io_schedule();
        }
        __set_current_state(TASK_RUNNING);
-       return iocb->ki_user_data;
+       return req->ki_user_data;
 }
 EXPORT_SYMBOL(wait_on_sync_kiocb);
 
@@ -476,16 +691,28 @@ EXPORT_SYMBOL(wait_on_sync_kiocb);
  */
 void exit_aio(struct mm_struct *mm)
 {
+       struct kioctx_table *table;
        struct kioctx *ctx;
-       struct hlist_node *n;
-
-       hlist_for_each_entry_safe(ctx, n, &mm->ioctx_list, list) {
-               if (1 != atomic_read(&ctx->users))
-                       printk(KERN_DEBUG
-                               "exit_aio:ioctx still alive: %d %d %d\n",
-                               atomic_read(&ctx->users),
-                               atomic_read(&ctx->dead),
-                               atomic_read(&ctx->reqs_active));
+       unsigned i = 0;
+
+       while (1) {
+               rcu_read_lock();
+               table = rcu_dereference(mm->ioctx_table);
+
+               do {
+                       if (!table || i >= table->nr) {
+                               rcu_read_unlock();
+                               rcu_assign_pointer(mm->ioctx_table, NULL);
+                               if (table)
+                                       kfree(table);
+                               return;
+                       }
+
+                       ctx = table->table[i++];
+               } while (!ctx);
+
+               rcu_read_unlock();
+
                /*
                 * We don't need to bother with munmap() here -
                 * exit_mmap(mm) is coming and it'll unmap everything.
@@ -496,40 +723,75 @@ void exit_aio(struct mm_struct *mm)
                 */
                ctx->mmap_size = 0;
 
-               kill_ioctx(ctx);
+               kill_ioctx(mm, ctx);
+       }
+}
+
+static void put_reqs_available(struct kioctx *ctx, unsigned nr)
+{
+       struct kioctx_cpu *kcpu;
+
+       preempt_disable();
+       kcpu = this_cpu_ptr(ctx->cpu);
+
+       kcpu->reqs_available += nr;
+       while (kcpu->reqs_available >= ctx->req_batch * 2) {
+               kcpu->reqs_available -= ctx->req_batch;
+               atomic_add(ctx->req_batch, &ctx->reqs_available);
+       }
+
+       preempt_enable();
+}
+
+static bool get_reqs_available(struct kioctx *ctx)
+{
+       struct kioctx_cpu *kcpu;
+       bool ret = false;
+
+       preempt_disable();
+       kcpu = this_cpu_ptr(ctx->cpu);
+
+       if (!kcpu->reqs_available) {
+               int old, avail = atomic_read(&ctx->reqs_available);
+
+               do {
+                       if (avail < ctx->req_batch)
+                               goto out;
+
+                       old = avail;
+                       avail = atomic_cmpxchg(&ctx->reqs_available,
+                                              avail, avail - ctx->req_batch);
+               } while (avail != old);
+
+               kcpu->reqs_available += ctx->req_batch;
        }
+
+       ret = true;
+       kcpu->reqs_available--;
+out:
+       preempt_enable();
+       return ret;
 }
 
 /* aio_get_req
- *     Allocate a slot for an aio request.  Increments the ki_users count
- * of the kioctx so that the kioctx stays around until all requests are
- * complete.  Returns NULL if no requests are free.
- *
- * Returns with kiocb->ki_users set to 2.  The io submit code path holds
- * an extra reference while submitting the i/o.
- * This prevents races between the aio code path referencing the
- * req (after submitting it) and aio_complete() freeing the req.
+ *     Allocate a slot for an aio request.
+ * Returns NULL if no requests are free.
  */
 static inline struct kiocb *aio_get_req(struct kioctx *ctx)
 {
        struct kiocb *req;
 
-       if (atomic_read(&ctx->reqs_active) >= ctx->nr_events)
+       if (!get_reqs_available(ctx))
                return NULL;
 
-       if (atomic_inc_return(&ctx->reqs_active) > ctx->nr_events - 1)
-               goto out_put;
-
        req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL|__GFP_ZERO);
        if (unlikely(!req))
                goto out_put;
 
-       atomic_set(&req->ki_users, 2);
        req->ki_ctx = ctx;
-
        return req;
 out_put:
-       atomic_dec(&ctx->reqs_active);
+       put_reqs_available(ctx, 1);
        return NULL;
 }
 
@@ -539,35 +801,32 @@ static void kiocb_free(struct kiocb *req)
                fput(req->ki_filp);
        if (req->ki_eventfd != NULL)
                eventfd_ctx_put(req->ki_eventfd);
-       if (req->ki_dtor)
-               req->ki_dtor(req);
-       if (req->ki_iovec != &req->ki_inline_vec)
-               kfree(req->ki_iovec);
        kmem_cache_free(kiocb_cachep, req);
 }
 
-void aio_put_req(struct kiocb *req)
-{
-       if (atomic_dec_and_test(&req->ki_users))
-               kiocb_free(req);
-}
-EXPORT_SYMBOL(aio_put_req);
-
 static struct kioctx *lookup_ioctx(unsigned long ctx_id)
 {
+       struct aio_ring __user *ring  = (void __user *)ctx_id;
        struct mm_struct *mm = current->mm;
        struct kioctx *ctx, *ret = NULL;
+       struct kioctx_table *table;
+       unsigned id;
+
+       if (get_user(id, &ring->id))
+               return NULL;
 
        rcu_read_lock();
+       table = rcu_dereference(mm->ioctx_table);
 
-       hlist_for_each_entry_rcu(ctx, &mm->ioctx_list, list) {
-               if (ctx->user_id == ctx_id) {
-                       atomic_inc(&ctx->users);
-                       ret = ctx;
-                       break;
-               }
-       }
+       if (!table || id >= table->nr)
+               goto out;
 
+       ctx = table->table[id];
+       if (ctx && ctx->user_id == ctx_id) {
+               percpu_ref_get(&ctx->users);
+               ret = ctx;
+       }
+out:
        rcu_read_unlock();
        return ret;
 }
@@ -591,16 +850,16 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
         *  - the sync task helpfully left a reference to itself in the iocb
         */
        if (is_sync_kiocb(iocb)) {
-               BUG_ON(atomic_read(&iocb->ki_users) != 1);
                iocb->ki_user_data = res;
-               atomic_set(&iocb->ki_users, 0);
+               smp_wmb();
+               iocb->ki_ctx = ERR_PTR(-EXDEV);
                wake_up_process(iocb->ki_obj.tsk);
                return;
        }
 
        /*
         * Take rcu_read_lock() in case the kioctx is being destroyed, as we
-        * need to issue a wakeup after decrementing reqs_active.
+        * need to issue a wakeup after incrementing reqs_available.
         */
        rcu_read_lock();
 
@@ -612,17 +871,6 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
                spin_unlock_irqrestore(&ctx->ctx_lock, flags);
        }
 
-       /*
-        * cancelled requests don't get events, userland was given one
-        * when the event got cancelled.
-        */
-       if (unlikely(xchg(&iocb->ki_cancel,
-                         KIOCB_CANCELLED) == KIOCB_CANCELLED)) {
-               atomic_dec(&ctx->reqs_active);
-               /* Still need the wake_up in case free_ioctx is waiting */
-               goto put_rq;
-       }
-
        /*
         * Add a completion event to the ring buffer. Must be done holding
         * ctx->completion_lock to prevent other code from messing with the tail
@@ -675,9 +923,8 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
        if (iocb->ki_eventfd != NULL)
                eventfd_signal(iocb->ki_eventfd, 1);
 
-put_rq:
        /* everything turned out well, dispose of the aiocb. */
-       aio_put_req(iocb);
+       kiocb_free(iocb);
 
        /*
         * We have to order our ring_info tail store above and test
@@ -702,7 +949,7 @@ static long aio_read_events_ring(struct kioctx *ctx,
                                 struct io_event __user *event, long nr)
 {
        struct aio_ring *ring;
-       unsigned head, pos;
+       unsigned head, tail, pos;
        long ret = 0;
        int copy_ret;
 
@@ -710,11 +957,12 @@ static long aio_read_events_ring(struct kioctx *ctx,
 
        ring = kmap_atomic(ctx->ring_pages[0]);
        head = ring->head;
+       tail = ring->tail;
        kunmap_atomic(ring);
 
-       pr_debug("h%u t%u m%u\n", head, ctx->tail, ctx->nr_events);
+       pr_debug("h%u t%u m%u\n", head, tail, ctx->nr_events);
 
-       if (head == ctx->tail)
+       if (head == tail)
                goto out;
 
        while (ret < nr) {
@@ -722,8 +970,8 @@ static long aio_read_events_ring(struct kioctx *ctx,
                struct io_event *ev;
                struct page *page;
 
-               avail = (head <= ctx->tail ? ctx->tail : ctx->nr_events) - head;
-               if (head == ctx->tail)
+               avail = (head <= tail ?  tail : ctx->nr_events) - head;
+               if (head == tail)
                        break;
 
                avail = min(avail, nr - ret);
@@ -754,9 +1002,9 @@ static long aio_read_events_ring(struct kioctx *ctx,
        kunmap_atomic(ring);
        flush_dcache_page(ctx->ring_pages[0]);
 
-       pr_debug("%li  h%u t%u\n", ret, head, ctx->tail);
+       pr_debug("%li  h%u t%u\n", ret, head, tail);
 
-       atomic_sub(ret, &ctx->reqs_active);
+       put_reqs_available(ctx, ret);
 out:
        mutex_unlock(&ctx->ring_lock);
 
@@ -854,8 +1102,8 @@ SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp)
        if (!IS_ERR(ioctx)) {
                ret = put_user(ioctx->user_id, ctxp);
                if (ret)
-                       kill_ioctx(ioctx);
-               put_ioctx(ioctx);
+                       kill_ioctx(current->mm, ioctx);
+               percpu_ref_put(&ioctx->users);
        }
 
 out:
@@ -872,101 +1120,37 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx)
 {
        struct kioctx *ioctx = lookup_ioctx(ctx);
        if (likely(NULL != ioctx)) {
-               kill_ioctx(ioctx);
-               put_ioctx(ioctx);
+               kill_ioctx(current->mm, ioctx);
+               percpu_ref_put(&ioctx->users);
                return 0;
        }
        pr_debug("EINVAL: io_destroy: invalid context id\n");
        return -EINVAL;
 }
 
-static void aio_advance_iovec(struct kiocb *iocb, ssize_t ret)
-{
-       struct iovec *iov = &iocb->ki_iovec[iocb->ki_cur_seg];
-
-       BUG_ON(ret <= 0);
-
-       while (iocb->ki_cur_seg < iocb->ki_nr_segs && ret > 0) {
-               ssize_t this = min((ssize_t)iov->iov_len, ret);
-               iov->iov_base += this;
-               iov->iov_len -= this;
-               iocb->ki_left -= this;
-               ret -= this;
-               if (iov->iov_len == 0) {
-                       iocb->ki_cur_seg++;
-                       iov++;
-               }
-       }
-
-       /* the caller should not have done more io than what fit in
-        * the remaining iovecs */
-       BUG_ON(ret > 0 && iocb->ki_left == 0);
-}
-
 typedef ssize_t (aio_rw_op)(struct kiocb *, const struct iovec *,
                            unsigned long, loff_t);
 
-static ssize_t aio_rw_vect_retry(struct kiocb *iocb, int rw, aio_rw_op *rw_op)
-{
-       struct file *file = iocb->ki_filp;
-       struct address_space *mapping = file->f_mapping;
-       struct inode *inode = mapping->host;
-       ssize_t ret = 0;
-
-       /* This matches the pread()/pwrite() logic */
-       if (iocb->ki_pos < 0)
-               return -EINVAL;
-
-       if (rw == WRITE)
-               file_start_write(file);
-       do {
-               ret = rw_op(iocb, &iocb->ki_iovec[iocb->ki_cur_seg],
-                           iocb->ki_nr_segs - iocb->ki_cur_seg,
-                           iocb->ki_pos);
-               if (ret > 0)
-                       aio_advance_iovec(iocb, ret);
-
-       /* retry all partial writes.  retry partial reads as long as its a
-        * regular file. */
-       } while (ret > 0 && iocb->ki_left > 0 &&
-                (rw == WRITE ||
-                 (!S_ISFIFO(inode->i_mode) && !S_ISSOCK(inode->i_mode))));
-       if (rw == WRITE)
-               file_end_write(file);
-
-       /* This means we must have transferred all that we could */
-       /* No need to retry anymore */
-       if ((ret == 0) || (iocb->ki_left == 0))
-               ret = iocb->ki_nbytes - iocb->ki_left;
-
-       /* If we managed to write some out we return that, rather than
-        * the eventual error. */
-       if (rw == WRITE
-           && ret < 0 && ret != -EIOCBQUEUED
-           && iocb->ki_nbytes - iocb->ki_left)
-               ret = iocb->ki_nbytes - iocb->ki_left;
-
-       return ret;
-}
-
-static ssize_t aio_setup_vectored_rw(int rw, struct kiocb *kiocb, bool compat)
+static ssize_t aio_setup_vectored_rw(struct kiocb *kiocb,
+                                    int rw, char __user *buf,
+                                    unsigned long *nr_segs,
+                                    struct iovec **iovec,
+                                    bool compat)
 {
        ssize_t ret;
 
-       kiocb->ki_nr_segs = kiocb->ki_nbytes;
+       *nr_segs = kiocb->ki_nbytes;
 
 #ifdef CONFIG_COMPAT
        if (compat)
                ret = compat_rw_copy_check_uvector(rw,
-                               (struct compat_iovec __user *)kiocb->ki_buf,
-                               kiocb->ki_nr_segs, 1, &kiocb->ki_inline_vec,
-                               &kiocb->ki_iovec);
+                               (struct compat_iovec __user *)buf,
+                               *nr_segs, 1, *iovec, iovec);
        else
 #endif
                ret = rw_copy_check_uvector(rw,
-                               (struct iovec __user *)kiocb->ki_buf,
-                               kiocb->ki_nr_segs, 1, &kiocb->ki_inline_vec,
-                               &kiocb->ki_iovec);
+                               (struct iovec __user *)buf,
+                               *nr_segs, 1, *iovec, iovec);
        if (ret < 0)
                return ret;
 
@@ -975,15 +1159,17 @@ static ssize_t aio_setup_vectored_rw(int rw, struct kiocb *kiocb, bool compat)
        return 0;
 }
 
-static ssize_t aio_setup_single_vector(int rw, struct kiocb *kiocb)
+static ssize_t aio_setup_single_vector(struct kiocb *kiocb,
+                                      int rw, char __user *buf,
+                                      unsigned long *nr_segs,
+                                      struct iovec *iovec)
 {
-       if (unlikely(!access_ok(!rw, kiocb->ki_buf, kiocb->ki_nbytes)))
+       if (unlikely(!access_ok(!rw, buf, kiocb->ki_nbytes)))
                return -EFAULT;
 
-       kiocb->ki_iovec = &kiocb->ki_inline_vec;
-       kiocb->ki_iovec->iov_base = kiocb->ki_buf;
-       kiocb->ki_iovec->iov_len = kiocb->ki_nbytes;
-       kiocb->ki_nr_segs = 1;
+       iovec->iov_base = buf;
+       iovec->iov_len = kiocb->ki_nbytes;
+       *nr_segs = 1;
        return 0;
 }
 
@@ -992,15 +1178,18 @@ static ssize_t aio_setup_single_vector(int rw, struct kiocb *kiocb)
  *     Performs the initial checks and aio retry method
  *     setup for the kiocb at the time of io submission.
  */
-static ssize_t aio_run_iocb(struct kiocb *req, bool compat)
+static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode,
+                           char __user *buf, bool compat)
 {
        struct file *file = req->ki_filp;
        ssize_t ret;
+       unsigned long nr_segs;
        int rw;
        fmode_t mode;
        aio_rw_op *rw_op;
+       struct iovec inline_vec, *iovec = &inline_vec;
 
-       switch (req->ki_opcode) {
+       switch (opcode) {
        case IOCB_CMD_PREAD:
        case IOCB_CMD_PREADV:
                mode    = FMODE_READ;
@@ -1021,21 +1210,38 @@ rw_common:
                if (!rw_op)
                        return -EINVAL;
 
-               ret = (req->ki_opcode == IOCB_CMD_PREADV ||
-                      req->ki_opcode == IOCB_CMD_PWRITEV)
-                       ? aio_setup_vectored_rw(rw, req, compat)
-                       : aio_setup_single_vector(rw, req);
+               ret = (opcode == IOCB_CMD_PREADV ||
+                      opcode == IOCB_CMD_PWRITEV)
+                       ? aio_setup_vectored_rw(req, rw, buf, &nr_segs,
+                                               &iovec, compat)
+                       : aio_setup_single_vector(req, rw, buf, &nr_segs,
+                                                 iovec);
                if (ret)
                        return ret;
 
                ret = rw_verify_area(rw, file, &req->ki_pos, req->ki_nbytes);
-               if (ret < 0)
+               if (ret < 0) {
+                       if (iovec != &inline_vec)
+                               kfree(iovec);
                        return ret;
+               }
 
                req->ki_nbytes = ret;
-               req->ki_left = ret;
 
-               ret = aio_rw_vect_retry(req, rw, rw_op);
+               /* XXX: move/kill - rw_verify_area()? */
+               /* This matches the pread()/pwrite() logic */
+               if (req->ki_pos < 0) {
+                       ret = -EINVAL;
+                       break;
+               }
+
+               if (rw == WRITE)
+                       file_start_write(file);
+
+               ret = rw_op(req, iovec, nr_segs, req->ki_pos);
+
+               if (rw == WRITE)
+                       file_end_write(file);
                break;
 
        case IOCB_CMD_FDSYNC:
@@ -1057,6 +1263,9 @@ rw_common:
                return -EINVAL;
        }
 
+       if (iovec != &inline_vec)
+               kfree(iovec);
+
        if (ret != -EIOCBQUEUED) {
                /*
                 * There's no easy way to restart the syscall since other AIO's
@@ -1128,21 +1337,18 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
        req->ki_obj.user = user_iocb;
        req->ki_user_data = iocb->aio_data;
        req->ki_pos = iocb->aio_offset;
+       req->ki_nbytes = iocb->aio_nbytes;
 
-       req->ki_buf = (char __user *)(unsigned long)iocb->aio_buf;
-       req->ki_left = req->ki_nbytes = iocb->aio_nbytes;
-       req->ki_opcode = iocb->aio_lio_opcode;
-
-       ret = aio_run_iocb(req, compat);
+       ret = aio_run_iocb(req, iocb->aio_lio_opcode,
+                          (char __user *)(unsigned long)iocb->aio_buf,
+                          compat);
        if (ret)
                goto out_put_req;
 
-       aio_put_req(req);       /* drop extra ref to req */
        return 0;
 out_put_req:
-       atomic_dec(&ctx->reqs_active);
-       aio_put_req(req);       /* drop extra ref to req */
-       aio_put_req(req);       /* drop i/o ref to req */
+       put_reqs_available(ctx, 1);
+       kiocb_free(req);
        return ret;
 }
 
@@ -1195,7 +1401,7 @@ long do_io_submit(aio_context_t ctx_id, long nr,
        }
        blk_finish_plug(&plug);
 
-       put_ioctx(ctx);
+       percpu_ref_put(&ctx->users);
        return i ? i : ret;
 }
 
@@ -1252,7 +1458,6 @@ static struct kiocb *lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb,
 SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
                struct io_event __user *, result)
 {
-       struct io_event res;
        struct kioctx *ctx;
        struct kiocb *kiocb;
        u32 key;
@@ -1270,21 +1475,22 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
 
        kiocb = lookup_kiocb(ctx, iocb, key);
        if (kiocb)
-               ret = kiocb_cancel(ctx, kiocb, &res);
+               ret = kiocb_cancel(ctx, kiocb);
        else
                ret = -EINVAL;
 
        spin_unlock_irq(&ctx->ctx_lock);
 
        if (!ret) {
-               /* Cancellation succeeded -- copy the result
-                * into the user's buffer.
+               /*
+                * The result argument is no longer used - the io_event is
+                * always delivered via the ring buffer. -EINPROGRESS indicates
+                * cancellation is in progress:
                 */
-               if (copy_to_user(result, &res, sizeof(res)))
-                       ret = -EFAULT;
+               ret = -EINPROGRESS;
        }
 
-       put_ioctx(ctx);
+       percpu_ref_put(&ctx->users);
 
        return ret;
 }
@@ -1313,7 +1519,7 @@ SYSCALL_DEFINE5(io_getevents, aio_context_t, ctx_id,
        if (likely(ioctx)) {
                if (likely(min_nr <= nr && min_nr >= 0))
                        ret = read_events(ioctx, min_nr, nr, events, timeout);
-               put_ioctx(ioctx);
+               percpu_ref_put(&ioctx->users);
        }
        return ret;
 }
index 47a65df..85c9618 100644 (file)
@@ -108,6 +108,72 @@ static struct file_system_type anon_inode_fs_type = {
        .kill_sb        = kill_anon_super,
 };
 
+/**
+ * anon_inode_getfile_private - creates a new file instance by hooking it up to an
+ *                      anonymous inode, and a dentry that describe the "class"
+ *                      of the file
+ *
+ * @name:    [in]    name of the "class" of the new file
+ * @fops:    [in]    file operations for the new file
+ * @priv:    [in]    private data for the new file (will be file's private_data)
+ * @flags:   [in]    flags
+ *
+ *
+ * Similar to anon_inode_getfile, but each file holds a single inode.
+ *
+ */
+struct file *anon_inode_getfile_private(const char *name,
+                                       const struct file_operations *fops,
+                                       void *priv, int flags)
+{
+       struct qstr this;
+       struct path path;
+       struct file *file;
+       struct inode *inode;
+
+       if (fops->owner && !try_module_get(fops->owner))
+               return ERR_PTR(-ENOENT);
+
+       inode = anon_inode_mkinode(anon_inode_mnt->mnt_sb);
+       if (IS_ERR(inode)) {
+               file = ERR_PTR(-ENOMEM);
+               goto err_module;
+       }
+
+       /*
+        * Link the inode to a directory entry by creating a unique name
+        * using the inode sequence number.
+        */
+       file = ERR_PTR(-ENOMEM);
+       this.name = name;
+       this.len = strlen(name);
+       this.hash = 0;
+       path.dentry = d_alloc_pseudo(anon_inode_mnt->mnt_sb, &this);
+       if (!path.dentry)
+               goto err_module;
+
+       path.mnt = mntget(anon_inode_mnt);
+
+       d_instantiate(path.dentry, inode);
+
+       file = alloc_file(&path, OPEN_FMODE(flags), fops);
+       if (IS_ERR(file))
+               goto err_dput;
+
+       file->f_mapping = inode->i_mapping;
+       file->f_flags = flags & (O_ACCMODE | O_NONBLOCK);
+       file->private_data = priv;
+
+       return file;
+
+err_dput:
+       path_put(&path);
+err_module:
+       module_put(fops->owner);
+       return file;
+}
+EXPORT_SYMBOL_GPL(anon_inode_getfile_private);
+
 /**
  * anon_inode_getfile - creates a new file instance by hooking it up to an
  *                      anonymous inode, and a dentry that describe the "class"
index ad3ea14..ae28922 100644 (file)
@@ -166,7 +166,7 @@ static void bfs_write_failed(struct address_space *mapping, loff_t to)
        struct inode *inode = mapping->host;
 
        if (to > inode->i_size)
-               truncate_pagecache(inode, to, inode->i_size);
+               truncate_pagecache(inode, inode->i_size);
 }
 
 static int bfs_write_begin(struct file *file, struct address_space *mapping,
index 8fb4291..6025084 100644 (file)
@@ -716,13 +716,14 @@ int bioset_integrity_create(struct bio_set *bs, int pool_size)
                return 0;
 
        bs->bio_integrity_pool = mempool_create_slab_pool(pool_size, bip_slab);
-
-       bs->bvec_integrity_pool = biovec_create_pool(bs, pool_size);
-       if (!bs->bvec_integrity_pool)
+       if (!bs->bio_integrity_pool)
                return -1;
 
-       if (!bs->bio_integrity_pool)
+       bs->bvec_integrity_pool = biovec_create_pool(bs, pool_size);
+       if (!bs->bvec_integrity_pool) {
+               mempool_destroy(bs->bio_integrity_pool);
                return -1;
+       }
 
        return 0;
 }
index 1173a4e..1e86823 100644 (file)
@@ -592,7 +592,7 @@ static struct block_device *bd_acquire(struct inode *inode)
        return bdev;
 }
 
-static inline int sb_is_blkdev_sb(struct super_block *sb)
+int sb_is_blkdev_sb(struct super_block *sb)
 {
        return sb == blockdev_superblock;
 }
@@ -1542,7 +1542,7 @@ static ssize_t blkdev_aio_read(struct kiocb *iocb, const struct iovec *iov,
                return 0;
 
        size -= pos;
-       if (size < iocb->ki_left)
+       if (size < iocb->ki_nbytes)
                nr_segs = iov_shorten((struct iovec *)iov, nr_segs, size);
        return generic_file_aio_read(iocb, iov, nr_segs, pos);
 }
index 2b3b832..398cbd5 100644 (file)
@@ -72,3 +72,12 @@ config BTRFS_DEBUG
          performance, or export extra information via sysfs.
 
          If unsure, say N.
+
+config BTRFS_ASSERT
+       bool "Btrfs assert support"
+       depends on BTRFS_FS
+       help
+         Enable run-time assertion checking.  This will result in panics if
+         any of the assertions trip.  This is meant for btrfs developers only.
+
+         If unsure, say N.
index 3932224..a91a6a3 100644 (file)
@@ -8,7 +8,10 @@ btrfs-y += super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \
           extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \
           export.o tree-log.o free-space-cache.o zlib.o lzo.o \
           compression.o delayed-ref.o relocation.o delayed-inode.o scrub.o \
-          reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o
+          reada.o backref.o ulist.o qgroup.o send.o dev-replace.o raid56.o \
+          uuid-tree.o
 
 btrfs-$(CONFIG_BTRFS_FS_POSIX_ACL) += acl.o
 btrfs-$(CONFIG_BTRFS_FS_CHECK_INTEGRITY) += check-integrity.o
+
+btrfs-$(CONFIG_BTRFS_FS_RUN_SANITY_TESTS) += tests/free-space-tests.o
index 8bc5e8c..0552a59 100644 (file)
@@ -119,6 +119,26 @@ struct __prelim_ref {
        u64 wanted_disk_byte;
 };
 
+static struct kmem_cache *btrfs_prelim_ref_cache;
+
+int __init btrfs_prelim_ref_init(void)
+{
+       btrfs_prelim_ref_cache = kmem_cache_create("btrfs_prelim_ref",
+                                       sizeof(struct __prelim_ref),
+                                       0,
+                                       SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
+                                       NULL);
+       if (!btrfs_prelim_ref_cache)
+               return -ENOMEM;
+       return 0;
+}
+
+void btrfs_prelim_ref_exit(void)
+{
+       if (btrfs_prelim_ref_cache)
+               kmem_cache_destroy(btrfs_prelim_ref_cache);
+}
+
 /*
  * the rules for all callers of this function are:
  * - obtaining the parent is the goal
@@ -160,12 +180,12 @@ struct __prelim_ref {
 
 static int __add_prelim_ref(struct list_head *head, u64 root_id,
                            struct btrfs_key *key, int level,
-                           u64 parent, u64 wanted_disk_byte, int count)
+                           u64 parent, u64 wanted_disk_byte, int count,
+                           gfp_t gfp_mask)
 {
        struct __prelim_ref *ref;
 
-       /* in case we're adding delayed refs, we're holding the refs spinlock */
-       ref = kmalloc(sizeof(*ref), GFP_ATOMIC);
+       ref = kmem_cache_alloc(btrfs_prelim_ref_cache, gfp_mask);
        if (!ref)
                return -ENOMEM;
 
@@ -295,10 +315,9 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info,
        ret = btrfs_search_old_slot(root, &ref->key_for_search, path, time_seq);
        pr_debug("search slot in root %llu (level %d, ref count %d) returned "
                 "%d for key (%llu %u %llu)\n",
-                (unsigned long long)ref->root_id, level, ref->count, ret,
-                (unsigned long long)ref->key_for_search.objectid,
-                ref->key_for_search.type,
-                (unsigned long long)ref->key_for_search.offset);
+                ref->root_id, level, ref->count, ret,
+                ref->key_for_search.objectid, ref->key_for_search.type,
+                ref->key_for_search.offset);
        if (ret < 0)
                goto out;
 
@@ -365,11 +384,12 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info,
                node = ulist_next(parents, &uiter);
                ref->parent = node ? node->val : 0;
                ref->inode_list = node ?
-                       (struct extent_inode_elem *)(uintptr_t)node->aux : 0;
+                       (struct extent_inode_elem *)(uintptr_t)node->aux : NULL;
 
                /* additional parents require new refs being added here */
                while ((node = ulist_next(parents, &uiter))) {
-                       new_ref = kmalloc(sizeof(*new_ref), GFP_NOFS);
+                       new_ref = kmem_cache_alloc(btrfs_prelim_ref_cache,
+                                                  GFP_NOFS);
                        if (!new_ref) {
                                ret = -ENOMEM;
                                goto out;
@@ -493,7 +513,7 @@ static void __merge_refs(struct list_head *head, int mode)
                        ref1->count += ref2->count;
 
                        list_del(&ref2->list);
-                       kfree(ref2);
+                       kmem_cache_free(btrfs_prelim_ref_cache, ref2);
                }
 
        }
@@ -548,7 +568,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
                        ref = btrfs_delayed_node_to_tree_ref(node);
                        ret = __add_prelim_ref(prefs, ref->root, &op_key,
                                               ref->level + 1, 0, node->bytenr,
-                                              node->ref_mod * sgn);
+                                              node->ref_mod * sgn, GFP_ATOMIC);
                        break;
                }
                case BTRFS_SHARED_BLOCK_REF_KEY: {
@@ -558,7 +578,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
                        ret = __add_prelim_ref(prefs, ref->root, NULL,
                                               ref->level + 1, ref->parent,
                                               node->bytenr,
-                                              node->ref_mod * sgn);
+                                              node->ref_mod * sgn, GFP_ATOMIC);
                        break;
                }
                case BTRFS_EXTENT_DATA_REF_KEY: {
@@ -570,7 +590,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
                        key.offset = ref->offset;
                        ret = __add_prelim_ref(prefs, ref->root, &key, 0, 0,
                                               node->bytenr,
-                                              node->ref_mod * sgn);
+                                              node->ref_mod * sgn, GFP_ATOMIC);
                        break;
                }
                case BTRFS_SHARED_DATA_REF_KEY: {
@@ -583,7 +603,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq,
                        key.offset = ref->offset;
                        ret = __add_prelim_ref(prefs, ref->root, &key, 0,
                                               ref->parent, node->bytenr,
-                                              node->ref_mod * sgn);
+                                              node->ref_mod * sgn, GFP_ATOMIC);
                        break;
                }
                default:
@@ -657,7 +677,7 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info,
                case BTRFS_SHARED_BLOCK_REF_KEY:
                        ret = __add_prelim_ref(prefs, 0, NULL,
                                                *info_level + 1, offset,
-                                               bytenr, 1);
+                                               bytenr, 1, GFP_NOFS);
                        break;
                case BTRFS_SHARED_DATA_REF_KEY: {
                        struct btrfs_shared_data_ref *sdref;
@@ -666,13 +686,13 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info,
                        sdref = (struct btrfs_shared_data_ref *)(iref + 1);
                        count = btrfs_shared_data_ref_count(leaf, sdref);
                        ret = __add_prelim_ref(prefs, 0, NULL, 0, offset,
-                                              bytenr, count);
+                                              bytenr, count, GFP_NOFS);
                        break;
                }
                case BTRFS_TREE_BLOCK_REF_KEY:
                        ret = __add_prelim_ref(prefs, offset, NULL,
                                               *info_level + 1, 0,
-                                              bytenr, 1);
+                                              bytenr, 1, GFP_NOFS);
                        break;
                case BTRFS_EXTENT_DATA_REF_KEY: {
                        struct btrfs_extent_data_ref *dref;
@@ -687,7 +707,7 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info,
                        key.offset = btrfs_extent_data_ref_offset(leaf, dref);
                        root = btrfs_extent_data_ref_root(leaf, dref);
                        ret = __add_prelim_ref(prefs, root, &key, 0, 0,
-                                              bytenr, count);
+                                              bytenr, count, GFP_NOFS);
                        break;
                }
                default:
@@ -738,7 +758,7 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info,
                case BTRFS_SHARED_BLOCK_REF_KEY:
                        ret = __add_prelim_ref(prefs, 0, NULL,
                                                info_level + 1, key.offset,
-                                               bytenr, 1);
+                                               bytenr, 1, GFP_NOFS);
                        break;
                case BTRFS_SHARED_DATA_REF_KEY: {
                        struct btrfs_shared_data_ref *sdref;
@@ -748,13 +768,13 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info,
                                              struct btrfs_shared_data_ref);
                        count = btrfs_shared_data_ref_count(leaf, sdref);
                        ret = __add_prelim_ref(prefs, 0, NULL, 0, key.offset,
-                                               bytenr, count);
+                                               bytenr, count, GFP_NOFS);
                        break;
                }
                case BTRFS_TREE_BLOCK_REF_KEY:
                        ret = __add_prelim_ref(prefs, key.offset, NULL,
                                               info_level + 1, 0,
-                                              bytenr, 1);
+                                              bytenr, 1, GFP_NOFS);
                        break;
                case BTRFS_EXTENT_DATA_REF_KEY: {
                        struct btrfs_extent_data_ref *dref;
@@ -770,7 +790,7 @@ static int __add_keyed_refs(struct btrfs_fs_info *fs_info,
                        key.offset = btrfs_extent_data_ref_offset(leaf, dref);
                        root = btrfs_extent_data_ref_root(leaf, dref);
                        ret = __add_prelim_ref(prefs, root, &key, 0, 0,
-                                              bytenr, count);
+                                              bytenr, count, GFP_NOFS);
                        break;
                }
                default:
@@ -911,7 +931,6 @@ again:
 
        while (!list_empty(&prefs)) {
                ref = list_first_entry(&prefs, struct __prelim_ref, list);
-               list_del(&ref->list);
                WARN_ON(ref->count < 0);
                if (ref->count && ref->root_id && ref->parent == 0) {
                        /* no parent == root of tree */
@@ -935,8 +954,10 @@ again:
                                }
                                ret = find_extent_in_eb(eb, bytenr,
                                                        *extent_item_pos, &eie);
-                               ref->inode_list = eie;
                                free_extent_buffer(eb);
+                               if (ret < 0)
+                                       goto out;
+                               ref->inode_list = eie;
                        }
                        ret = ulist_add_merge(refs, ref->parent,
                                              (uintptr_t)ref->inode_list,
@@ -954,7 +975,8 @@ again:
                                eie->next = ref->inode_list;
                        }
                }
-               kfree(ref);
+               list_del(&ref->list);
+               kmem_cache_free(btrfs_prelim_ref_cache, ref);
        }
 
 out:
@@ -962,13 +984,13 @@ out:
        while (!list_empty(&prefs)) {
                ref = list_first_entry(&prefs, struct __prelim_ref, list);
                list_del(&ref->list);
-               kfree(ref);
+               kmem_cache_free(btrfs_prelim_ref_cache, ref);
        }
        while (!list_empty(&prefs_delayed)) {
                ref = list_first_entry(&prefs_delayed, struct __prelim_ref,
                                       list);
                list_del(&ref->list);
-               kfree(ref);
+               kmem_cache_free(btrfs_prelim_ref_cache, ref);
        }
 
        return ret;
@@ -1326,8 +1348,7 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
             found_key->type != BTRFS_METADATA_ITEM_KEY) ||
            found_key->objectid > logical ||
            found_key->objectid + size <= logical) {
-               pr_debug("logical %llu is not within any extent\n",
-                        (unsigned long long)logical);
+               pr_debug("logical %llu is not within any extent\n", logical);
                return -ENOENT;
        }
 
@@ -1340,11 +1361,8 @@ int extent_from_logical(struct btrfs_fs_info *fs_info, u64 logical,
 
        pr_debug("logical %llu is at position %llu within the extent (%llu "
                 "EXTENT_ITEM %llu) flags %#llx size %u\n",
-                (unsigned long long)logical,
-                (unsigned long long)(logical - found_key->objectid),
-                (unsigned long long)found_key->objectid,
-                (unsigned long long)found_key->offset,
-                (unsigned long long)flags, item_size);
+                logical, logical - found_key->objectid, found_key->objectid,
+                found_key->offset, flags, item_size);
 
        WARN_ON(!flags_ret);
        if (flags_ret) {
@@ -1516,7 +1534,7 @@ int iterate_extent_inodes(struct btrfs_fs_info *fs_info,
                while (!ret && (root_node = ulist_next(roots, &root_uiter))) {
                        pr_debug("root %llu references leaf %llu, data list "
                                 "%#llx\n", root_node->val, ref_node->val,
-                                (long long)ref_node->aux);
+                                ref_node->aux);
                        ret = iterate_leaf_refs((struct extent_inode_elem *)
                                                (uintptr_t)ref_node->aux,
                                                root_node->val,
@@ -1608,9 +1626,8 @@ static int iterate_inode_refs(u64 inum, struct btrfs_root *fs_root,
                        name_len = btrfs_inode_ref_name_len(eb, iref);
                        /* path must be released before calling iterate()! */
                        pr_debug("following ref at offset %u for inode %llu in "
-                                "tree %llu\n", cur,
-                                (unsigned long long)found_key.objectid,
-                                (unsigned long long)fs_root->objectid);
+                                "tree %llu\n", cur, found_key.objectid,
+                                fs_root->objectid);
                        ret = iterate(parent, name_len,
                                      (unsigned long)(iref + 1), eb, ctx);
                        if (ret)
index 8f2e767..a910b27 100644 (file)
@@ -72,4 +72,6 @@ int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid,
                          struct btrfs_inode_extref **ret_extref,
                          u64 *found_off);
 
+int __init btrfs_prelim_ref_init(void);
+void btrfs_prelim_ref_exit(void);
 #endif
index 08b286b..d0ae226 100644 (file)
@@ -218,6 +218,27 @@ static inline int btrfs_inode_in_log(struct inode *inode, u64 generation)
        return 0;
 }
 
+struct btrfs_dio_private {
+       struct inode *inode;
+       u64 logical_offset;
+       u64 disk_bytenr;
+       u64 bytes;
+       void *private;
+
+       /* number of bios pending for this dio */
+       atomic_t pending_bios;
+
+       /* IO errors */
+       int errors;
+
+       /* orig_bio is our btrfs_io_bio */
+       struct bio *orig_bio;
+
+       /* dio_bio came from fs/direct-io.c */
+       struct bio *dio_bio;
+       u8 csum[0];
+};
+
 /*
  * Disable DIO read nolock optimization, so new dio readers will be forced
  * to grab i_mutex. It is used to avoid the endless truncate due to
index 1431a69..1c47be1 100644 (file)
@@ -701,15 +701,13 @@ static int btrfsic_process_superblock(struct btrfsic_state *state,
                        next_bytenr = btrfs_super_root(selected_super);
                        if (state->print_mask &
                            BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
-                               printk(KERN_INFO "root@%llu\n",
-                                      (unsigned long long)next_bytenr);
+                               printk(KERN_INFO "root@%llu\n", next_bytenr);
                        break;
                case 1:
                        next_bytenr = btrfs_super_chunk_root(selected_super);
                        if (state->print_mask &
                            BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
-                               printk(KERN_INFO "chunk@%llu\n",
-                                      (unsigned long long)next_bytenr);
+                               printk(KERN_INFO "chunk@%llu\n", next_bytenr);
                        break;
                case 2:
                        next_bytenr = btrfs_super_log_root(selected_super);
@@ -717,8 +715,7 @@ static int btrfsic_process_superblock(struct btrfsic_state *state,
                                continue;
                        if (state->print_mask &
                            BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
-                               printk(KERN_INFO "log@%llu\n",
-                                      (unsigned long long)next_bytenr);
+                               printk(KERN_INFO "log@%llu\n", next_bytenr);
                        break;
                }
 
@@ -727,7 +724,7 @@ static int btrfsic_process_superblock(struct btrfsic_state *state,
                                     next_bytenr, state->metablock_size);
                if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
                        printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
-                              (unsigned long long)next_bytenr, num_copies);
+                              next_bytenr, num_copies);
 
                for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
                        struct btrfsic_block *next_block;
@@ -742,8 +739,7 @@ static int btrfsic_process_superblock(struct btrfsic_state *state,
                                printk(KERN_INFO "btrfsic:"
                                       " btrfsic_map_block(root @%llu,"
                                       " mirror %d) failed!\n",
-                                      (unsigned long long)next_bytenr,
-                                      mirror_num);
+                                      next_bytenr, mirror_num);
                                kfree(selected_super);
                                return -1;
                        }
@@ -767,7 +763,6 @@ static int btrfsic_process_superblock(struct btrfsic_state *state,
                        if (ret < (int)PAGE_CACHE_SIZE) {
                                printk(KERN_INFO
                                       "btrfsic: read @logical %llu failed!\n",
-                                      (unsigned long long)
                                       tmp_next_block_ctx.start);
                                btrfsic_release_block_ctx(&tmp_next_block_ctx);
                                kfree(selected_super);
@@ -813,7 +808,7 @@ static int btrfsic_process_superblock_dev_mirror(
            (bh->b_data + (dev_bytenr & 4095));
 
        if (btrfs_super_bytenr(super_tmp) != dev_bytenr ||
-           super_tmp->magic != cpu_to_le64(BTRFS_MAGIC) ||
+           btrfs_super_magic(super_tmp) != BTRFS_MAGIC ||
            memcmp(device->uuid, super_tmp->dev_item.uuid, BTRFS_UUID_SIZE) ||
            btrfs_super_nodesize(super_tmp) != state->metablock_size ||
            btrfs_super_leafsize(super_tmp) != state->metablock_size ||
@@ -847,10 +842,8 @@ static int btrfsic_process_superblock_dev_mirror(
                        printk_in_rcu(KERN_INFO "New initial S-block (bdev %p, %s)"
                                     " @%llu (%s/%llu/%d)\n",
                                     superblock_bdev,
-                                    rcu_str_deref(device->name),
-                                    (unsigned long long)dev_bytenr,
-                                    dev_state->name,
-                                    (unsigned long long)dev_bytenr,
+                                    rcu_str_deref(device->name), dev_bytenr,
+                                    dev_state->name, dev_bytenr,
                                     superblock_mirror_num);
                list_add(&superblock_tmp->all_blocks_node,
                         &state->all_blocks_list);
@@ -880,20 +873,20 @@ static int btrfsic_process_superblock_dev_mirror(
                tmp_disk_key.offset = 0;
                switch (pass) {
                case 0:
-                       tmp_disk_key.objectid =
-                           cpu_to_le64(BTRFS_ROOT_TREE_OBJECTID);
+                       btrfs_set_disk_key_objectid(&tmp_disk_key,
+                                                   BTRFS_ROOT_TREE_OBJECTID);
                        additional_string = "initial root ";
                        next_bytenr = btrfs_super_root(super_tmp);
                        break;
                case 1:
-                       tmp_disk_key.objectid =
-                           cpu_to_le64(BTRFS_CHUNK_TREE_OBJECTID);
+                       btrfs_set_disk_key_objectid(&tmp_disk_key,
+                                                   BTRFS_CHUNK_TREE_OBJECTID);
                        additional_string = "initial chunk ";
                        next_bytenr = btrfs_super_chunk_root(super_tmp);
                        break;
                case 2:
-                       tmp_disk_key.objectid =
-                           cpu_to_le64(BTRFS_TREE_LOG_OBJECTID);
+                       btrfs_set_disk_key_objectid(&tmp_disk_key,
+                                                   BTRFS_TREE_LOG_OBJECTID);
                        additional_string = "initial log ";
                        next_bytenr = btrfs_super_log_root(super_tmp);
                        if (0 == next_bytenr)
@@ -906,7 +899,7 @@ static int btrfsic_process_superblock_dev_mirror(
                                     next_bytenr, state->metablock_size);
                if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
                        printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
-                              (unsigned long long)next_bytenr, num_copies);
+                              next_bytenr, num_copies);
                for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
                        struct btrfsic_block *next_block;
                        struct btrfsic_block_data_ctx tmp_next_block_ctx;
@@ -918,8 +911,7 @@ static int btrfsic_process_superblock_dev_mirror(
                                              mirror_num)) {
                                printk(KERN_INFO "btrfsic: btrfsic_map_block("
                                       "bytenr @%llu, mirror %d) failed!\n",
-                                      (unsigned long long)next_bytenr,
-                                      mirror_num);
+                                      next_bytenr, mirror_num);
                                brelse(bh);
                                return -1;
                        }
@@ -1003,19 +995,17 @@ continue_with_new_stack_frame:
                    (struct btrfs_leaf *)sf->hdr;
 
                if (-1 == sf->i) {
-                       sf->nr = le32_to_cpu(leafhdr->header.nritems);
+                       sf->nr = btrfs_stack_header_nritems(&leafhdr->header);
 
                        if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
                                printk(KERN_INFO
                                       "leaf %llu items %d generation %llu"
                                       " owner %llu\n",
-                                      (unsigned long long)
-                                      sf->block_ctx->start,
-                                      sf->nr,
-                                      (unsigned long long)
-                                      le64_to_cpu(leafhdr->header.generation),
-                                      (unsigned long long)
-                                      le64_to_cpu(leafhdr->header.owner));
+                                      sf->block_ctx->start, sf->nr,
+                                      btrfs_stack_header_generation(
+                                              &leafhdr->header),
+                                      btrfs_stack_header_owner(
+                                              &leafhdr->header));
                }
 
 continue_with_current_leaf_stack_frame:
@@ -1047,10 +1037,10 @@ leaf_item_out_of_bounce_error:
                                                     &disk_item,
                                                     disk_item_offset,
                                                     sizeof(struct btrfs_item));
-                       item_offset = le32_to_cpu(disk_item.offset);
-                       item_size = le32_to_cpu(disk_item.size);
+                       item_offset = btrfs_stack_item_offset(&disk_item);
+                       item_size = btrfs_stack_item_size(&disk_item);
                        disk_key = &disk_item.key;
-                       type = disk_key->type;
+                       type = btrfs_disk_key_type(disk_key);
 
                        if (BTRFS_ROOT_ITEM_KEY == type) {
                                struct btrfs_root_item root_item;
@@ -1066,7 +1056,7 @@ leaf_item_out_of_bounce_error:
                                        sf->block_ctx, &root_item,
                                        root_item_offset,
                                        item_size);
-                               next_bytenr = le64_to_cpu(root_item.bytenr);
+                               next_bytenr = btrfs_root_bytenr(&root_item);
 
                                sf->error =
                                    btrfsic_create_link_to_next_block(
@@ -1081,8 +1071,8 @@ leaf_item_out_of_bounce_error:
                                                &sf->num_copies,
                                                &sf->mirror_num,
                                                disk_key,
-                                               le64_to_cpu(root_item.
-                                               generation));
+                                               btrfs_root_generation(
+                                               &root_item));
                                if (sf->error)
                                        goto one_stack_frame_backwards;
 
@@ -1130,18 +1120,17 @@ leaf_item_out_of_bounce_error:
                struct btrfs_node *const nodehdr = (struct btrfs_node *)sf->hdr;
 
                if (-1 == sf->i) {
-                       sf->nr = le32_to_cpu(nodehdr->header.nritems);
+                       sf->nr = btrfs_stack_header_nritems(&nodehdr->header);
 
                        if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
                                printk(KERN_INFO "node %llu level %d items %d"
                                       " generation %llu owner %llu\n",
-                                      (unsigned long long)
                                       sf->block_ctx->start,
                                       nodehdr->header.level, sf->nr,
-                                      (unsigned long long)
-                                      le64_to_cpu(nodehdr->header.generation),
-                                      (unsigned long long)
-                                      le64_to_cpu(nodehdr->header.owner));
+                                      btrfs_stack_header_generation(
+                                      &nodehdr->header),
+                                      btrfs_stack_header_owner(
+                                      &nodehdr->header));
                }
 
 continue_with_current_node_stack_frame:
@@ -1168,7 +1157,7 @@ continue_with_current_node_stack_frame:
                        btrfsic_read_from_block_data(
                                sf->block_ctx, &key_ptr, key_ptr_offset,
                                sizeof(struct btrfs_key_ptr));
-                       next_bytenr = le64_to_cpu(key_ptr.blockptr);
+                       next_bytenr = btrfs_stack_key_blockptr(&key_ptr);
 
                        sf->error = btrfsic_create_link_to_next_block(
                                        state,
@@ -1182,7 +1171,7 @@ continue_with_current_node_stack_frame:
                                        &sf->num_copies,
                                        &sf->mirror_num,
                                        &key_ptr.key,
-                                       le64_to_cpu(key_ptr.generation));
+                                       btrfs_stack_key_generation(&key_ptr));
                        if (sf->error)
                                goto one_stack_frame_backwards;
 
@@ -1247,8 +1236,7 @@ static void btrfsic_read_from_block_data(
        unsigned long i = (start_offset + offset) >> PAGE_CACHE_SHIFT;
 
        WARN_ON(offset + len > block_ctx->len);
-       offset_in_page = (start_offset + offset) &
-                        ((unsigned long)PAGE_CACHE_SIZE - 1);
+       offset_in_page = (start_offset + offset) & (PAGE_CACHE_SIZE - 1);
 
        while (len > 0) {
                cur = min(len, ((size_t)PAGE_CACHE_SIZE - offset_in_page));
@@ -1290,7 +1278,7 @@ static int btrfsic_create_link_to_next_block(
                                     next_bytenr, state->metablock_size);
                if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
                        printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
-                              (unsigned long long)next_bytenr, *num_copiesp);
+                              next_bytenr, *num_copiesp);
                *mirror_nump = 1;
        }
 
@@ -1307,7 +1295,7 @@ static int btrfsic_create_link_to_next_block(
        if (ret) {
                printk(KERN_INFO
                       "btrfsic: btrfsic_map_block(@%llu, mirror=%d) failed!\n",
-                      (unsigned long long)next_bytenr, *mirror_nump);
+                      next_bytenr, *mirror_nump);
                btrfsic_release_block_ctx(next_block_ctx);
                *next_blockp = NULL;
                return -1;
@@ -1335,20 +1323,16 @@ static int btrfsic_create_link_to_next_block(
                               "Referenced block @%llu (%s/%llu/%d)"
                               " found in hash table, %c,"
                               " bytenr mismatch (!= stored %llu).\n",
-                              (unsigned long long)next_bytenr,
-                              next_block_ctx->dev->name,
-                              (unsigned long long)next_block_ctx->dev_bytenr,
-                              *mirror_nump,
+                              next_bytenr, next_block_ctx->dev->name,
+                              next_block_ctx->dev_bytenr, *mirror_nump,
                               btrfsic_get_block_type(state, next_block),
-                              (unsigned long long)next_block->logical_bytenr);
+                              next_block->logical_bytenr);
                } else if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
                        printk(KERN_INFO
                               "Referenced block @%llu (%s/%llu/%d)"
                               " found in hash table, %c.\n",
-                              (unsigned long long)next_bytenr,
-                              next_block_ctx->dev->name,
-                              (unsigned long long)next_block_ctx->dev_bytenr,
-                              *mirror_nump,
+                              next_bytenr, next_block_ctx->dev->name,
+                              next_block_ctx->dev_bytenr, *mirror_nump,
                               btrfsic_get_block_type(state, next_block));
                next_block->logical_bytenr = next_bytenr;
 
@@ -1400,7 +1384,7 @@ static int btrfsic_create_link_to_next_block(
                if (ret < (int)next_block_ctx->len) {
                        printk(KERN_INFO
                               "btrfsic: read block @logical %llu failed!\n",
-                              (unsigned long long)next_bytenr);
+                              next_bytenr);
                        btrfsic_release_block_ctx(next_block_ctx);
                        *next_blockp = NULL;
                        return -1;
@@ -1444,12 +1428,12 @@ static int btrfsic_handle_extent_data(
                file_extent_item_offset,
                offsetof(struct btrfs_file_extent_item, disk_num_bytes));
        if (BTRFS_FILE_EXTENT_REG != file_extent_item.type ||
-           ((u64)0) == le64_to_cpu(file_extent_item.disk_bytenr)) {
+           btrfs_stack_file_extent_disk_bytenr(&file_extent_item) == 0) {
                if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
                        printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu\n",
                               file_extent_item.type,
-                              (unsigned long long)
-                              le64_to_cpu(file_extent_item.disk_bytenr));
+                              btrfs_stack_file_extent_disk_bytenr(
+                              &file_extent_item));
                return 0;
        }
 
@@ -1463,20 +1447,19 @@ static int btrfsic_handle_extent_data(
        btrfsic_read_from_block_data(block_ctx, &file_extent_item,
                                     file_extent_item_offset,
                                     sizeof(struct btrfs_file_extent_item));
-       next_bytenr = le64_to_cpu(file_extent_item.disk_bytenr) +
-                     le64_to_cpu(file_extent_item.offset);
-       generation = le64_to_cpu(file_extent_item.generation);
-       num_bytes = le64_to_cpu(file_extent_item.num_bytes);
-       generation = le64_to_cpu(file_extent_item.generation);
+       next_bytenr = btrfs_stack_file_extent_disk_bytenr(&file_extent_item) +
+                     btrfs_stack_file_extent_offset(&file_extent_item);
+       generation = btrfs_stack_file_extent_generation(&file_extent_item);
+       num_bytes = btrfs_stack_file_extent_num_bytes(&file_extent_item);
+       generation = btrfs_stack_file_extent_generation(&file_extent_item);
 
        if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
                printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu,"
                       " offset = %llu, num_bytes = %llu\n",
                       file_extent_item.type,
-                      (unsigned long long)
-                      le64_to_cpu(file_extent_item.disk_bytenr),
-                      (unsigned long long)le64_to_cpu(file_extent_item.offset),
-                      (unsigned long long)num_bytes);
+                      btrfs_stack_file_extent_disk_bytenr(&file_extent_item),
+                      btrfs_stack_file_extent_offset(&file_extent_item),
+                      num_bytes);
        while (num_bytes > 0) {
                u32 chunk_len;
                int num_copies;
@@ -1492,7 +1475,7 @@ static int btrfsic_handle_extent_data(
                                     next_bytenr, state->datablock_size);
                if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
                        printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
-                              (unsigned long long)next_bytenr, num_copies);
+                              next_bytenr, num_copies);
                for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
                        struct btrfsic_block_data_ctx next_block_ctx;
                        struct btrfsic_block *next_block;
@@ -1504,8 +1487,7 @@ static int btrfsic_handle_extent_data(
                        if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
                                printk(KERN_INFO
                                       "\tdisk_bytenr = %llu, num_bytes %u\n",
-                                      (unsigned long long)next_bytenr,
-                                      chunk_len);
+                                      next_bytenr, chunk_len);
                        ret = btrfsic_map_block(state, next_bytenr,
                                                chunk_len, &next_block_ctx,
                                                mirror_num);
@@ -1513,8 +1495,7 @@ static int btrfsic_handle_extent_data(
                                printk(KERN_INFO
                                       "btrfsic: btrfsic_map_block(@%llu,"
                                       " mirror=%d) failed!\n",
-                                      (unsigned long long)next_bytenr,
-                                      mirror_num);
+                                      next_bytenr, mirror_num);
                                return -1;
                        }
 
@@ -1543,12 +1524,10 @@ static int btrfsic_handle_extent_data(
                                               " found in hash table, D,"
                                               " bytenr mismatch"
                                               " (!= stored %llu).\n",
-                                              (unsigned long long)next_bytenr,
+                                              next_bytenr,
                                               next_block_ctx.dev->name,
-                                              (unsigned long long)
                                               next_block_ctx.dev_bytenr,
                                               mirror_num,
-                                              (unsigned long long)
                                               next_block->logical_bytenr);
                                }
                                next_block->logical_bytenr = next_bytenr;
@@ -1675,7 +1654,7 @@ static int btrfsic_read_block(struct btrfsic_state *state,
        if (block_ctx->dev_bytenr & ((u64)PAGE_CACHE_SIZE - 1)) {
                printk(KERN_INFO
                       "btrfsic: read_block() with unaligned bytenr %llu\n",
-                      (unsigned long long)block_ctx->dev_bytenr);
+                      block_ctx->dev_bytenr);
                return -1;
        }
 
@@ -1772,10 +1751,8 @@ static void btrfsic_dump_database(struct btrfsic_state *state)
 
                printk(KERN_INFO "%c-block @%llu (%s/%llu/%d)\n",
                       btrfsic_get_block_type(state, b_all),
-                      (unsigned long long)b_all->logical_bytenr,
-                      b_all->dev_state->name,
-                      (unsigned long long)b_all->dev_bytenr,
-                      b_all->mirror_num);
+                      b_all->logical_bytenr, b_all->dev_state->name,
+                      b_all->dev_bytenr, b_all->mirror_num);
 
                list_for_each(elem_ref_to, &b_all->ref_to_list) {
                        const struct btrfsic_block_link *const l =
@@ -1787,16 +1764,13 @@ static void btrfsic_dump_database(struct btrfsic_state *state)
                               " refers %u* to"
                               " %c @%llu (%s/%llu/%d)\n",
                               btrfsic_get_block_type(state, b_all),
-                              (unsigned long long)b_all->logical_bytenr,
-                              b_all->dev_state->name,
-                              (unsigned long long)b_all->dev_bytenr,
-                              b_all->mirror_num,
+                              b_all->logical_bytenr, b_all->dev_state->name,
+                              b_all->dev_bytenr, b_all->mirror_num,
                               l->ref_cnt,
                               btrfsic_get_block_type(state, l->block_ref_to),
-                              (unsigned long long)
                               l->block_ref_to->logical_bytenr,
                               l->block_ref_to->dev_state->name,
-                              (unsigned long long)l->block_ref_to->dev_bytenr,
+                              l->block_ref_to->dev_bytenr,
                               l->block_ref_to->mirror_num);
                }
 
@@ -1810,16 +1784,12 @@ static void btrfsic_dump_database(struct btrfsic_state *state)
                               " is ref %u* from"
                               " %c @%llu (%s/%llu/%d)\n",
                               btrfsic_get_block_type(state, b_all),
-                              (unsigned long long)b_all->logical_bytenr,
-                              b_all->dev_state->name,
-                              (unsigned long long)b_all->dev_bytenr,
-                              b_all->mirror_num,
+                              b_all->logical_bytenr, b_all->dev_state->name,
+                              b_all->dev_bytenr, b_all->mirror_num,
                               l->ref_cnt,
                               btrfsic_get_block_type(state, l->block_ref_from),
-                              (unsigned long long)
                               l->block_ref_from->logical_bytenr,
                               l->block_ref_from->dev_state->name,
-                              (unsigned long long)
                               l->block_ref_from->dev_bytenr,
                               l->block_ref_from->mirror_num);
                }
@@ -1896,8 +1866,8 @@ again:
                struct list_head *tmp_ref_to;
 
                if (block->is_superblock) {
-                       bytenr = le64_to_cpu(((struct btrfs_super_block *)
-                                             mapped_datav[0])->bytenr);
+                       bytenr = btrfs_super_bytenr((struct btrfs_super_block *)
+                                                   mapped_datav[0]);
                        if (num_pages * PAGE_CACHE_SIZE <
                            BTRFS_SUPER_INFO_SIZE) {
                                printk(KERN_INFO
@@ -1923,8 +1893,9 @@ again:
                                        return;
                                }
                                processed_len = state->metablock_size;
-                               bytenr = le64_to_cpu(((struct btrfs_header *)
-                                                     mapped_datav[0])->bytenr);
+                               bytenr = btrfs_stack_header_bytenr(
+                                               (struct btrfs_header *)
+                                               mapped_datav[0]);
                                btrfsic_cmp_log_and_dev_bytenr(state, bytenr,
                                                               dev_state,
                                                               dev_bytenr);
@@ -1935,12 +1906,9 @@ again:
                                       " found in hash table, %c,"
                                       " bytenr mismatch"
                                       " (!= stored %llu).\n",
-                                      (unsigned long long)bytenr,
-                                      dev_state->name,
-                                      (unsigned long long)dev_bytenr,
+                                      bytenr, dev_state->name, dev_bytenr,
                                       block->mirror_num,
                                       btrfsic_get_block_type(state, block),
-                                      (unsigned long long)
                                       block->logical_bytenr);
                                block->logical_bytenr = bytenr;
                        } else if (state->print_mask &
@@ -1948,9 +1916,7 @@ again:
                                printk(KERN_INFO
                                       "Written block @%llu (%s/%llu/%d)"
                                       " found in hash table, %c.\n",
-                                      (unsigned long long)bytenr,
-                                      dev_state->name,
-                                      (unsigned long long)dev_bytenr,
+                                      bytenr, dev_state->name, dev_bytenr,
                                       block->mirror_num,
                                       btrfsic_get_block_type(state, block));
                } else {
@@ -1966,9 +1932,7 @@ again:
                                printk(KERN_INFO
                                       "Written block @%llu (%s/%llu/%d)"
                                       " found in hash table, %c.\n",
-                                      (unsigned long long)bytenr,
-                                      dev_state->name,
-                                      (unsigned long long)dev_bytenr,
+                                      bytenr, dev_state->name, dev_bytenr,
                                       block->mirror_num,
                                       btrfsic_get_block_type(state, block));
                }
@@ -1985,21 +1949,14 @@ again:
                               " new(gen=%llu),"
                               " which is referenced by most recent superblock"
                               " (superblockgen=%llu)!\n",
-                              btrfsic_get_block_type(state, block),
-                              (unsigned long long)bytenr,
-                              dev_state->name,
-                              (unsigned long long)dev_bytenr,
-                              block->mirror_num,
-                              (unsigned long long)block->generation,
-                              (unsigned long long)
-                              le64_to_cpu(block->disk_key.objectid),
+                              btrfsic_get_block_type(state, block), bytenr,
+                              dev_state->name, dev_bytenr, block->mirror_num,
+                              block->generation,
+                              btrfs_disk_key_objectid(&block->disk_key),
                               block->disk_key.type,
-                              (unsigned long long)
-                              le64_to_cpu(block->disk_key.offset),
-                              (unsigned long long)
-                              le64_to_cpu(((struct btrfs_header *)
-                                           mapped_datav[0])->generation),
-                              (unsigned long long)
+                              btrfs_disk_key_offset(&block->disk_key),
+                              btrfs_stack_header_generation(
+                                      (struct btrfs_header *) mapped_datav[0]),
                               state->max_superblock_generation);
                        btrfsic_dump_tree(state);
                }
@@ -2008,15 +1965,12 @@ again:
                        printk(KERN_INFO "btrfs: attempt to overwrite %c-block"
                               " @%llu (%s/%llu/%d), oldgen=%llu, newgen=%llu,"
                               " which is not yet iodone!\n",
-                              btrfsic_get_block_type(state, block),
-                              (unsigned long long)bytenr,
-                              dev_state->name,
-                              (unsigned long long)dev_bytenr,
-                              block->mirror_num,
-                              (unsigned long long)block->generation,
-                              (unsigned long long)
-                              le64_to_cpu(((struct btrfs_header *)
-                                           mapped_datav[0])->generation));
+                              btrfsic_get_block_type(state, block), bytenr,
+                              dev_state->name, dev_bytenr, block->mirror_num,
+                              block->generation,
+                              btrfs_stack_header_generation(
+                                      (struct btrfs_header *)
+                                      mapped_datav[0]));
                        /* it would not be safe to go on */
                        btrfsic_dump_tree(state);
                        goto continue_loop;
@@ -2056,7 +2010,7 @@ again:
                if (ret) {
                        printk(KERN_INFO
                               "btrfsic: btrfsic_map_block(root @%llu)"
-                              " failed!\n", (unsigned long long)bytenr);
+                              " failed!\n", bytenr);
                        goto continue_loop;
                }
                block_ctx.datav = mapped_datav;
@@ -2140,7 +2094,7 @@ again:
                                printk(KERN_INFO
                                       "btrfsic: btrfsic_process_metablock"
                                       "(root @%llu) failed!\n",
-                                      (unsigned long long)dev_bytenr);
+                                      dev_bytenr);
                } else {
                        block->is_metadata = 0;
                        block->mirror_num = 0;  /* unknown */
@@ -2168,8 +2122,7 @@ again:
                        if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
                                printk(KERN_INFO "Written block (%s/%llu/?)"
                                       " !found in hash table, D.\n",
-                                      dev_state->name,
-                                      (unsigned long long)dev_bytenr);
+                                      dev_state->name, dev_bytenr);
                        if (!state->include_extent_data) {
                                /* ignore that written D block */
                                goto continue_loop;
@@ -2184,17 +2137,16 @@ again:
                        block_ctx.pagev = NULL;
                } else {
                        processed_len = state->metablock_size;
-                       bytenr = le64_to_cpu(((struct btrfs_header *)
-                                             mapped_datav[0])->bytenr);
+                       bytenr = btrfs_stack_header_bytenr(
+                                       (struct btrfs_header *)
+                                       mapped_datav[0]);
                        btrfsic_cmp_log_and_dev_bytenr(state, bytenr, dev_state,
                                                       dev_bytenr);
                        if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
                                printk(KERN_INFO
                                       "Written block @%llu (%s/%llu/?)"
                                       " !found in hash table, M.\n",
-                                      (unsigned long long)bytenr,
-                                      dev_state->name,
-                                      (unsigned long long)dev_bytenr);
+                                      bytenr, dev_state->name, dev_bytenr);
 
                        ret = btrfsic_map_block(state, bytenr, processed_len,
                                                &block_ctx, 0);
@@ -2202,7 +2154,7 @@ again:
                                printk(KERN_INFO
                                       "btrfsic: btrfsic_map_block(root @%llu)"
                                       " failed!\n",
-                                      (unsigned long long)dev_bytenr);
+                                      dev_bytenr);
                                goto continue_loop;
                        }
                }
@@ -2267,10 +2219,8 @@ again:
                        printk(KERN_INFO
                               "New written %c-block @%llu (%s/%llu/%d)\n",
                               is_metadata ? 'M' : 'D',
-                              (unsigned long long)block->logical_bytenr,
-                              block->dev_state->name,
-                              (unsigned long long)block->dev_bytenr,
-                              block->mirror_num);
+                              block->logical_bytenr, block->dev_state->name,
+                              block->dev_bytenr, block->mirror_num);
                list_add(&block->all_blocks_node, &state->all_blocks_list);
                btrfsic_block_hashtable_add(block, &state->block_hashtable);
 
@@ -2281,7 +2231,7 @@ again:
                                printk(KERN_INFO
                                       "btrfsic: process_metablock(root @%llu)"
                                       " failed!\n",
-                                      (unsigned long long)dev_bytenr);
+                                      dev_bytenr);
                }
                btrfsic_release_block_ctx(&block_ctx);
        }
@@ -2319,10 +2269,8 @@ static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status)
                               "bio_end_io(err=%d) for %c @%llu (%s/%llu/%d)\n",
                               bio_error_status,
                               btrfsic_get_block_type(dev_state->state, block),
-                              (unsigned long long)block->logical_bytenr,
-                              dev_state->name,
-                              (unsigned long long)block->dev_bytenr,
-                              block->mirror_num);
+                              block->logical_bytenr, dev_state->name,
+                              block->dev_bytenr, block->mirror_num);
                next_block = block->next_in_same_bio;
                block->iodone_w_error = iodone_w_error;
                if (block->submit_bio_bh_rw & REQ_FLUSH) {
@@ -2332,7 +2280,6 @@ static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status)
                                printk(KERN_INFO
                                       "bio_end_io() new %s flush_gen=%llu\n",
                                       dev_state->name,
-                                      (unsigned long long)
                                       dev_state->last_flush_gen);
                }
                if (block->submit_bio_bh_rw & REQ_FUA)
@@ -2358,10 +2305,8 @@ static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate)
                       "bh_end_io(error=%d) for %c @%llu (%s/%llu/%d)\n",
                       iodone_w_error,
                       btrfsic_get_block_type(dev_state->state, block),
-                      (unsigned long long)block->logical_bytenr,
-                      block->dev_state->name,
-                      (unsigned long long)block->dev_bytenr,
-                      block->mirror_num);
+                      block->logical_bytenr, block->dev_state->name,
+                      block->dev_bytenr, block->mirror_num);
 
        block->iodone_w_error = iodone_w_error;
        if (block->submit_bio_bh_rw & REQ_FLUSH) {
@@ -2370,8 +2315,7 @@ static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate)
                     BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
                        printk(KERN_INFO
                               "bh_end_io() new %s flush_gen=%llu\n",
-                              dev_state->name,
-                              (unsigned long long)dev_state->last_flush_gen);
+                              dev_state->name, dev_state->last_flush_gen);
        }
        if (block->submit_bio_bh_rw & REQ_FUA)
                block->flush_gen = 0; /* FUA completed means block is on disk */
@@ -2396,26 +2340,20 @@ static int btrfsic_process_written_superblock(
                        printk(KERN_INFO
                               "btrfsic: superblock @%llu (%s/%llu/%d)"
                               " with old gen %llu <= %llu\n",
-                              (unsigned long long)superblock->logical_bytenr,
+                              superblock->logical_bytenr,
                               superblock->dev_state->name,
-                              (unsigned long long)superblock->dev_bytenr,
-                              superblock->mirror_num,
-                              (unsigned long long)
+                              superblock->dev_bytenr, superblock->mirror_num,
                               btrfs_super_generation(super_hdr),
-                              (unsigned long long)
                               state->max_superblock_generation);
        } else {
                if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
                        printk(KERN_INFO
                               "btrfsic: got new superblock @%llu (%s/%llu/%d)"
                               " with new gen %llu > %llu\n",
-                              (unsigned long long)superblock->logical_bytenr,
+                              superblock->logical_bytenr,
                               superblock->dev_state->name,
-                              (unsigned long long)superblock->dev_bytenr,
-                              superblock->mirror_num,
-                              (unsigned long long)
+                              superblock->dev_bytenr, superblock->mirror_num,
                               btrfs_super_generation(super_hdr),
-                              (unsigned long long)
                               state->max_superblock_generation);
 
                state->max_superblock_generation =
@@ -2432,43 +2370,41 @@ static int btrfsic_process_written_superblock(
                int num_copies;
                int mirror_num;
                const char *additional_string = NULL;
-               struct btrfs_disk_key tmp_disk_key;
+               struct btrfs_disk_key tmp_disk_key = {0};
 
-               tmp_disk_key.type = BTRFS_ROOT_ITEM_KEY;
-               tmp_disk_key.offset = 0;
+               btrfs_set_disk_key_type(&tmp_disk_key,
+                                       BTRFS_ROOT_ITEM_KEY);
+               btrfs_set_disk_key_offset(&tmp_disk_key, 0);
 
                switch (pass) {
                case 0:
-                       tmp_disk_key.objectid =
-                           cpu_to_le64(BTRFS_ROOT_TREE_OBJECTID);
+                       btrfs_set_disk_key_objectid(&tmp_disk_key,
+                                                   BTRFS_ROOT_TREE_OBJECTID);
                        additional_string = "root ";
                        next_bytenr = btrfs_super_root(super_hdr);
                        if (state->print_mask &
                            BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
-                               printk(KERN_INFO "root@%llu\n",
-                                      (unsigned long long)next_bytenr);
+                               printk(KERN_INFO "root@%llu\n", next_bytenr);
                        break;
                case 1:
-                       tmp_disk_key.objectid =
-                           cpu_to_le64(BTRFS_CHUNK_TREE_OBJECTID);
+                       btrfs_set_disk_key_objectid(&tmp_disk_key,
+                                                   BTRFS_CHUNK_TREE_OBJECTID);
                        additional_string = "chunk ";
                        next_bytenr = btrfs_super_chunk_root(super_hdr);
                        if (state->print_mask &
                            BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
-                               printk(KERN_INFO "chunk@%llu\n",
-                                      (unsigned long long)next_bytenr);
+                               printk(KERN_INFO "chunk@%llu\n", next_bytenr);
                        break;
                case 2:
-                       tmp_disk_key.objectid =
-                           cpu_to_le64(BTRFS_TREE_LOG_OBJECTID);
+                       btrfs_set_disk_key_objectid(&tmp_disk_key,
+                                                   BTRFS_TREE_LOG_OBJECTID);
                        additional_string = "log ";
                        next_bytenr = btrfs_super_log_root(super_hdr);
                        if (0 == next_bytenr)
                                continue;
                        if (state->print_mask &
                            BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
-                               printk(KERN_INFO "log@%llu\n",
-                                      (unsigned long long)next_bytenr);
+                               printk(KERN_INFO "log@%llu\n", next_bytenr);
                        break;
                }
 
@@ -2477,7 +2413,7 @@ static int btrfsic_process_written_superblock(
                                     next_bytenr, BTRFS_SUPER_INFO_SIZE);
                if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
                        printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
-                              (unsigned long long)next_bytenr, num_copies);
+                              next_bytenr, num_copies);
                for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
                        int was_created;
 
@@ -2493,8 +2429,7 @@ static int btrfsic_process_written_superblock(
                                printk(KERN_INFO
                                       "btrfsic: btrfsic_map_block(@%llu,"
                                       " mirror=%d) failed!\n",
-                                      (unsigned long long)next_bytenr,
-                                      mirror_num);
+                                      next_bytenr, mirror_num);
                                return -1;
                        }
 
@@ -2579,26 +2514,22 @@ static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state,
                               " %u* refers to %c @%llu (%s/%llu/%d)\n",
                               recursion_level,
                               btrfsic_get_block_type(state, block),
-                              (unsigned long long)block->logical_bytenr,
-                              block->dev_state->name,
-                              (unsigned long long)block->dev_bytenr,
-                              block->mirror_num,
+                              block->logical_bytenr, block->dev_state->name,
+                              block->dev_bytenr, block->mirror_num,
                               l->ref_cnt,
                               btrfsic_get_block_type(state, l->block_ref_to),
-                              (unsigned long long)
                               l->block_ref_to->logical_bytenr,
                               l->block_ref_to->dev_state->name,
-                              (unsigned long long)l->block_ref_to->dev_bytenr,
+                              l->block_ref_to->dev_bytenr,
                               l->block_ref_to->mirror_num);
                if (l->block_ref_to->never_written) {
                        printk(KERN_INFO "btrfs: attempt to write superblock"
                               " which references block %c @%llu (%s/%llu/%d)"
                               " which is never written!\n",
                               btrfsic_get_block_type(state, l->block_ref_to),
-                              (unsigned long long)
                               l->block_ref_to->logical_bytenr,
                               l->block_ref_to->dev_state->name,
-                              (unsigned long long)l->block_ref_to->dev_bytenr,
+                              l->block_ref_to->dev_bytenr,
                               l->block_ref_to->mirror_num);
                        ret = -1;
                } else if (!l->block_ref_to->is_iodone) {
@@ -2606,10 +2537,9 @@ static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state,
                               " which references block %c @%llu (%s/%llu/%d)"
                               " which is not yet iodone!\n",
                               btrfsic_get_block_type(state, l->block_ref_to),
-                              (unsigned long long)
                               l->block_ref_to->logical_bytenr,
                               l->block_ref_to->dev_state->name,
-                              (unsigned long long)l->block_ref_to->dev_bytenr,
+                              l->block_ref_to->dev_bytenr,
                               l->block_ref_to->mirror_num);
                        ret = -1;
                } else if (l->block_ref_to->iodone_w_error) {
@@ -2617,10 +2547,9 @@ static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state,
                               " which references block %c @%llu (%s/%llu/%d)"
                               " which has write error!\n",
                               btrfsic_get_block_type(state, l->block_ref_to),
-                              (unsigned long long)
                               l->block_ref_to->logical_bytenr,
                               l->block_ref_to->dev_state->name,
-                              (unsigned long long)l->block_ref_to->dev_bytenr,
+                              l->block_ref_to->dev_bytenr,
                               l->block_ref_to->mirror_num);
                        ret = -1;
                } else if (l->parent_generation !=
@@ -2634,13 +2563,12 @@ static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state,
                               " with generation %llu !="
                               " parent generation %llu!\n",
                               btrfsic_get_block_type(state, l->block_ref_to),
-                              (unsigned long long)
                               l->block_ref_to->logical_bytenr,
                               l->block_ref_to->dev_state->name,
-                              (unsigned long long)l->block_ref_to->dev_bytenr,
+                              l->block_ref_to->dev_bytenr,
                               l->block_ref_to->mirror_num,
-                              (unsigned long long)l->block_ref_to->generation,
-                              (unsigned long long)l->parent_generation);
+                              l->block_ref_to->generation,
+                              l->parent_generation);
                        ret = -1;
                } else if (l->block_ref_to->flush_gen >
                           l->block_ref_to->dev_state->last_flush_gen) {
@@ -2650,13 +2578,10 @@ static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state,
                               " (block flush_gen=%llu,"
                               " dev->flush_gen=%llu)!\n",
                               btrfsic_get_block_type(state, l->block_ref_to),
-                              (unsigned long long)
                               l->block_ref_to->logical_bytenr,
                               l->block_ref_to->dev_state->name,
-                              (unsigned long long)l->block_ref_to->dev_bytenr,
-                              l->block_ref_to->mirror_num,
-                              (unsigned long long)block->flush_gen,
-                              (unsigned long long)
+                              l->block_ref_to->dev_bytenr,
+                              l->block_ref_to->mirror_num, block->flush_gen,
                               l->block_ref_to->dev_state->last_flush_gen);
                        ret = -1;
                } else if (-1 == btrfsic_check_all_ref_blocks(state,
@@ -2701,16 +2626,12 @@ static int btrfsic_is_block_ref_by_superblock(
                               " is ref %u* from %c @%llu (%s/%llu/%d)\n",
                               recursion_level,
                               btrfsic_get_block_type(state, block),
-                              (unsigned long long)block->logical_bytenr,
-                              block->dev_state->name,
-                              (unsigned long long)block->dev_bytenr,
-                              block->mirror_num,
+                              block->logical_bytenr, block->dev_state->name,
+                              block->dev_bytenr, block->mirror_num,
                               l->ref_cnt,
                               btrfsic_get_block_type(state, l->block_ref_from),
-                              (unsigned long long)
                               l->block_ref_from->logical_bytenr,
                               l->block_ref_from->dev_state->name,
-                              (unsigned long long)
                               l->block_ref_from->dev_bytenr,
                               l->block_ref_from->mirror_num);
                if (l->block_ref_from->is_superblock &&
@@ -2737,14 +2658,12 @@ static void btrfsic_print_add_link(const struct btrfsic_state *state,
               " to %c @%llu (%s/%llu/%d).\n",
               l->ref_cnt,
               btrfsic_get_block_type(state, l->block_ref_from),
-              (unsigned long long)l->block_ref_from->logical_bytenr,
+              l->block_ref_from->logical_bytenr,
               l->block_ref_from->dev_state->name,
-              (unsigned long long)l->block_ref_from->dev_bytenr,
-              l->block_ref_from->mirror_num,
+              l->block_ref_from->dev_bytenr, l->block_ref_from->mirror_num,
               btrfsic_get_block_type(state, l->block_ref_to),
-              (unsigned long long)l->block_ref_to->logical_bytenr,
-              l->block_ref_to->dev_state->name,
-              (unsigned long long)l->block_ref_to->dev_bytenr,
+              l->block_ref_to->logical_bytenr,
+              l->block_ref_to->dev_state->name, l->block_ref_to->dev_bytenr,
               l->block_ref_to->mirror_num);
 }
 
@@ -2756,14 +2675,12 @@ static void btrfsic_print_rem_link(const struct btrfsic_state *state,
               " to %c @%llu (%s/%llu/%d).\n",
               l->ref_cnt,
               btrfsic_get_block_type(state, l->block_ref_from),
-              (unsigned long long)l->block_ref_from->logical_bytenr,
+              l->block_ref_from->logical_bytenr,
               l->block_ref_from->dev_state->name,
-              (unsigned long long)l->block_ref_from->dev_bytenr,
-              l->block_ref_from->mirror_num,
+              l->block_ref_from->dev_bytenr, l->block_ref_from->mirror_num,
               btrfsic_get_block_type(state, l->block_ref_to),
-              (unsigned long long)l->block_ref_to->logical_bytenr,
-              l->block_ref_to->dev_state->name,
-              (unsigned long long)l->block_ref_to->dev_bytenr,
+              l->block_ref_to->logical_bytenr,
+              l->block_ref_to->dev_state->name, l->block_ref_to->dev_bytenr,
               l->block_ref_to->mirror_num);
 }
 
@@ -2807,10 +2724,8 @@ static void btrfsic_dump_tree_sub(const struct btrfsic_state *state,
         */
        indent_add = sprintf(buf, "%c-%llu(%s/%llu/%d)",
                             btrfsic_get_block_type(state, block),
-                            (unsigned long long)block->logical_bytenr,
-                            block->dev_state->name,
-                            (unsigned long long)block->dev_bytenr,
-                            block->mirror_num);
+                            block->logical_bytenr, block->dev_state->name,
+                            block->dev_bytenr, block->mirror_num);
        if (indent_level + indent_add > BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL) {
                printk("[...]\n");
                return;
@@ -2943,10 +2858,8 @@ static struct btrfsic_block *btrfsic_block_lookup_or_add(
                               "New %s%c-block @%llu (%s/%llu/%d)\n",
                               additional_string,
                               btrfsic_get_block_type(state, block),
-                              (unsigned long long)block->logical_bytenr,
-                              dev_state->name,
-                              (unsigned long long)block->dev_bytenr,
-                              mirror_num);
+                              block->logical_bytenr, dev_state->name,
+                              block->dev_bytenr, mirror_num);
                list_add(&block->all_blocks_node, &state->all_blocks_list);
                btrfsic_block_hashtable_add(block, &state->block_hashtable);
                if (NULL != was_created)
@@ -2980,7 +2893,7 @@ static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
                        printk(KERN_INFO "btrfsic:"
                               " btrfsic_map_block(logical @%llu,"
                               " mirror %d) failed!\n",
-                              (unsigned long long)bytenr, mirror_num);
+                              bytenr, mirror_num);
                        continue;
                }
 
@@ -2997,8 +2910,7 @@ static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
                printk(KERN_INFO "btrfs: attempt to write M-block which contains logical bytenr that doesn't map to dev+physical bytenr of submit_bio,"
                       " buffer->log_bytenr=%llu, submit_bio(bdev=%s,"
                       " phys_bytenr=%llu)!\n",
-                      (unsigned long long)bytenr, dev_state->name,
-                      (unsigned long long)dev_bytenr);
+                      bytenr, dev_state->name, dev_bytenr);
                for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
                        ret = btrfsic_map_block(state, bytenr,
                                                state->metablock_size,
@@ -3008,10 +2920,8 @@ static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
 
                        printk(KERN_INFO "Read logical bytenr @%llu maps to"
                               " (%s/%llu/%d)\n",
-                              (unsigned long long)bytenr,
-                              block_ctx.dev->name,
-                              (unsigned long long)block_ctx.dev_bytenr,
-                              mirror_num);
+                              bytenr, block_ctx.dev->name,
+                              block_ctx.dev_bytenr, mirror_num);
                }
                WARN_ON(1);
        }
@@ -3048,12 +2958,10 @@ int btrfsic_submit_bh(int rw, struct buffer_head *bh)
                if (dev_state->state->print_mask &
                    BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
                        printk(KERN_INFO
-                              "submit_bh(rw=0x%x, blocknr=%lu (bytenr %llu),"
-                              " size=%lu, data=%p, bdev=%p)\n",
-                              rw, (unsigned long)bh->b_blocknr,
-                              (unsigned long long)dev_bytenr,
-                              (unsigned long)bh->b_size, bh->b_data,
-                              bh->b_bdev);
+                              "submit_bh(rw=0x%x, blocknr=%llu (bytenr %llu),"
+                              " size=%zu, data=%p, bdev=%p)\n",
+                              rw, (unsigned long long)bh->b_blocknr,
+                              dev_bytenr, bh->b_size, bh->b_data, bh->b_bdev);
                btrfsic_process_written_block(dev_state, dev_bytenr,
                                              &bh->b_data, 1, NULL,
                                              NULL, bh, rw);
@@ -3118,9 +3026,9 @@ void btrfsic_submit_bio(int rw, struct bio *bio)
                    BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
                        printk(KERN_INFO
                               "submit_bio(rw=0x%x, bi_vcnt=%u,"
-                              " bi_sector=%lu (bytenr %llu), bi_bdev=%p)\n",
-                              rw, bio->bi_vcnt, (unsigned long)bio->bi_sector,
-                              (unsigned long long)dev_bytenr,
+                              " bi_sector=%llu (bytenr %llu), bi_bdev=%p)\n",
+                              rw, bio->bi_vcnt,
+                              (unsigned long long)bio->bi_sector, dev_bytenr,
                               bio->bi_bdev);
 
                mapped_datav = kmalloc(sizeof(*mapped_datav) * bio->bi_vcnt,
@@ -3213,19 +3121,19 @@ int btrfsic_mount(struct btrfs_root *root,
        if (root->nodesize & ((u64)PAGE_CACHE_SIZE - 1)) {
                printk(KERN_INFO
                       "btrfsic: cannot handle nodesize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n",
-                      root->nodesize, (unsigned long)PAGE_CACHE_SIZE);
+                      root->nodesize, PAGE_CACHE_SIZE);
                return -1;
        }
        if (root->leafsize & ((u64)PAGE_CACHE_SIZE - 1)) {
                printk(KERN_INFO
                       "btrfsic: cannot handle leafsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n",
-                      root->leafsize, (unsigned long)PAGE_CACHE_SIZE);
+                      root->leafsize, PAGE_CACHE_SIZE);
                return -1;
        }
        if (root->sectorsize & ((u64)PAGE_CACHE_SIZE - 1)) {
                printk(KERN_INFO
                       "btrfsic: cannot handle sectorsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n",
-                      root->sectorsize, (unsigned long)PAGE_CACHE_SIZE);
+                      root->sectorsize, PAGE_CACHE_SIZE);
                return -1;
        }
        state = kzalloc(sizeof(*state), GFP_NOFS);
@@ -3369,10 +3277,8 @@ void btrfsic_unmount(struct btrfs_root *root,
                               " @%llu (%s/%llu/%d) on umount which is"
                               " not yet iodone!\n",
                               btrfsic_get_block_type(state, b_all),
-                              (unsigned long long)b_all->logical_bytenr,
-                              b_all->dev_state->name,
-                              (unsigned long long)b_all->dev_bytenr,
-                              b_all->mirror_num);
+                              b_all->logical_bytenr, b_all->dev_state->name,
+                              b_all->dev_bytenr, b_all->mirror_num);
        }
 
        mutex_unlock(&btrfsic_mutex);
index b189bd1..6aad98c 100644 (file)
@@ -132,9 +132,8 @@ static int check_compressed_csum(struct inode *inode,
                        printk(KERN_INFO "btrfs csum failed ino %llu "
                               "extent %llu csum %u "
                               "wanted %u mirror %d\n",
-                              (unsigned long long)btrfs_ino(inode),
-                              (unsigned long long)disk_start,
-                              csum, *cb_sum, cb->mirror_num);
+                              btrfs_ino(inode), disk_start, csum, *cb_sum,
+                              cb->mirror_num);
                        ret = -EIO;
                        goto fail;
                }
@@ -639,7 +638,11 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
        faili = nr_pages - 1;
        cb->nr_pages = nr_pages;
 
-       add_ra_bio_pages(inode, em_start + em_len, cb);
+       /* In the parent-locked case, we only locked the range we are
+        * interested in.  In all other cases, we can opportunistically
+        * cache decompressed data that goes beyond the requested range. */
+       if (!(bio_flags & EXTENT_BIO_PARENT_LOCKED))
+               add_ra_bio_pages(inode, em_start + em_len, cb);
 
        /* include any pages we added in add_ra-bio_pages */
        uncompressed_len = bio->bi_vcnt * PAGE_CACHE_SIZE;
index ed50460..6434672 100644 (file)
@@ -274,8 +274,7 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans,
        else
                btrfs_set_header_owner(cow, new_root_objectid);
 
-       write_extent_buffer(cow, root->fs_info->fsid,
-                           (unsigned long)btrfs_header_fsid(cow),
+       write_extent_buffer(cow, root->fs_info->fsid, btrfs_header_fsid(cow),
                            BTRFS_FSID_SIZE);
 
        WARN_ON(btrfs_header_generation(buf) > trans->transid);
@@ -484,8 +483,27 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
        struct rb_node **new;
        struct rb_node *parent = NULL;
        struct tree_mod_elem *cur;
+       int ret = 0;
+
+       BUG_ON(!tm);
+
+       tree_mod_log_write_lock(fs_info);
+       if (list_empty(&fs_info->tree_mod_seq_list)) {
+               tree_mod_log_write_unlock(fs_info);
+               /*
+                * Ok we no longer care about logging modifications, free up tm
+                * and return 0.  Any callers shouldn't be using tm after
+                * calling tree_mod_log_insert, but if they do we can just
+                * change this to return a special error code to let the callers
+                * do their own thing.
+                */
+               kfree(tm);
+               return 0;
+       }
 
-       BUG_ON(!tm || !tm->seq);
+       spin_lock(&fs_info->tree_mod_seq_lock);
+       tm->seq = btrfs_inc_tree_mod_seq_minor(fs_info);
+       spin_unlock(&fs_info->tree_mod_seq_lock);
 
        tm_root = &fs_info->tree_mod_log;
        new = &tm_root->rb_node;
@@ -501,14 +519,17 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
                else if (cur->seq > tm->seq)
                        new = &((*new)->rb_right);
                else {
+                       ret = -EEXIST;
                        kfree(tm);
-                       return -EEXIST;
+                       goto out;
                }
        }
 
        rb_link_node(&tm->node, parent, new);
        rb_insert_color(&tm->node, tm_root);
-       return 0;
+out:
+       tree_mod_log_write_unlock(fs_info);
+       return ret;
 }
 
 /*
@@ -524,57 +545,19 @@ static inline int tree_mod_dont_log(struct btrfs_fs_info *fs_info,
                return 1;
        if (eb && btrfs_header_level(eb) == 0)
                return 1;
-
-       tree_mod_log_write_lock(fs_info);
-       if (list_empty(&fs_info->tree_mod_seq_list)) {
-               /*
-                * someone emptied the list while we were waiting for the lock.
-                * we must not add to the list when no blocker exists.
-                */
-               tree_mod_log_write_unlock(fs_info);
-               return 1;
-       }
-
        return 0;
 }
 
-/*
- * This allocates memory and gets a tree modification sequence number.
- *
- * Returns <0 on error.
- * Returns >0 (the added sequence number) on success.
- */
-static inline int tree_mod_alloc(struct btrfs_fs_info *fs_info, gfp_t flags,
-                                struct tree_mod_elem **tm_ret)
-{
-       struct tree_mod_elem *tm;
-
-       /*
-        * once we switch from spin locks to something different, we should
-        * honor the flags parameter here.
-        */
-       tm = *tm_ret = kzalloc(sizeof(*tm), GFP_ATOMIC);
-       if (!tm)
-               return -ENOMEM;
-
-       spin_lock(&fs_info->tree_mod_seq_lock);
-       tm->seq = btrfs_inc_tree_mod_seq_minor(fs_info);
-       spin_unlock(&fs_info->tree_mod_seq_lock);
-
-       return tm->seq;
-}
-
 static inline int
 __tree_mod_log_insert_key(struct btrfs_fs_info *fs_info,
                          struct extent_buffer *eb, int slot,
                          enum mod_log_op op, gfp_t flags)
 {
-       int ret;
        struct tree_mod_elem *tm;
 
-       ret = tree_mod_alloc(fs_info, flags, &tm);
-       if (ret < 0)
-               return ret;
+       tm = kzalloc(sizeof(*tm), flags);
+       if (!tm)
+               return -ENOMEM;
 
        tm->index = eb->start >> PAGE_CACHE_SHIFT;
        if (op != MOD_LOG_KEY_ADD) {
@@ -589,34 +572,14 @@ __tree_mod_log_insert_key(struct btrfs_fs_info *fs_info,
 }
 
 static noinline int
-tree_mod_log_insert_key_mask(struct btrfs_fs_info *fs_info,
-                            struct extent_buffer *eb, int slot,
-                            enum mod_log_op op, gfp_t flags)
+tree_mod_log_insert_key(struct btrfs_fs_info *fs_info,
+                       struct extent_buffer *eb, int slot,
+                       enum mod_log_op op, gfp_t flags)
 {
-       int ret;
-
        if (tree_mod_dont_log(fs_info, eb))
                return 0;
 
-       ret = __tree_mod_log_insert_key(fs_info, eb, slot, op, flags);
-
-       tree_mod_log_write_unlock(fs_info);
-       return ret;
-}
-
-static noinline int
-tree_mod_log_insert_key(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
-                       int slot, enum mod_log_op op)
-{
-       return tree_mod_log_insert_key_mask(fs_info, eb, slot, op, GFP_NOFS);
-}
-
-static noinline int
-tree_mod_log_insert_key_locked(struct btrfs_fs_info *fs_info,
-                            struct extent_buffer *eb, int slot,
-                            enum mod_log_op op)
-{
-       return __tree_mod_log_insert_key(fs_info, eb, slot, op, GFP_NOFS);
+       return __tree_mod_log_insert_key(fs_info, eb, slot, op, flags);
 }
 
 static noinline int
@@ -637,14 +600,14 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
         * buffer, i.e. dst_slot < src_slot.
         */
        for (i = 0; i + dst_slot < src_slot && i < nr_items; i++) {
-               ret = tree_mod_log_insert_key_locked(fs_info, eb, i + dst_slot,
-                                             MOD_LOG_KEY_REMOVE_WHILE_MOVING);
+               ret = __tree_mod_log_insert_key(fs_info, eb, i + dst_slot,
+                               MOD_LOG_KEY_REMOVE_WHILE_MOVING, GFP_NOFS);
                BUG_ON(ret < 0);
        }
 
-       ret = tree_mod_alloc(fs_info, flags, &tm);
-       if (ret < 0)
-               goto out;
+       tm = kzalloc(sizeof(*tm), flags);
+       if (!tm)
+               return -ENOMEM;
 
        tm->index = eb->start >> PAGE_CACHE_SHIFT;
        tm->slot = src_slot;
@@ -652,10 +615,7 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
        tm->move.nr_items = nr_items;
        tm->op = MOD_LOG_MOVE_KEYS;
 
-       ret = __tree_mod_log_insert(fs_info, tm);
-out:
-       tree_mod_log_write_unlock(fs_info);
-       return ret;
+       return __tree_mod_log_insert(fs_info, tm);
 }
 
 static inline void
@@ -670,8 +630,8 @@ __tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb)
 
        nritems = btrfs_header_nritems(eb);
        for (i = nritems - 1; i >= 0; i--) {
-               ret = tree_mod_log_insert_key_locked(fs_info, eb, i,
-                                             MOD_LOG_KEY_REMOVE_WHILE_FREEING);
+               ret = __tree_mod_log_insert_key(fs_info, eb, i,
+                               MOD_LOG_KEY_REMOVE_WHILE_FREEING, GFP_NOFS);
                BUG_ON(ret < 0);
        }
 }
@@ -683,7 +643,6 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
                         int log_removal)
 {
        struct tree_mod_elem *tm;
-       int ret;
 
        if (tree_mod_dont_log(fs_info, NULL))
                return 0;
@@ -691,9 +650,9 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
        if (log_removal)
                __tree_mod_log_free_eb(fs_info, old_root);
 
-       ret = tree_mod_alloc(fs_info, flags, &tm);
-       if (ret < 0)
-               goto out;
+       tm = kzalloc(sizeof(*tm), flags);
+       if (!tm)
+               return -ENOMEM;
 
        tm->index = new_root->start >> PAGE_CACHE_SHIFT;
        tm->old_root.logical = old_root->start;
@@ -701,10 +660,7 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
        tm->generation = btrfs_header_generation(old_root);
        tm->op = MOD_LOG_ROOT_REPLACE;
 
-       ret = __tree_mod_log_insert(fs_info, tm);
-out:
-       tree_mod_log_write_unlock(fs_info);
-       return ret;
+       return __tree_mod_log_insert(fs_info, tm);
 }
 
 static struct tree_mod_elem *
@@ -784,23 +740,20 @@ tree_mod_log_eb_copy(struct btrfs_fs_info *fs_info, struct extent_buffer *dst,
        if (tree_mod_dont_log(fs_info, NULL))
                return;
 
-       if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0) {
-               tree_mod_log_write_unlock(fs_info);
+       if (btrfs_header_level(dst) == 0 && btrfs_header_level(src) == 0)
                return;
-       }
 
        for (i = 0; i < nr_items; i++) {
-               ret = tree_mod_log_insert_key_locked(fs_info, src,
+               ret = __tree_mod_log_insert_key(fs_info, src,
                                                i + src_offset,
-                                               MOD_LOG_KEY_REMOVE);
+                                               MOD_LOG_KEY_REMOVE, GFP_NOFS);
                BUG_ON(ret < 0);
-               ret = tree_mod_log_insert_key_locked(fs_info, dst,
+               ret = __tree_mod_log_insert_key(fs_info, dst,
                                                     i + dst_offset,
-                                                    MOD_LOG_KEY_ADD);
+                                                    MOD_LOG_KEY_ADD,
+                                                    GFP_NOFS);
                BUG_ON(ret < 0);
        }
-
-       tree_mod_log_write_unlock(fs_info);
 }
 
 static inline void
@@ -819,9 +772,9 @@ tree_mod_log_set_node_key(struct btrfs_fs_info *fs_info,
 {
        int ret;
 
-       ret = tree_mod_log_insert_key_mask(fs_info, eb, slot,
-                                          MOD_LOG_KEY_REPLACE,
-                                          atomic ? GFP_ATOMIC : GFP_NOFS);
+       ret = __tree_mod_log_insert_key(fs_info, eb, slot,
+                                       MOD_LOG_KEY_REPLACE,
+                                       atomic ? GFP_ATOMIC : GFP_NOFS);
        BUG_ON(ret < 0);
 }
 
@@ -830,10 +783,7 @@ tree_mod_log_free_eb(struct btrfs_fs_info *fs_info, struct extent_buffer *eb)
 {
        if (tree_mod_dont_log(fs_info, eb))
                return;
-
        __tree_mod_log_free_eb(fs_info, eb);
-
-       tree_mod_log_write_unlock(fs_info);
 }
 
 static noinline void
@@ -1046,8 +996,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
        else
                btrfs_set_header_owner(cow, root->root_key.objectid);
 
-       write_extent_buffer(cow, root->fs_info->fsid,
-                           (unsigned long)btrfs_header_fsid(cow),
+       write_extent_buffer(cow, root->fs_info->fsid, btrfs_header_fsid(cow),
                            BTRFS_FSID_SIZE);
 
        ret = update_ref_for_cow(trans, root, buf, cow, &last_ref);
@@ -1083,7 +1032,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
 
                WARN_ON(trans->transid != btrfs_header_generation(parent));
                tree_mod_log_insert_key(root->fs_info, parent, parent_slot,
-                                       MOD_LOG_KEY_REPLACE);
+                                       MOD_LOG_KEY_REPLACE, GFP_NOFS);
                btrfs_set_node_blockptr(parent, parent_slot,
                                        cow->start);
                btrfs_set_node_ptr_generation(parent, parent_slot,
@@ -1116,7 +1065,7 @@ __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info,
        int looped = 0;
 
        if (!time_seq)
-               return 0;
+               return NULL;
 
        /*
         * the very last operation that's logged for a root is the replacement
@@ -1127,7 +1076,7 @@ __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info,
                tm = tree_mod_log_search_oldest(fs_info, root_logical,
                                                time_seq);
                if (!looped && !tm)
-                       return 0;
+                       return NULL;
                /*
                 * if there are no tree operation for the oldest root, we simply
                 * return it. this should only happen if that (old) root is at
@@ -1240,8 +1189,8 @@ __tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
  * is freed (its refcount is decremented).
  */
 static struct extent_buffer *
-tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
-                   u64 time_seq)
+tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
+                   struct extent_buffer *eb, u64 time_seq)
 {
        struct extent_buffer *eb_rewin;
        struct tree_mod_elem *tm;
@@ -1256,11 +1205,18 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
        if (!tm)
                return eb;
 
+       btrfs_set_path_blocking(path);
+       btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
+
        if (tm->op == MOD_LOG_KEY_REMOVE_WHILE_FREEING) {
                BUG_ON(tm->slot != 0);
                eb_rewin = alloc_dummy_extent_buffer(eb->start,
                                                fs_info->tree_root->nodesize);
-               BUG_ON(!eb_rewin);
+               if (!eb_rewin) {
+                       btrfs_tree_read_unlock_blocking(eb);
+                       free_extent_buffer(eb);
+                       return NULL;
+               }
                btrfs_set_header_bytenr(eb_rewin, eb->start);
                btrfs_set_header_backref_rev(eb_rewin,
                                             btrfs_header_backref_rev(eb));
@@ -1268,10 +1224,15 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
                btrfs_set_header_level(eb_rewin, btrfs_header_level(eb));
        } else {
                eb_rewin = btrfs_clone_extent_buffer(eb);
-               BUG_ON(!eb_rewin);
+               if (!eb_rewin) {
+                       btrfs_tree_read_unlock_blocking(eb);
+                       free_extent_buffer(eb);
+                       return NULL;
+               }
        }
 
-       btrfs_tree_read_unlock(eb);
+       btrfs_clear_path_blocking(path, NULL, BTRFS_READ_LOCK);
+       btrfs_tree_read_unlock_blocking(eb);
        free_extent_buffer(eb);
 
        extent_buffer_get(eb_rewin);
@@ -1335,8 +1296,9 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
                free_extent_buffer(eb_root);
                eb = alloc_dummy_extent_buffer(logical, root->nodesize);
        } else {
+               btrfs_set_lock_blocking_rw(eb_root, BTRFS_READ_LOCK);
                eb = btrfs_clone_extent_buffer(eb_root);
-               btrfs_tree_read_unlock(eb_root);
+               btrfs_tree_read_unlock_blocking(eb_root);
                free_extent_buffer(eb_root);
        }
 
@@ -1419,14 +1381,12 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans,
 
        if (trans->transaction != root->fs_info->running_transaction)
                WARN(1, KERN_CRIT "trans %llu running %llu\n",
-                      (unsigned long long)trans->transid,
-                      (unsigned long long)
+                      trans->transid,
                       root->fs_info->running_transaction->transid);
 
        if (trans->transid != root->fs_info->generation)
                WARN(1, KERN_CRIT "trans %llu running %llu\n",
-                      (unsigned long long)trans->transid,
-                      (unsigned long long)root->fs_info->generation);
+                      trans->transid, root->fs_info->generation);
 
        if (!should_cow_block(trans, root, buf)) {
                *cow_ret = buf;
@@ -2466,6 +2426,40 @@ done:
        return ret;
 }
 
+static void key_search_validate(struct extent_buffer *b,
+                               struct btrfs_key *key,
+                               int level)
+{
+#ifdef CONFIG_BTRFS_ASSERT
+       struct btrfs_disk_key disk_key;
+
+       btrfs_cpu_key_to_disk(&disk_key, key);
+
+       if (level == 0)
+               ASSERT(!memcmp_extent_buffer(b, &disk_key,
+                   offsetof(struct btrfs_leaf, items[0].key),
+                   sizeof(disk_key)));
+       else
+               ASSERT(!memcmp_extent_buffer(b, &disk_key,
+                   offsetof(struct btrfs_node, ptrs[0].key),
+                   sizeof(disk_key)));
+#endif
+}
+
+static int key_search(struct extent_buffer *b, struct btrfs_key *key,
+                     int level, int *prev_cmp, int *slot)
+{
+       if (*prev_cmp != 0) {
+               *prev_cmp = bin_search(b, key, level, slot);
+               return *prev_cmp;
+       }
+
+       key_search_validate(b, key, level);
+       *slot = 0;
+
+       return 0;
+}
+
 /*
  * look for key in the tree.  path is filled in with nodes along the way
  * if key is found, we return zero and you can find the item in the leaf
@@ -2494,6 +2488,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
        int write_lock_level = 0;
        u8 lowest_level = 0;
        int min_write_lock_level;
+       int prev_cmp;
 
        lowest_level = p->lowest_level;
        WARN_ON(lowest_level && ins_len > 0);
@@ -2524,6 +2519,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
        min_write_lock_level = write_lock_level;
 
 again:
+       prev_cmp = -1;
        /*
         * we try very hard to do read locks on the root
         */
@@ -2624,7 +2620,7 @@ cow_done:
                if (!cow)
                        btrfs_unlock_up_safe(p, level + 1);
 
-               ret = bin_search(b, key, level, &slot);
+               ret = key_search(b, key, level, &prev_cmp, &slot);
 
                if (level != 0) {
                        int dec = 0;
@@ -2759,6 +2755,7 @@ int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key,
        int level;
        int lowest_unlock = 1;
        u8 lowest_level = 0;
+       int prev_cmp;
 
        lowest_level = p->lowest_level;
        WARN_ON(p->nodes[0] != NULL);
@@ -2769,6 +2766,7 @@ int btrfs_search_old_slot(struct btrfs_root *root, struct btrfs_key *key,
        }
 
 again:
+       prev_cmp = -1;
        b = get_old_root(root, time_seq);
        level = btrfs_header_level(b);
        p->locks[level] = BTRFS_READ_LOCK;
@@ -2786,7 +2784,7 @@ again:
                 */
                btrfs_unlock_up_safe(p, level + 1);
 
-               ret = bin_search(b, key, level, &slot);
+               ret = key_search(b, key, level, &prev_cmp, &slot);
 
                if (level != 0) {
                        int dec = 0;
@@ -2820,7 +2818,11 @@ again:
                                btrfs_clear_path_blocking(p, b,
                                                          BTRFS_READ_LOCK);
                        }
-                       b = tree_mod_log_rewind(root->fs_info, b, time_seq);
+                       b = tree_mod_log_rewind(root->fs_info, p, b, time_seq);
+                       if (!b) {
+                               ret = -ENOMEM;
+                               goto done;
+                       }
                        p->locks[level] = BTRFS_READ_LOCK;
                        p->nodes[level] = b;
                } else {
@@ -3143,13 +3145,11 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans,
        btrfs_set_header_backref_rev(c, BTRFS_MIXED_BACKREF_REV);
        btrfs_set_header_owner(c, root->root_key.objectid);
 
-       write_extent_buffer(c, root->fs_info->fsid,
-                           (unsigned long)btrfs_header_fsid(c),
+       write_extent_buffer(c, root->fs_info->fsid, btrfs_header_fsid(c),
                            BTRFS_FSID_SIZE);
 
        write_extent_buffer(c, root->fs_info->chunk_tree_uuid,
-                           (unsigned long)btrfs_header_chunk_tree_uuid(c),
-                           BTRFS_UUID_SIZE);
+                           btrfs_header_chunk_tree_uuid(c), BTRFS_UUID_SIZE);
 
        btrfs_set_node_key(c, &lower_key, 0);
        btrfs_set_node_blockptr(c, 0, lower->start);
@@ -3208,7 +3208,7 @@ static void insert_ptr(struct btrfs_trans_handle *trans,
        }
        if (level) {
                ret = tree_mod_log_insert_key(root->fs_info, lower, slot,
-                                             MOD_LOG_KEY_ADD);
+                                             MOD_LOG_KEY_ADD, GFP_NOFS);
                BUG_ON(ret < 0);
        }
        btrfs_set_node_key(lower, key, slot);
@@ -3284,10 +3284,9 @@ static noinline int split_node(struct btrfs_trans_handle *trans,
        btrfs_set_header_backref_rev(split, BTRFS_MIXED_BACKREF_REV);
        btrfs_set_header_owner(split, root->root_key.objectid);
        write_extent_buffer(split, root->fs_info->fsid,
-                           (unsigned long)btrfs_header_fsid(split),
-                           BTRFS_FSID_SIZE);
+                           btrfs_header_fsid(split), BTRFS_FSID_SIZE);
        write_extent_buffer(split, root->fs_info->chunk_tree_uuid,
-                           (unsigned long)btrfs_header_chunk_tree_uuid(split),
+                           btrfs_header_chunk_tree_uuid(split),
                            BTRFS_UUID_SIZE);
 
        tree_mod_log_eb_copy(root->fs_info, split, c, 0, mid, c_nritems - mid);
@@ -4040,11 +4039,10 @@ again:
        btrfs_set_header_owner(right, root->root_key.objectid);
        btrfs_set_header_level(right, 0);
        write_extent_buffer(right, root->fs_info->fsid,
-                           (unsigned long)btrfs_header_fsid(right),
-                           BTRFS_FSID_SIZE);
+                           btrfs_header_fsid(right), BTRFS_FSID_SIZE);
 
        write_extent_buffer(right, root->fs_info->chunk_tree_uuid,
-                           (unsigned long)btrfs_header_chunk_tree_uuid(right),
+                           btrfs_header_chunk_tree_uuid(right),
                            BTRFS_UUID_SIZE);
 
        if (split == 0) {
@@ -4642,7 +4640,7 @@ static void del_ptr(struct btrfs_root *root, struct btrfs_path *path,
                              (nritems - slot - 1));
        } else if (level) {
                ret = tree_mod_log_insert_key(root->fs_info, parent, slot,
-                                             MOD_LOG_KEY_REMOVE);
+                                             MOD_LOG_KEY_REMOVE, GFP_NOFS);
                BUG_ON(ret < 0);
        }
 
@@ -4814,7 +4812,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root,
  * This may release the path, and so you may lose any locks held at the
  * time you call it.
  */
-int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
+static int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path)
 {
        struct btrfs_key key;
        struct btrfs_disk_key found_key;
@@ -5329,19 +5327,20 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
                                        goto out;
                                advance_right = ADVANCE;
                        } else {
+                               enum btrfs_compare_tree_result cmp;
+
                                WARN_ON(!extent_buffer_uptodate(left_path->nodes[0]));
                                ret = tree_compare_item(left_root, left_path,
                                                right_path, tmp_buf);
-                               if (ret) {
-                                       WARN_ON(!extent_buffer_uptodate(left_path->nodes[0]));
-                                       ret = changed_cb(left_root, right_root,
-                                               left_path, right_path,
-                                               &left_key,
-                                               BTRFS_COMPARE_TREE_CHANGED,
-                                               ctx);
-                                       if (ret < 0)
-                                               goto out;
-                               }
+                               if (ret)
+                                       cmp = BTRFS_COMPARE_TREE_CHANGED;
+                               else
+                                       cmp = BTRFS_COMPARE_TREE_SAME;
+                               ret = changed_cb(left_root, right_root,
+                                                left_path, right_path,
+                                                &left_key, cmp, ctx);
+                               if (ret < 0)
+                                       goto out;
                                advance_left = ADVANCE;
                                advance_right = ADVANCE;
                        }
index e795bf1..3c1da6f 100644 (file)
@@ -23,6 +23,7 @@
 #include <linux/highmem.h>
 #include <linux/fs.h>
 #include <linux/rwsem.h>
+#include <linux/semaphore.h>
 #include <linux/completion.h>
 #include <linux/backing-dev.h>
 #include <linux/wait.h>
@@ -91,6 +92,9 @@ struct btrfs_ordered_sum;
 /* holds quota configuration and tracking */
 #define BTRFS_QUOTA_TREE_OBJECTID 8ULL
 
+/* for storing items that use the BTRFS_UUID_KEY* types */
+#define BTRFS_UUID_TREE_OBJECTID 9ULL
+
 /* for storing balance parameters in the root tree */
 #define BTRFS_BALANCE_OBJECTID -4ULL
 
@@ -142,7 +146,7 @@ struct btrfs_ordered_sum;
 
 #define BTRFS_EMPTY_SUBVOL_DIR_OBJECTID 2
 
-#define BTRFS_DEV_REPLACE_DEVID 0
+#define BTRFS_DEV_REPLACE_DEVID 0ULL
 
 /*
  * the max metadata block size.  This limit is somewhat artificial,
@@ -478,9 +482,10 @@ struct btrfs_super_block {
        char label[BTRFS_LABEL_SIZE];
 
        __le64 cache_generation;
+       __le64 uuid_tree_generation;
 
        /* future expansion */
-       __le64 reserved[31];
+       __le64 reserved[30];
        u8 sys_chunk_array[BTRFS_SYSTEM_CHUNK_ARRAY_SIZE];
        struct btrfs_root_backup super_roots[BTRFS_NUM_BACKUP_ROOTS];
 } __attribute__ ((__packed__));
@@ -1188,6 +1193,7 @@ enum btrfs_caching_type {
        BTRFS_CACHE_STARTED     = 1,
        BTRFS_CACHE_FAST        = 2,
        BTRFS_CACHE_FINISHED    = 3,
+       BTRFS_CACHE_ERROR       = 4,
 };
 
 enum btrfs_disk_cache_state {
@@ -1302,6 +1308,7 @@ struct btrfs_fs_info {
        struct btrfs_root *fs_root;
        struct btrfs_root *csum_root;
        struct btrfs_root *quota_root;
+       struct btrfs_root *uuid_root;
 
        /* the log root tree is a directory of all the other log roots */
        struct btrfs_root *log_root_tree;
@@ -1350,6 +1357,7 @@ struct btrfs_fs_info {
        u64 last_trans_log_full_commit;
        unsigned long mount_opt;
        unsigned long compress_type:4;
+       int commit_interval;
        /*
         * It is a suggestive number, the read side is safe even it gets a
         * wrong number because we will write out the data into a regular
@@ -1411,6 +1419,13 @@ struct btrfs_fs_info {
         * before jumping into the main commit.
         */
        struct mutex ordered_operations_mutex;
+
+       /*
+        * Same as ordered_operations_mutex except this is for ordered extents
+        * and not the operations.
+        */
+       struct mutex ordered_extent_flush_mutex;
+
        struct rw_semaphore extent_commit_sem;
 
        struct rw_semaphore cleanup_work_sem;
@@ -1641,6 +1656,9 @@ struct btrfs_fs_info {
        struct btrfs_dev_replace dev_replace;
 
        atomic_t mutually_exclusive_operation_running;
+
+       struct semaphore uuid_tree_rescan_sem;
+       unsigned int update_uuid_tree_gen:1;
 };
 
 /*
@@ -1933,6 +1951,19 @@ struct btrfs_ioctl_defrag_range_args {
  */
 #define BTRFS_DEV_REPLACE_KEY  250
 
+/*
+ * Stores items that allow to quickly map UUIDs to something else.
+ * These items are part of the filesystem UUID tree.
+ * The key is built like this:
+ * (UUID_upper_64_bits, BTRFS_UUID_KEY*, UUID_lower_64_bits).
+ */
+#if BTRFS_UUID_SIZE != 16
+#error "UUID items require BTRFS_UUID_SIZE == 16!"
+#endif
+#define BTRFS_UUID_KEY_SUBVOL  251     /* for UUIDs assigned to subvols */
+#define BTRFS_UUID_KEY_RECEIVED_SUBVOL 252     /* for UUIDs assigned to
+                                                * received subvols */
+
 /*
  * string items are for debugging.  They just store a short string of
  * data in the FS
@@ -1967,6 +1998,9 @@ struct btrfs_ioctl_defrag_range_args {
 #define BTRFS_MOUNT_CHECK_INTEGRITY    (1 << 20)
 #define BTRFS_MOUNT_CHECK_INTEGRITY_INCLUDING_EXTENT_DATA (1 << 21)
 #define BTRFS_MOUNT_PANIC_ON_FATAL_ERROR       (1 << 22)
+#define BTRFS_MOUNT_RESCAN_UUID_TREE   (1 << 23)
+
+#define BTRFS_DEFAULT_COMMIT_INTERVAL  (30)
 
 #define btrfs_clear_opt(o, opt)                ((o) &= ~BTRFS_MOUNT_##opt)
 #define btrfs_set_opt(o, opt)          ((o) |= BTRFS_MOUNT_##opt)
@@ -2130,14 +2164,14 @@ BTRFS_SETGET_STACK_FUNCS(stack_device_bandwidth, struct btrfs_dev_item,
 BTRFS_SETGET_STACK_FUNCS(stack_device_generation, struct btrfs_dev_item,
                         generation, 64);
 
-static inline char *btrfs_device_uuid(struct btrfs_dev_item *d)
+static inline unsigned long btrfs_device_uuid(struct btrfs_dev_item *d)
 {
-       return (char *)d + offsetof(struct btrfs_dev_item, uuid);
+       return (unsigned long)d + offsetof(struct btrfs_dev_item, uuid);
 }
 
-static inline char *btrfs_device_fsid(struct btrfs_dev_item *d)
+static inline unsigned long btrfs_device_fsid(struct btrfs_dev_item *d)
 {
-       return (char *)d + offsetof(struct btrfs_dev_item, fsid);
+       return (unsigned long)d + offsetof(struct btrfs_dev_item, fsid);
 }
 
 BTRFS_SETGET_FUNCS(chunk_length, struct btrfs_chunk, length, 64);
@@ -2240,6 +2274,23 @@ BTRFS_SETGET_FUNCS(inode_gid, struct btrfs_inode_item, gid, 32);
 BTRFS_SETGET_FUNCS(inode_mode, struct btrfs_inode_item, mode, 32);
 BTRFS_SETGET_FUNCS(inode_rdev, struct btrfs_inode_item, rdev, 64);
 BTRFS_SETGET_FUNCS(inode_flags, struct btrfs_inode_item, flags, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_generation, struct btrfs_inode_item,
+                        generation, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_sequence, struct btrfs_inode_item,
+                        sequence, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_transid, struct btrfs_inode_item,
+                        transid, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_size, struct btrfs_inode_item, size, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_nbytes, struct btrfs_inode_item,
+                        nbytes, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_block_group, struct btrfs_inode_item,
+                        block_group, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_nlink, struct btrfs_inode_item, nlink, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_uid, struct btrfs_inode_item, uid, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_gid, struct btrfs_inode_item, gid, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_mode, struct btrfs_inode_item, mode, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_rdev, struct btrfs_inode_item, rdev, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_inode_flags, struct btrfs_inode_item, flags, 64);
 
 static inline struct btrfs_timespec *
 btrfs_inode_atime(struct btrfs_inode_item *inode_item)
@@ -2267,6 +2318,8 @@ btrfs_inode_ctime(struct btrfs_inode_item *inode_item)
 
 BTRFS_SETGET_FUNCS(timespec_sec, struct btrfs_timespec, sec, 64);
 BTRFS_SETGET_FUNCS(timespec_nsec, struct btrfs_timespec, nsec, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_timespec_sec, struct btrfs_timespec, sec, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_timespec_nsec, struct btrfs_timespec, nsec, 32);
 
 /* struct btrfs_dev_extent */
 BTRFS_SETGET_FUNCS(dev_extent_chunk_tree, struct btrfs_dev_extent,
@@ -2277,10 +2330,10 @@ BTRFS_SETGET_FUNCS(dev_extent_chunk_offset, struct btrfs_dev_extent,
                   chunk_offset, 64);
 BTRFS_SETGET_FUNCS(dev_extent_length, struct btrfs_dev_extent, length, 64);
 
-static inline u8 *btrfs_dev_extent_chunk_tree_uuid(struct btrfs_dev_extent *dev)
+static inline unsigned long btrfs_dev_extent_chunk_tree_uuid(struct btrfs_dev_extent *dev)
 {
        unsigned long ptr = offsetof(struct btrfs_dev_extent, chunk_tree_uuid);
-       return (u8 *)((unsigned long)dev + ptr);
+       return (unsigned long)dev + ptr;
 }
 
 BTRFS_SETGET_FUNCS(extent_refs, struct btrfs_extent_item, refs, 64);
@@ -2348,6 +2401,10 @@ BTRFS_SETGET_FUNCS(ref_count_v0, struct btrfs_extent_ref_v0, count, 32);
 /* struct btrfs_node */
 BTRFS_SETGET_FUNCS(key_blockptr, struct btrfs_key_ptr, blockptr, 64);
 BTRFS_SETGET_FUNCS(key_generation, struct btrfs_key_ptr, generation, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_key_blockptr, struct btrfs_key_ptr,
+                        blockptr, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_key_generation, struct btrfs_key_ptr,
+                        generation, 64);
 
 static inline u64 btrfs_node_blockptr(struct extent_buffer *eb, int nr)
 {
@@ -2404,6 +2461,8 @@ static inline void btrfs_set_node_key(struct extent_buffer *eb,
 /* struct btrfs_item */
 BTRFS_SETGET_FUNCS(item_offset, struct btrfs_item, offset, 32);
 BTRFS_SETGET_FUNCS(item_size, struct btrfs_item, size, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_item_offset, struct btrfs_item, offset, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_item_size, struct btrfs_item, size, 32);
 
 static inline unsigned long btrfs_item_nr_offset(int nr)
 {
@@ -2466,6 +2525,13 @@ BTRFS_SETGET_FUNCS(dir_data_len, struct btrfs_dir_item, data_len, 16);
 BTRFS_SETGET_FUNCS(dir_type, struct btrfs_dir_item, type, 8);
 BTRFS_SETGET_FUNCS(dir_name_len, struct btrfs_dir_item, name_len, 16);
 BTRFS_SETGET_FUNCS(dir_transid, struct btrfs_dir_item, transid, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_dir_type, struct btrfs_dir_item, type, 8);
+BTRFS_SETGET_STACK_FUNCS(stack_dir_data_len, struct btrfs_dir_item,
+                        data_len, 16);
+BTRFS_SETGET_STACK_FUNCS(stack_dir_name_len, struct btrfs_dir_item,
+                        name_len, 16);
+BTRFS_SETGET_STACK_FUNCS(stack_dir_transid, struct btrfs_dir_item,
+                        transid, 64);
 
 static inline void btrfs_dir_item_key(struct extent_buffer *eb,
                                      struct btrfs_dir_item *item,
@@ -2568,6 +2634,12 @@ BTRFS_SETGET_HEADER_FUNCS(header_owner, struct btrfs_header, owner, 64);
 BTRFS_SETGET_HEADER_FUNCS(header_nritems, struct btrfs_header, nritems, 32);
 BTRFS_SETGET_HEADER_FUNCS(header_flags, struct btrfs_header, flags, 64);
 BTRFS_SETGET_HEADER_FUNCS(header_level, struct btrfs_header, level, 8);
+BTRFS_SETGET_STACK_FUNCS(stack_header_generation, struct btrfs_header,
+                        generation, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_header_owner, struct btrfs_header, owner, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_header_nritems, struct btrfs_header,
+                        nritems, 32);
+BTRFS_SETGET_STACK_FUNCS(stack_header_bytenr, struct btrfs_header, bytenr, 64);
 
 static inline int btrfs_header_flag(struct extent_buffer *eb, u64 flag)
 {
@@ -2603,16 +2675,14 @@ static inline void btrfs_set_header_backref_rev(struct extent_buffer *eb,
        btrfs_set_header_flags(eb, flags);
 }
 
-static inline u8 *btrfs_header_fsid(struct extent_buffer *eb)
+static inline unsigned long btrfs_header_fsid(struct extent_buffer *eb)
 {
-       unsigned long ptr = offsetof(struct btrfs_header, fsid);
-       return (u8 *)ptr;
+       return offsetof(struct btrfs_header, fsid);
 }
 
-static inline u8 *btrfs_header_chunk_tree_uuid(struct extent_buffer *eb)
+static inline unsigned long btrfs_header_chunk_tree_uuid(struct extent_buffer *eb)
 {
-       unsigned long ptr = offsetof(struct btrfs_header, chunk_tree_uuid);
-       return (u8 *)ptr;
+       return offsetof(struct btrfs_header, chunk_tree_uuid);
 }
 
 static inline int btrfs_is_leaf(struct extent_buffer *eb)
@@ -2830,6 +2900,9 @@ BTRFS_SETGET_STACK_FUNCS(super_csum_type, struct btrfs_super_block,
                         csum_type, 16);
 BTRFS_SETGET_STACK_FUNCS(super_cache_generation, struct btrfs_super_block,
                         cache_generation, 64);
+BTRFS_SETGET_STACK_FUNCS(super_magic, struct btrfs_super_block, magic, 64);
+BTRFS_SETGET_STACK_FUNCS(super_uuid_tree_generation, struct btrfs_super_block,
+                        uuid_tree_generation, 64);
 
 static inline int btrfs_super_csum_size(struct btrfs_super_block *s)
 {
@@ -2847,6 +2920,14 @@ static inline unsigned long btrfs_leaf_data(struct extent_buffer *l)
 
 /* struct btrfs_file_extent_item */
 BTRFS_SETGET_FUNCS(file_extent_type, struct btrfs_file_extent_item, type, 8);
+BTRFS_SETGET_STACK_FUNCS(stack_file_extent_disk_bytenr,
+                        struct btrfs_file_extent_item, disk_bytenr, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_file_extent_offset,
+                        struct btrfs_file_extent_item, offset, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_file_extent_generation,
+                        struct btrfs_file_extent_item, generation, 64);
+BTRFS_SETGET_STACK_FUNCS(stack_file_extent_num_bytes,
+                        struct btrfs_file_extent_item, num_bytes, 64);
 
 static inline unsigned long
 btrfs_file_extent_inline_start(struct btrfs_file_extent_item *e)
@@ -3107,11 +3188,9 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans,
                                   struct btrfs_root *root,
                                   u64 root_objectid, u64 owner, u64 offset,
                                   struct btrfs_key *ins);
-int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
-                                 struct btrfs_root *root,
-                                 u64 num_bytes, u64 min_alloc_size,
-                                 u64 empty_size, u64 hint_byte,
-                                 struct btrfs_key *ins, int is_data);
+int btrfs_reserve_extent(struct btrfs_root *root, u64 num_bytes,
+                        u64 min_alloc_size, u64 empty_size, u64 hint_byte,
+                        struct btrfs_key *ins, int is_data);
 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
                  struct extent_buffer *buf, int full_backref, int for_cow);
 int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
@@ -3175,7 +3254,7 @@ void btrfs_orphan_release_metadata(struct inode *inode);
 int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
                                     struct btrfs_block_rsv *rsv,
                                     int nitems,
-                                    u64 *qgroup_reserved);
+                                    u64 *qgroup_reserved, bool use_global_rsv);
 void btrfs_subvolume_release_metadata(struct btrfs_root *root,
                                      struct btrfs_block_rsv *rsv,
                                      u64 qgroup_reserved);
@@ -3245,6 +3324,7 @@ enum btrfs_compare_tree_result {
        BTRFS_COMPARE_TREE_NEW,
        BTRFS_COMPARE_TREE_DELETED,
        BTRFS_COMPARE_TREE_CHANGED,
+       BTRFS_COMPARE_TREE_SAME,
 };
 typedef int (*btrfs_changed_cb_t)(struct btrfs_root *left_root,
                                  struct btrfs_root *right_root,
@@ -3380,6 +3460,7 @@ static inline void free_fs_info(struct btrfs_fs_info *fs_info)
        kfree(fs_info->dev_root);
        kfree(fs_info->csum_root);
        kfree(fs_info->quota_root);
+       kfree(fs_info->uuid_root);
        kfree(fs_info->super_copy);
        kfree(fs_info->super_for_commit);
        kfree(fs_info);
@@ -3414,8 +3495,6 @@ int __must_check btrfs_update_root(struct btrfs_trans_handle *trans,
                                   struct btrfs_root *root,
                                   struct btrfs_key *key,
                                   struct btrfs_root_item *item);
-void btrfs_read_root_item(struct extent_buffer *eb, int slot,
-                         struct btrfs_root_item *item);
 int btrfs_find_root(struct btrfs_root *root, struct btrfs_key *search_key,
                    struct btrfs_path *path, struct btrfs_root_item *root_item,
                    struct btrfs_key *root_key);
@@ -3426,6 +3505,17 @@ void btrfs_check_and_init_root_item(struct btrfs_root_item *item);
 void btrfs_update_root_times(struct btrfs_trans_handle *trans,
                             struct btrfs_root *root);
 
+/* uuid-tree.c */
+int btrfs_uuid_tree_add(struct btrfs_trans_handle *trans,
+                       struct btrfs_root *uuid_root, u8 *uuid, u8 type,
+                       u64 subid);
+int btrfs_uuid_tree_rem(struct btrfs_trans_handle *trans,
+                       struct btrfs_root *uuid_root, u8 *uuid, u8 type,
+                       u64 subid);
+int btrfs_uuid_tree_iterate(struct btrfs_fs_info *fs_info,
+                           int (*check_func)(struct btrfs_fs_info *, u8 *, u8,
+                                             u64));
+
 /* dir-item.c */
 int btrfs_check_dir_item_collision(struct btrfs_root *root, u64 dir,
                          const char *name, int name_len);
@@ -3509,12 +3599,14 @@ int btrfs_find_name_in_ext_backref(struct btrfs_path *path,
                                   struct btrfs_inode_extref **extref_ret);
 
 /* file-item.c */
+struct btrfs_dio_private;
 int btrfs_del_csums(struct btrfs_trans_handle *trans,
                    struct btrfs_root *root, u64 bytenr, u64 len);
 int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
                          struct bio *bio, u32 *dst);
 int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode,
-                             struct bio *bio, u64 logical_offset);
+                             struct btrfs_dio_private *dip, struct bio *bio,
+                             u64 logical_offset);
 int btrfs_insert_file_extent(struct btrfs_trans_handle *trans,
                             struct btrfs_root *root,
                             u64 objectid, u64 pos,
@@ -3552,8 +3644,7 @@ void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work);
 struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *page,
                                           size_t pg_offset, u64 start, u64 len,
                                           int create);
-noinline int can_nocow_extent(struct btrfs_trans_handle *trans,
-                             struct inode *inode, u64 offset, u64 *len,
+noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
                              u64 *orig_start, u64 *orig_block_len,
                              u64 *ram_bytes);
 
@@ -3643,11 +3734,15 @@ extern const struct dentry_operations btrfs_dentry_operations;
 long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
 void btrfs_update_iflags(struct inode *inode);
 void btrfs_inherit_iflags(struct inode *inode, struct inode *dir);
+int btrfs_is_empty_uuid(u8 *uuid);
 int btrfs_defrag_file(struct inode *inode, struct file *file,
                      struct btrfs_ioctl_defrag_range_args *range,
                      u64 newer_than, unsigned long max_pages);
 void btrfs_get_block_group_info(struct list_head *groups_list,
                                struct btrfs_ioctl_space_info *space);
+void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock,
+                              struct btrfs_ioctl_balance_args *bargs);
+
 
 /* file.c */
 int btrfs_auto_defrag_init(void);
@@ -3720,6 +3815,22 @@ void btrfs_printk(const struct btrfs_fs_info *fs_info, const char *fmt, ...)
 #define btrfs_debug(fs_info, fmt, args...) \
        btrfs_printk(fs_info, KERN_DEBUG fmt, ##args)
 
+#ifdef CONFIG_BTRFS_ASSERT
+
+static inline void assfail(char *expr, char *file, int line)
+{
+       printk(KERN_ERR "BTRFS assertion failed: %s, file: %s, line: %d",
+              expr, file, line);
+       BUG();
+}
+
+#define ASSERT(expr)   \
+       (likely(expr) ? (void)0 : assfail(#expr, __FILE__, __LINE__))
+#else
+#define ASSERT(expr)   ((void)0)
+#endif
+
+#define btrfs_assert()
 __printf(5, 6)
 void __btrfs_std_error(struct btrfs_fs_info *fs_info, const char *function,
                     unsigned int line, int errno, const char *fmt, ...);
index 3755109..cbd9523 100644 (file)
@@ -21,6 +21,7 @@
 #include "delayed-inode.h"
 #include "disk-io.h"
 #include "transaction.h"
+#include "ctree.h"
 
 #define BTRFS_DELAYED_WRITEBACK                512
 #define BTRFS_DELAYED_BACKGROUND       128
@@ -1453,10 +1454,10 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
 
        dir_item = (struct btrfs_dir_item *)delayed_item->data;
        dir_item->location = *disk_key;
-       dir_item->transid = cpu_to_le64(trans->transid);
-       dir_item->data_len = 0;
-       dir_item->name_len = cpu_to_le16(name_len);
-       dir_item->type = type;
+       btrfs_set_stack_dir_transid(dir_item, trans->transid);
+       btrfs_set_stack_dir_data_len(dir_item, 0);
+       btrfs_set_stack_dir_name_len(dir_item, name_len);
+       btrfs_set_stack_dir_type(dir_item, type);
        memcpy((char *)(dir_item + 1), name, name_len);
 
        ret = btrfs_delayed_item_reserve_metadata(trans, root, delayed_item);
@@ -1470,13 +1471,11 @@ int btrfs_insert_delayed_dir_index(struct btrfs_trans_handle *trans,
        mutex_lock(&delayed_node->mutex);
        ret = __btrfs_add_delayed_insertion_item(delayed_node, delayed_item);
        if (unlikely(ret)) {
-               printk(KERN_ERR "err add delayed dir index item(name: %s) into "
-                               "the insertion tree of the delayed node"
+               printk(KERN_ERR "err add delayed dir index item(name: %.*s) "
+                               "into the insertion tree of the delayed node"
                                "(root id: %llu, inode id: %llu, errno: %d)\n",
-                               name,
-                               (unsigned long long)delayed_node->root->objectid,
-                               (unsigned long long)delayed_node->inode_id,
-                               ret);
+                               name_len, name, delayed_node->root->objectid,
+                               delayed_node->inode_id, ret);
                BUG();
        }
        mutex_unlock(&delayed_node->mutex);
@@ -1547,9 +1546,7 @@ int btrfs_delete_delayed_dir_index(struct btrfs_trans_handle *trans,
                printk(KERN_ERR "err add delayed dir index item(index: %llu) "
                                "into the deletion tree of the delayed node"
                                "(root id: %llu, inode id: %llu, errno: %d)\n",
-                               (unsigned long long)index,
-                               (unsigned long long)node->root->objectid,
-                               (unsigned long long)node->inode_id,
+                               index, node->root->objectid, node->inode_id,
                                ret);
                BUG();
        }
@@ -1699,7 +1696,7 @@ int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
 
                di = (struct btrfs_dir_item *)curr->data;
                name = (char *)(di + 1);
-               name_len = le16_to_cpu(di->name_len);
+               name_len = btrfs_stack_dir_name_len(di);
 
                d_type = btrfs_filetype_table[di->type];
                btrfs_disk_key_to_cpu(&location, &di->location);
@@ -1716,27 +1713,6 @@ int btrfs_readdir_delayed_dir_index(struct dir_context *ctx,
        return 0;
 }
 
-BTRFS_SETGET_STACK_FUNCS(stack_inode_generation, struct btrfs_inode_item,
-                        generation, 64);
-BTRFS_SETGET_STACK_FUNCS(stack_inode_sequence, struct btrfs_inode_item,
-                        sequence, 64);
-BTRFS_SETGET_STACK_FUNCS(stack_inode_transid, struct btrfs_inode_item,
-                        transid, 64);
-BTRFS_SETGET_STACK_FUNCS(stack_inode_size, struct btrfs_inode_item, size, 64);
-BTRFS_SETGET_STACK_FUNCS(stack_inode_nbytes, struct btrfs_inode_item,
-                        nbytes, 64);
-BTRFS_SETGET_STACK_FUNCS(stack_inode_block_group, struct btrfs_inode_item,
-                        block_group, 64);
-BTRFS_SETGET_STACK_FUNCS(stack_inode_nlink, struct btrfs_inode_item, nlink, 32);
-BTRFS_SETGET_STACK_FUNCS(stack_inode_uid, struct btrfs_inode_item, uid, 32);
-BTRFS_SETGET_STACK_FUNCS(stack_inode_gid, struct btrfs_inode_item, gid, 32);
-BTRFS_SETGET_STACK_FUNCS(stack_inode_mode, struct btrfs_inode_item, mode, 32);
-BTRFS_SETGET_STACK_FUNCS(stack_inode_rdev, struct btrfs_inode_item, rdev, 64);
-BTRFS_SETGET_STACK_FUNCS(stack_inode_flags, struct btrfs_inode_item, flags, 64);
-
-BTRFS_SETGET_STACK_FUNCS(stack_timespec_sec, struct btrfs_timespec, sec, 64);
-BTRFS_SETGET_STACK_FUNCS(stack_timespec_nsec, struct btrfs_timespec, nsec, 32);
-
 static void fill_stack_inode_item(struct btrfs_trans_handle *trans,
                                  struct btrfs_inode_item *inode_item,
                                  struct inode *inode)
index c219463..e4d467b 100644 (file)
@@ -241,7 +241,7 @@ int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans,
        return 0;
 }
 
-static void inline drop_delayed_ref(struct btrfs_trans_handle *trans,
+static inline void drop_delayed_ref(struct btrfs_trans_handle *trans,
                                    struct btrfs_delayed_ref_root *delayed_refs,
                                    struct btrfs_delayed_ref_node *ref)
 {
@@ -600,7 +600,7 @@ static noinline void add_delayed_ref_head(struct btrfs_fs_info *fs_info,
        INIT_LIST_HEAD(&head_ref->cluster);
        mutex_init(&head_ref->mutex);
 
-       trace_btrfs_delayed_ref_head(ref, head_ref, action);
+       trace_add_delayed_ref_head(ref, head_ref, action);
 
        existing = tree_insert(&delayed_refs->root, &ref->rb_node);
 
@@ -661,7 +661,7 @@ static noinline void add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
                ref->type = BTRFS_TREE_BLOCK_REF_KEY;
        full_ref->level = level;
 
-       trace_btrfs_delayed_tree_ref(ref, full_ref, action);
+       trace_add_delayed_tree_ref(ref, full_ref, action);
 
        existing = tree_insert(&delayed_refs->root, &ref->rb_node);
 
@@ -722,7 +722,7 @@ static noinline void add_delayed_data_ref(struct btrfs_fs_info *fs_info,
        full_ref->objectid = owner;
        full_ref->offset = offset;
 
-       trace_btrfs_delayed_data_ref(ref, full_ref, action);
+       trace_add_delayed_data_ref(ref, full_ref, action);
 
        existing = tree_insert(&delayed_refs->root, &ref->rb_node);
 
index 5f8f334..a644353 100644 (file)
@@ -148,13 +148,13 @@ no_valid_dev_replace_entry_found:
                    !btrfs_test_opt(dev_root, DEGRADED)) {
                        ret = -EIO;
                        pr_warn("btrfs: cannot mount because device replace operation is ongoing and\n" "srcdev (devid %llu) is missing, need to run 'btrfs dev scan'?\n",
-                               (unsigned long long)src_devid);
+                               src_devid);
                }
                if (!dev_replace->tgtdev &&
                    !btrfs_test_opt(dev_root, DEGRADED)) {
                        ret = -EIO;
                        pr_warn("btrfs: cannot mount because device replace operation is ongoing and\n" "tgtdev (devid %llu) is missing, need to run btrfs dev scan?\n",
-                               (unsigned long long)BTRFS_DEV_REPLACE_DEVID);
+                               BTRFS_DEV_REPLACE_DEVID);
                }
                if (dev_replace->tgtdev) {
                        if (dev_replace->srcdev) {
index 6b092a1..4cbb00a 100644 (file)
@@ -31,6 +31,7 @@
 #include <linux/migrate.h>
 #include <linux/ratelimit.h>
 #include <linux/uuid.h>
+#include <linux/semaphore.h>
 #include <asm/unaligned.h>
 #include "compat.h"
 #include "ctree.h"
@@ -302,9 +303,8 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf,
                        printk_ratelimited(KERN_INFO "btrfs: %s checksum verify "
                                       "failed on %llu wanted %X found %X "
                                       "level %d\n",
-                                      root->fs_info->sb->s_id,
-                                      (unsigned long long)buf->start, val, found,
-                                      btrfs_header_level(buf));
+                                      root->fs_info->sb->s_id, buf->start,
+                                      val, found, btrfs_header_level(buf));
                        if (result != (char *)&inline_result)
                                kfree(result);
                        return 1;
@@ -345,9 +345,7 @@ static int verify_parent_transid(struct extent_io_tree *io_tree,
        }
        printk_ratelimited("parent transid verify failed on %llu wanted %llu "
                       "found %llu\n",
-                      (unsigned long long)eb->start,
-                      (unsigned long long)parent_transid,
-                      (unsigned long long)btrfs_header_generation(eb));
+                      eb->start, parent_transid, btrfs_header_generation(eb));
        ret = 1;
        clear_extent_buffer_uptodate(eb);
 out:
@@ -497,8 +495,7 @@ static int check_tree_block_fsid(struct btrfs_root *root,
        u8 fsid[BTRFS_UUID_SIZE];
        int ret = 1;
 
-       read_extent_buffer(eb, fsid, (unsigned long)btrfs_header_fsid(eb),
-                          BTRFS_FSID_SIZE);
+       read_extent_buffer(eb, fsid, btrfs_header_fsid(eb), BTRFS_FSID_SIZE);
        while (fs_devices) {
                if (!memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE)) {
                        ret = 0;
@@ -512,8 +509,7 @@ static int check_tree_block_fsid(struct btrfs_root *root,
 #define CORRUPT(reason, eb, root, slot)                                \
        printk(KERN_CRIT "btrfs: corrupt leaf, %s: block=%llu," \
               "root=%llu, slot=%d\n", reason,                  \
-              (unsigned long long)btrfs_header_bytenr(eb),     \
-              (unsigned long long)root->objectid, slot)
+              btrfs_header_bytenr(eb), root->objectid, slot)
 
 static noinline int check_leaf(struct btrfs_root *root,
                               struct extent_buffer *leaf)
@@ -576,8 +572,9 @@ static noinline int check_leaf(struct btrfs_root *root,
        return 0;
 }
 
-static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
-                              struct extent_state *state, int mirror)
+static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
+                                     u64 phy_offset, struct page *page,
+                                     u64 start, u64 end, int mirror)
 {
        struct extent_io_tree *tree;
        u64 found_start;
@@ -612,14 +609,13 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end,
        if (found_start != eb->start) {
                printk_ratelimited(KERN_INFO "btrfs bad tree block start "
                               "%llu %llu\n",
-                              (unsigned long long)found_start,
-                              (unsigned long long)eb->start);
+                              found_start, eb->start);
                ret = -EIO;
                goto err;
        }
        if (check_tree_block_fsid(root, eb)) {
                printk_ratelimited(KERN_INFO "btrfs bad fsid on block %llu\n",
-                              (unsigned long long)eb->start);
+                              eb->start);
                ret = -EIO;
                goto err;
        }
@@ -1148,6 +1144,10 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr,
                return NULL;
 
        ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid);
+       if (ret) {
+               free_extent_buffer(buf);
+               return NULL;
+       }
        return buf;
 
 }
@@ -1291,11 +1291,10 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
        btrfs_set_header_owner(leaf, objectid);
        root->node = leaf;
 
-       write_extent_buffer(leaf, fs_info->fsid,
-                           (unsigned long)btrfs_header_fsid(leaf),
+       write_extent_buffer(leaf, fs_info->fsid, btrfs_header_fsid(leaf),
                            BTRFS_FSID_SIZE);
        write_extent_buffer(leaf, fs_info->chunk_tree_uuid,
-                           (unsigned long)btrfs_header_chunk_tree_uuid(leaf),
+                           btrfs_header_chunk_tree_uuid(leaf),
                            BTRFS_UUID_SIZE);
        btrfs_mark_buffer_dirty(leaf);
 
@@ -1379,8 +1378,7 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
        root->node = leaf;
 
        write_extent_buffer(root->node, root->fs_info->fsid,
-                           (unsigned long)btrfs_header_fsid(root->node),
-                           BTRFS_FSID_SIZE);
+                           btrfs_header_fsid(root->node), BTRFS_FSID_SIZE);
        btrfs_mark_buffer_dirty(root->node);
        btrfs_tree_unlock(root->node);
        return root;
@@ -1413,11 +1411,11 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
        log_root->root_key.offset = root->root_key.objectid;
 
        inode_item = &log_root->root_item.inode;
-       inode_item->generation = cpu_to_le64(1);
-       inode_item->size = cpu_to_le64(3);
-       inode_item->nlink = cpu_to_le32(1);
-       inode_item->nbytes = cpu_to_le64(root->leafsize);
-       inode_item->mode = cpu_to_le32(S_IFDIR | 0755);
+       btrfs_set_stack_inode_generation(inode_item, 1);
+       btrfs_set_stack_inode_size(inode_item, 3);
+       btrfs_set_stack_inode_nlink(inode_item, 1);
+       btrfs_set_stack_inode_nbytes(inode_item, root->leafsize);
+       btrfs_set_stack_inode_mode(inode_item, S_IFDIR | 0755);
 
        btrfs_set_root_node(&log_root->root_item, log_root->node);
 
@@ -1428,8 +1426,8 @@ int btrfs_add_log_tree(struct btrfs_trans_handle *trans,
        return 0;
 }
 
-struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
-                                       struct btrfs_key *key)
+static struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
+                                              struct btrfs_key *key)
 {
        struct btrfs_root *root;
        struct btrfs_fs_info *fs_info = tree_root->fs_info;
@@ -1529,8 +1527,8 @@ fail:
        return ret;
 }
 
-struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
-                                       u64 root_id)
+static struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
+                                              u64 root_id)
 {
        struct btrfs_root *root;
 
@@ -1581,10 +1579,16 @@ struct btrfs_root *btrfs_read_fs_root_no_name(struct btrfs_fs_info *fs_info,
        if (location->objectid == BTRFS_QUOTA_TREE_OBJECTID)
                return fs_info->quota_root ? fs_info->quota_root :
                                             ERR_PTR(-ENOENT);
+       if (location->objectid == BTRFS_UUID_TREE_OBJECTID)
+               return fs_info->uuid_root ? fs_info->uuid_root :
+                                           ERR_PTR(-ENOENT);
 again:
        root = btrfs_lookup_fs_root(fs_info, location->objectid);
-       if (root)
+       if (root) {
+               if (btrfs_root_refs(&root->root_item) == 0)
+                       return ERR_PTR(-ENOENT);
                return root;
+       }
 
        root = btrfs_read_fs_root(fs_info->tree_root, location);
        if (IS_ERR(root))
@@ -1737,7 +1741,7 @@ static int transaction_kthread(void *arg)
 
        do {
                cannot_commit = false;
-               delay = HZ * 30;
+               delay = HZ * root->fs_info->commit_interval;
                mutex_lock(&root->fs_info->transaction_kthread_mutex);
 
                spin_lock(&root->fs_info->trans_lock);
@@ -1749,7 +1753,8 @@ static int transaction_kthread(void *arg)
 
                now = get_seconds();
                if (cur->state < TRANS_STATE_BLOCKED &&
-                   (now < cur->start_time || now - cur->start_time < 30)) {
+                   (now < cur->start_time ||
+                    now - cur->start_time < root->fs_info->commit_interval)) {
                        spin_unlock(&root->fs_info->trans_lock);
                        delay = HZ * 5;
                        goto sleep;
@@ -2038,6 +2043,12 @@ static void free_root_pointers(struct btrfs_fs_info *info, int chunk_root)
                info->quota_root->node = NULL;
                info->quota_root->commit_root = NULL;
        }
+       if (info->uuid_root) {
+               free_extent_buffer(info->uuid_root->node);
+               free_extent_buffer(info->uuid_root->commit_root);
+               info->uuid_root->node = NULL;
+               info->uuid_root->commit_root = NULL;
+       }
        if (chunk_root) {
                free_extent_buffer(info->chunk_root->node);
                free_extent_buffer(info->chunk_root->commit_root);
@@ -2098,11 +2109,14 @@ int open_ctree(struct super_block *sb,
        struct btrfs_root *chunk_root;
        struct btrfs_root *dev_root;
        struct btrfs_root *quota_root;
+       struct btrfs_root *uuid_root;
        struct btrfs_root *log_tree_root;
        int ret;
        int err = -EINVAL;
        int num_backups_tried = 0;
        int backup_index = 0;
+       bool create_uuid_tree;
+       bool check_uuid_tree;
 
        tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info);
        chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info);
@@ -2189,6 +2203,7 @@ int open_ctree(struct super_block *sb,
        fs_info->defrag_inodes = RB_ROOT;
        fs_info->free_chunk_space = 0;
        fs_info->tree_mod_log = RB_ROOT;
+       fs_info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL;
 
        /* readahead state */
        INIT_RADIX_TREE(&fs_info->reada_tree, GFP_NOFS & ~__GFP_WAIT);
@@ -2270,6 +2285,7 @@ int open_ctree(struct super_block *sb,
 
 
        mutex_init(&fs_info->ordered_operations_mutex);
+       mutex_init(&fs_info->ordered_extent_flush_mutex);
        mutex_init(&fs_info->tree_log_mutex);
        mutex_init(&fs_info->chunk_mutex);
        mutex_init(&fs_info->transaction_kthread_mutex);
@@ -2278,6 +2294,7 @@ int open_ctree(struct super_block *sb,
        init_rwsem(&fs_info->extent_commit_sem);
        init_rwsem(&fs_info->cleanup_work_sem);
        init_rwsem(&fs_info->subvol_sem);
+       sema_init(&fs_info->uuid_tree_rescan_sem, 1);
        fs_info->dev_replace.lock_owner = 0;
        atomic_set(&fs_info->dev_replace.nesting_level, 0);
        mutex_init(&fs_info->dev_replace.lock_finishing_cancel_unmount);
@@ -2383,7 +2400,7 @@ int open_ctree(struct super_block *sb,
        if (features) {
                printk(KERN_ERR "BTRFS: couldn't mount because of "
                       "unsupported optional features (%Lx).\n",
-                      (unsigned long long)features);
+                      features);
                err = -EINVAL;
                goto fail_alloc;
        }
@@ -2453,7 +2470,7 @@ int open_ctree(struct super_block *sb,
        if (!(sb->s_flags & MS_RDONLY) && features) {
                printk(KERN_ERR "BTRFS: couldn't mount RDWR because of "
                       "unsupported option features (%Lx).\n",
-                      (unsigned long long)features);
+                      features);
                err = -EINVAL;
                goto fail_alloc;
        }
@@ -2466,20 +2483,17 @@ int open_ctree(struct super_block *sb,
                           &fs_info->generic_worker);
 
        btrfs_init_workers(&fs_info->delalloc_workers, "delalloc",
-                          fs_info->thread_pool_size,
-                          &fs_info->generic_worker);
+                          fs_info->thread_pool_size, NULL);
 
        btrfs_init_workers(&fs_info->flush_workers, "flush_delalloc",
-                          fs_info->thread_pool_size,
-                          &fs_info->generic_worker);
+                          fs_info->thread_pool_size, NULL);
 
        btrfs_init_workers(&fs_info->submit_workers, "submit",
                           min_t(u64, fs_devices->num_devices,
-                          fs_info->thread_pool_size),
-                          &fs_info->generic_worker);
+                          fs_info->thread_pool_size), NULL);
 
        btrfs_init_workers(&fs_info->caching_workers, "cache",
-                          2, &fs_info->generic_worker);
+                          fs_info->thread_pool_size, NULL);
 
        /* a higher idle thresh on the submit workers makes it much more
         * likely that bios will be send down in a sane order to the
@@ -2575,7 +2589,7 @@ int open_ctree(struct super_block *sb,
        sb->s_blocksize = sectorsize;
        sb->s_blocksize_bits = blksize_bits(sectorsize);
 
-       if (disk_super->magic != cpu_to_le64(BTRFS_MAGIC)) {
+       if (btrfs_super_magic(disk_super) != BTRFS_MAGIC) {
                printk(KERN_INFO "btrfs: valid FS not found on %s\n", sb->s_id);
                goto fail_sb_buffer;
        }
@@ -2615,8 +2629,7 @@ int open_ctree(struct super_block *sb,
        chunk_root->commit_root = btrfs_root_node(chunk_root);
 
        read_extent_buffer(chunk_root->node, fs_info->chunk_tree_uuid,
-          (unsigned long)btrfs_header_chunk_tree_uuid(chunk_root->node),
-          BTRFS_UUID_SIZE);
+          btrfs_header_chunk_tree_uuid(chunk_root->node), BTRFS_UUID_SIZE);
 
        ret = btrfs_read_chunk_tree(chunk_root);
        if (ret) {
@@ -2696,6 +2709,22 @@ retry_root_backup:
                fs_info->quota_root = quota_root;
        }
 
+       location.objectid = BTRFS_UUID_TREE_OBJECTID;
+       uuid_root = btrfs_read_tree_root(tree_root, &location);
+       if (IS_ERR(uuid_root)) {
+               ret = PTR_ERR(uuid_root);
+               if (ret != -ENOENT)
+                       goto recovery_tree_root;
+               create_uuid_tree = true;
+               check_uuid_tree = false;
+       } else {
+               uuid_root->track_dirty = 1;
+               fs_info->uuid_root = uuid_root;
+               create_uuid_tree = false;
+               check_uuid_tree =
+                   generation != btrfs_super_uuid_tree_generation(disk_super);
+       }
+
        fs_info->generation = generation;
        fs_info->last_trans_committed = generation;
 
@@ -2882,6 +2911,29 @@ retry_root_backup:
 
        btrfs_qgroup_rescan_resume(fs_info);
 
+       if (create_uuid_tree) {
+               pr_info("btrfs: creating UUID tree\n");
+               ret = btrfs_create_uuid_tree(fs_info);
+               if (ret) {
+                       pr_warn("btrfs: failed to create the UUID tree %d\n",
+                               ret);
+                       close_ctree(tree_root);
+                       return ret;
+               }
+       } else if (check_uuid_tree ||
+                  btrfs_test_opt(tree_root, RESCAN_UUID_TREE)) {
+               pr_info("btrfs: checking UUID tree\n");
+               ret = btrfs_check_uuid_tree(fs_info);
+               if (ret) {
+                       pr_warn("btrfs: failed to check the UUID tree %d\n",
+                               ret);
+                       close_ctree(tree_root);
+                       return ret;
+               }
+       } else {
+               fs_info->update_uuid_tree_gen = 1;
+       }
+
        return 0;
 
 fail_qgroup:
@@ -2983,15 +3035,17 @@ struct buffer_head *btrfs_read_dev_super(struct block_device *bdev)
         */
        for (i = 0; i < 1; i++) {
                bytenr = btrfs_sb_offset(i);
-               if (bytenr + 4096 >= i_size_read(bdev->bd_inode))
+               if (bytenr + BTRFS_SUPER_INFO_SIZE >=
+                                       i_size_read(bdev->bd_inode))
                        break;
-               bh = __bread(bdev, bytenr / 4096, 4096);
+               bh = __bread(bdev, bytenr / 4096,
+                                       BTRFS_SUPER_INFO_SIZE);
                if (!bh)
                        continue;
 
                super = (struct btrfs_super_block *)bh->b_data;
                if (btrfs_super_bytenr(super) != bytenr ||
-                   super->magic != cpu_to_le64(BTRFS_MAGIC)) {
+                   btrfs_super_magic(super) != BTRFS_MAGIC) {
                        brelse(bh);
                        continue;
                }
@@ -3311,7 +3365,6 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors)
        int total_errors = 0;
        u64 flags;
 
-       max_errors = btrfs_super_num_devices(root->fs_info->super_copy) - 1;
        do_barriers = !btrfs_test_opt(root, NOBARRIER);
        backup_super_roots(root->fs_info);
 
@@ -3320,6 +3373,7 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors)
 
        mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
        head = &root->fs_info->fs_devices->devices;
+       max_errors = btrfs_super_num_devices(root->fs_info->super_copy) - 1;
 
        if (do_barriers) {
                ret = barrier_all_devices(root->fs_info);
@@ -3362,8 +3416,10 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors)
                printk(KERN_ERR "btrfs: %d errors while writing supers\n",
                       total_errors);
 
-               /* This shouldn't happen. FUA is masked off if unsupported */
-               BUG();
+               /* FUA is masked off if unsupported and can't be the reason */
+               btrfs_error(root->fs_info, -EIO,
+                           "%d errors while writing supers", total_errors);
+               return -EIO;
        }
 
        total_errors = 0;
@@ -3421,6 +3477,8 @@ static void free_fs_root(struct btrfs_root *root)
 {
        iput(root->cache_inode);
        WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree));
+       btrfs_free_block_rsv(root, root->orphan_block_rsv);
+       root->orphan_block_rsv = NULL;
        if (root->anon_dev)
                free_anon_bdev(root->anon_dev);
        free_extent_buffer(root->node);
@@ -3510,6 +3568,11 @@ int close_ctree(struct btrfs_root *root)
        fs_info->closing = 1;
        smp_mb();
 
+       /* wait for the uuid_scan task to finish */
+       down(&fs_info->uuid_tree_rescan_sem);
+       /* avoid complains from lockdep et al., set sem back to initial state */
+       up(&fs_info->uuid_tree_rescan_sem);
+
        /* pause restriper - we want to resume on mount */
        btrfs_pause_balance(fs_info);
 
@@ -3573,6 +3636,9 @@ int close_ctree(struct btrfs_root *root)
 
        btrfs_free_stripe_hash_table(fs_info);
 
+       btrfs_free_block_rsv(root, root->orphan_block_rsv);
+       root->orphan_block_rsv = NULL;
+
        return 0;
 }
 
@@ -3608,9 +3674,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf)
        if (transid != root->fs_info->generation)
                WARN(1, KERN_CRIT "btrfs transid mismatch buffer %llu, "
                       "found %llu running %llu\n",
-                       (unsigned long long)buf->start,
-                       (unsigned long long)transid,
-                       (unsigned long long)root->fs_info->generation);
+                       buf->start, transid, root->fs_info->generation);
        was_dirty = set_extent_buffer_dirty(buf);
        if (!was_dirty)
                __percpu_counter_add(&root->fs_info->dirty_metadata_bytes,
@@ -3744,8 +3808,8 @@ static void btrfs_destroy_all_ordered_extents(struct btrfs_fs_info *fs_info)
        spin_unlock(&fs_info->ordered_root_lock);
 }
 
-int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
-                              struct btrfs_root *root)
+static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
+                                     struct btrfs_root *root)
 {
        struct rb_node *node;
        struct btrfs_delayed_ref_root *delayed_refs;
index 1204c8e..cfb3cf7 100644 (file)
@@ -113,7 +113,8 @@ static noinline int
 block_group_cache_done(struct btrfs_block_group_cache *cache)
 {
        smp_mb();
-       return cache->cached == BTRFS_CACHE_FINISHED;
+       return cache->cached == BTRFS_CACHE_FINISHED ||
+               cache->cached == BTRFS_CACHE_ERROR;
 }
 
 static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
@@ -389,7 +390,7 @@ static noinline void caching_thread(struct btrfs_work *work)
        u64 total_found = 0;
        u64 last = 0;
        u32 nritems;
-       int ret = 0;
+       int ret = -ENOMEM;
 
        caching_ctl = container_of(work, struct btrfs_caching_control, work);
        block_group = caching_ctl->block_group;
@@ -420,6 +421,7 @@ again:
        /* need to make sure the commit_root doesn't disappear */
        down_read(&fs_info->extent_commit_sem);
 
+next:
        ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
        if (ret < 0)
                goto err;
@@ -459,6 +461,16 @@ again:
                        continue;
                }
 
+               if (key.objectid < last) {
+                       key.objectid = last;
+                       key.offset = 0;
+                       key.type = BTRFS_EXTENT_ITEM_KEY;
+
+                       caching_ctl->progress = last;
+                       btrfs_release_path(path);
+                       goto next;
+               }
+
                if (key.objectid < block_group->key.objectid) {
                        path->slots[0]++;
                        continue;
@@ -506,6 +518,12 @@ err:
 
        mutex_unlock(&caching_ctl->mutex);
 out:
+       if (ret) {
+               spin_lock(&block_group->lock);
+               block_group->caching_ctl = NULL;
+               block_group->cached = BTRFS_CACHE_ERROR;
+               spin_unlock(&block_group->lock);
+       }
        wake_up(&caching_ctl->wait);
 
        put_caching_control(caching_ctl);
@@ -771,10 +789,23 @@ again:
                goto out_free;
 
        if (ret > 0 && metadata && key.type == BTRFS_METADATA_ITEM_KEY) {
-               key.type = BTRFS_EXTENT_ITEM_KEY;
-               key.offset = root->leafsize;
-               btrfs_release_path(path);
-               goto again;
+               metadata = 0;
+               if (path->slots[0]) {
+                       path->slots[0]--;
+                       btrfs_item_key_to_cpu(path->nodes[0], &key,
+                                             path->slots[0]);
+                       if (key.objectid == bytenr &&
+                           key.type == BTRFS_EXTENT_ITEM_KEY &&
+                           key.offset == root->leafsize)
+                               ret = 0;
+               }
+               if (ret) {
+                       key.objectid = bytenr;
+                       key.type = BTRFS_EXTENT_ITEM_KEY;
+                       key.offset = root->leafsize;
+                       btrfs_release_path(path);
+                       goto again;
+               }
        }
 
        if (ret == 0) {
@@ -2011,6 +2042,8 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
        ins.type = BTRFS_EXTENT_ITEM_KEY;
 
        ref = btrfs_delayed_node_to_data_ref(node);
+       trace_run_delayed_data_ref(node, ref, node->action);
+
        if (node->type == BTRFS_SHARED_DATA_REF_KEY)
                parent = ref->parent;
        else
@@ -2154,6 +2187,8 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
                                                 SKINNY_METADATA);
 
        ref = btrfs_delayed_node_to_tree_ref(node);
+       trace_run_delayed_tree_ref(node, ref, node->action);
+
        if (node->type == BTRFS_SHARED_BLOCK_REF_KEY)
                parent = ref->parent;
        else
@@ -2212,6 +2247,8 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
                 */
                BUG_ON(extent_op);
                head = btrfs_delayed_node_to_head(node);
+               trace_run_delayed_ref_head(node, head, node->action);
+
                if (insert_reserved) {
                        btrfs_pin_extent(root, node->bytenr,
                                         node->num_bytes, 1);
@@ -2403,6 +2440,8 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
                        default:
                                WARN_ON(1);
                        }
+               } else {
+                       list_del_init(&locked_ref->cluster);
                }
                spin_unlock(&delayed_refs->lock);
 
@@ -2425,7 +2464,6 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
                 * list before we release it.
                 */
                if (btrfs_delayed_ref_is_head(ref)) {
-                       list_del_init(&locked_ref->cluster);
                        btrfs_delayed_ref_unlock(locked_ref);
                        locked_ref = NULL;
                }
@@ -3799,8 +3837,12 @@ again:
        if (force < space_info->force_alloc)
                force = space_info->force_alloc;
        if (space_info->full) {
+               if (should_alloc_chunk(extent_root, space_info, force))
+                       ret = -ENOSPC;
+               else
+                       ret = 0;
                spin_unlock(&space_info->lock);
-               return 0;
+               return ret;
        }
 
        if (!should_alloc_chunk(extent_root, space_info, force)) {
@@ -4320,6 +4362,9 @@ static struct btrfs_block_rsv *get_block_rsv(
        if (root == root->fs_info->csum_root && trans->adding_csums)
                block_rsv = trans->block_rsv;
 
+       if (root == root->fs_info->uuid_root)
+               block_rsv = trans->block_rsv;
+
        if (!block_rsv)
                block_rsv = root->block_rsv;
 
@@ -4729,10 +4774,12 @@ void btrfs_orphan_release_metadata(struct inode *inode)
 int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
                                     struct btrfs_block_rsv *rsv,
                                     int items,
-                                    u64 *qgroup_reserved)
+                                    u64 *qgroup_reserved,
+                                    bool use_global_rsv)
 {
        u64 num_bytes;
        int ret;
+       struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv;
 
        if (root->fs_info->quota_enabled) {
                /* One for parent inode, two for dir entries */
@@ -4751,6 +4798,10 @@ int btrfs_subvolume_reserve_metadata(struct btrfs_root *root,
                                            BTRFS_BLOCK_GROUP_METADATA);
        ret = btrfs_block_rsv_add(root, rsv, num_bytes,
                                  BTRFS_RESERVE_FLUSH_ALL);
+
+       if (ret == -ENOSPC && use_global_rsv)
+               ret = btrfs_block_rsv_migrate(global_rsv, rsv, num_bytes);
+
        if (ret) {
                if (*qgroup_reserved)
                        btrfs_qgroup_free(root, *qgroup_reserved);
@@ -5668,7 +5719,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
 
                        if (ret) {
                                btrfs_err(info, "umm, got %d back from search, was looking for %llu",
-                                       ret, (unsigned long long)bytenr);
+                                       ret, bytenr);
                                if (ret > 0)
                                        btrfs_print_leaf(extent_root,
                                                         path->nodes[0]);
@@ -5684,11 +5735,8 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
                WARN_ON(1);
                btrfs_err(info,
                        "unable to find ref byte nr %llu parent %llu root %llu  owner %llu offset %llu",
-                       (unsigned long long)bytenr,
-                       (unsigned long long)parent,
-                       (unsigned long long)root_objectid,
-                       (unsigned long long)owner_objectid,
-                       (unsigned long long)owner_offset);
+                       bytenr, parent, root_objectid, owner_objectid,
+                       owner_offset);
        } else {
                btrfs_abort_transaction(trans, extent_root, ret);
                goto out;
@@ -5717,7 +5765,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
                                        -1, 1);
                if (ret) {
                        btrfs_err(info, "umm, got %d back from search, was looking for %llu",
-                               ret, (unsigned long long)bytenr);
+                               ret, bytenr);
                        btrfs_print_leaf(extent_root, path->nodes[0]);
                }
                if (ret < 0) {
@@ -5999,8 +6047,11 @@ static u64 stripe_align(struct btrfs_root *root,
  * for our min num_bytes.  Another option is to have it go ahead
  * and look in the rbtree for a free extent of a given size, but this
  * is a good start.
+ *
+ * Callers of this must check if cache->cached == BTRFS_CACHE_ERROR before using
+ * any of the information in this block group.
  */
-static noinline int
+static noinline void
 wait_block_group_cache_progress(struct btrfs_block_group_cache *cache,
                                u64 num_bytes)
 {
@@ -6008,28 +6059,29 @@ wait_block_group_cache_progress(struct btrfs_block_group_cache *cache,
 
        caching_ctl = get_caching_control(cache);
        if (!caching_ctl)
-               return 0;
+               return;
 
        wait_event(caching_ctl->wait, block_group_cache_done(cache) ||
                   (cache->free_space_ctl->free_space >= num_bytes));
 
        put_caching_control(caching_ctl);
-       return 0;
 }
 
 static noinline int
 wait_block_group_cache_done(struct btrfs_block_group_cache *cache)
 {
        struct btrfs_caching_control *caching_ctl;
+       int ret = 0;
 
        caching_ctl = get_caching_control(cache);
        if (!caching_ctl)
-               return 0;
+               return (cache->cached == BTRFS_CACHE_ERROR) ? -EIO : 0;
 
        wait_event(caching_ctl->wait, block_group_cache_done(cache));
-
+       if (cache->cached == BTRFS_CACHE_ERROR)
+               ret = -EIO;
        put_caching_control(caching_ctl);
-       return 0;
+       return ret;
 }
 
 int __get_raid_index(u64 flags)
@@ -6070,8 +6122,7 @@ enum btrfs_loop_type {
  * ins->offset == number of blocks
  * Any available blocks before search_start are skipped.
  */
-static noinline int find_free_extent(struct btrfs_trans_handle *trans,
-                                    struct btrfs_root *orig_root,
+static noinline int find_free_extent(struct btrfs_root *orig_root,
                                     u64 num_bytes, u64 empty_size,
                                     u64 hint_byte, struct btrfs_key *ins,
                                     u64 flags)
@@ -6212,6 +6263,8 @@ have_block_group:
                        ret = 0;
                }
 
+               if (unlikely(block_group->cached == BTRFS_CACHE_ERROR))
+                       goto loop;
                if (unlikely(block_group->ro))
                        goto loop;
 
@@ -6292,10 +6345,10 @@ refill_cluster:
                                              block_group->full_stripe_len);
 
                        /* allocate a cluster in this block group */
-                       ret = btrfs_find_space_cluster(trans, root,
-                                              block_group, last_ptr,
-                                              search_start, num_bytes,
-                                              aligned_cluster);
+                       ret = btrfs_find_space_cluster(root, block_group,
+                                                      last_ptr, search_start,
+                                                      num_bytes,
+                                                      aligned_cluster);
                        if (ret == 0) {
                                /*
                                 * now pull our allocation out of this
@@ -6426,17 +6479,28 @@ loop:
                index = 0;
                loop++;
                if (loop == LOOP_ALLOC_CHUNK) {
+                       struct btrfs_trans_handle *trans;
+
+                       trans = btrfs_join_transaction(root);
+                       if (IS_ERR(trans)) {
+                               ret = PTR_ERR(trans);
+                               goto out;
+                       }
+
                        ret = do_chunk_alloc(trans, root, flags,
                                             CHUNK_ALLOC_FORCE);
                        /*
                         * Do not bail out on ENOSPC since we
                         * can do more things.
                         */
-                       if (ret < 0 && ret != -ENOSPC) {
+                       if (ret < 0 && ret != -ENOSPC)
                                btrfs_abort_transaction(trans,
                                                        root, ret);
+                       else
+                               ret = 0;
+                       btrfs_end_transaction(trans, root);
+                       if (ret)
                                goto out;
-                       }
                }
 
                if (loop == LOOP_NO_EMPTY_SIZE) {
@@ -6463,19 +6527,15 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
 
        spin_lock(&info->lock);
        printk(KERN_INFO "space_info %llu has %llu free, is %sfull\n",
-              (unsigned long long)info->flags,
-              (unsigned long long)(info->total_bytes - info->bytes_used -
-                                   info->bytes_pinned - info->bytes_reserved -
-                                   info->bytes_readonly),
+              info->flags,
+              info->total_bytes - info->bytes_used - info->bytes_pinned -
+              info->bytes_reserved - info->bytes_readonly,
               (info->full) ? "" : "not ");
        printk(KERN_INFO "space_info total=%llu, used=%llu, pinned=%llu, "
               "reserved=%llu, may_use=%llu, readonly=%llu\n",
-              (unsigned long long)info->total_bytes,
-              (unsigned long long)info->bytes_used,
-              (unsigned long long)info->bytes_pinned,
-              (unsigned long long)info->bytes_reserved,
-              (unsigned long long)info->bytes_may_use,
-              (unsigned long long)info->bytes_readonly);
+              info->total_bytes, info->bytes_used, info->bytes_pinned,
+              info->bytes_reserved, info->bytes_may_use,
+              info->bytes_readonly);
        spin_unlock(&info->lock);
 
        if (!dump_block_groups)
@@ -6486,12 +6546,9 @@ again:
        list_for_each_entry(cache, &info->block_groups[index], list) {
                spin_lock(&cache->lock);
                printk(KERN_INFO "block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %s\n",
-                      (unsigned long long)cache->key.objectid,
-                      (unsigned long long)cache->key.offset,
-                      (unsigned long long)btrfs_block_group_used(&cache->item),
-                      (unsigned long long)cache->pinned,
-                      (unsigned long long)cache->reserved,
-                      cache->ro ? "[readonly]" : "");
+                      cache->key.objectid, cache->key.offset,
+                      btrfs_block_group_used(&cache->item), cache->pinned,
+                      cache->reserved, cache->ro ? "[readonly]" : "");
                btrfs_dump_free_space(cache, bytes);
                spin_unlock(&cache->lock);
        }
@@ -6500,8 +6557,7 @@ again:
        up_read(&info->groups_sem);
 }
 
-int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
-                        struct btrfs_root *root,
+int btrfs_reserve_extent(struct btrfs_root *root,
                         u64 num_bytes, u64 min_alloc_size,
                         u64 empty_size, u64 hint_byte,
                         struct btrfs_key *ins, int is_data)
@@ -6513,8 +6569,8 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
        flags = btrfs_get_alloc_profile(root, is_data);
 again:
        WARN_ON(num_bytes < root->sectorsize);
-       ret = find_free_extent(trans, root, num_bytes, empty_size,
-                              hint_byte, ins, flags);
+       ret = find_free_extent(root, num_bytes, empty_size, hint_byte, ins,
+                              flags);
 
        if (ret == -ENOSPC) {
                if (!final_tried) {
@@ -6529,8 +6585,7 @@ again:
 
                        sinfo = __find_space_info(root->fs_info, flags);
                        btrfs_err(root->fs_info, "allocation failed flags %llu, wanted %llu",
-                               (unsigned long long)flags,
-                               (unsigned long long)num_bytes);
+                               flags, num_bytes);
                        if (sinfo)
                                dump_space_info(sinfo, num_bytes, 1);
                }
@@ -6550,7 +6605,7 @@ static int __btrfs_free_reserved_extent(struct btrfs_root *root,
        cache = btrfs_lookup_block_group(root->fs_info, start);
        if (!cache) {
                btrfs_err(root->fs_info, "Unable to find block group for %llu",
-                       (unsigned long long)start);
+                       start);
                return -ENOSPC;
        }
 
@@ -6646,8 +6701,7 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
        ret = update_block_group(root, ins->objectid, ins->offset, 1);
        if (ret) { /* -ENOENT, logic error */
                btrfs_err(fs_info, "update block group failed for %llu %llu",
-                       (unsigned long long)ins->objectid,
-                       (unsigned long long)ins->offset);
+                       ins->objectid, ins->offset);
                BUG();
        }
        return ret;
@@ -6719,8 +6773,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
        ret = update_block_group(root, ins->objectid, root->leafsize, 1);
        if (ret) { /* -ENOENT, logic error */
                btrfs_err(fs_info, "update block group failed for %llu %llu",
-                       (unsigned long long)ins->objectid,
-                       (unsigned long long)ins->offset);
+                       ins->objectid, ins->offset);
                BUG();
        }
        return ret;
@@ -6902,7 +6955,7 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
        if (IS_ERR(block_rsv))
                return ERR_CAST(block_rsv);
 
-       ret = btrfs_reserve_extent(trans, root, blocksize, blocksize,
+       ret = btrfs_reserve_extent(root, blocksize, blocksize,
                                   empty_size, hint, &ins, 0);
        if (ret) {
                unuse_block_rsv(root->fs_info, block_rsv, blocksize);
@@ -7173,6 +7226,8 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans,
                next = btrfs_find_create_tree_block(root, bytenr, blocksize);
                if (!next)
                        return -ENOMEM;
+               btrfs_set_buffer_lockdep_class(root->root_key.objectid, next,
+                                              level - 1);
                reada = 1;
        }
        btrfs_tree_lock(next);
@@ -7658,7 +7713,7 @@ out:
         * don't have it in the radix (like when we recover after a power fail
         * or unmount) so we don't leak memory.
         */
-       if (root_dropped == false)
+       if (!for_reloc && root_dropped == false)
                btrfs_add_dead_root(root);
        if (err)
                btrfs_std_error(root->fs_info, err);
@@ -8192,7 +8247,8 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
                 * We haven't cached this block group, which means we could
                 * possibly have excluded extents on this block group.
                 */
-               if (block_group->cached == BTRFS_CACHE_NO)
+               if (block_group->cached == BTRFS_CACHE_NO ||
+                   block_group->cached == BTRFS_CACHE_ERROR)
                        free_excluded_extents(info->extent_root, block_group);
 
                btrfs_remove_free_space_cache(block_group);
@@ -8409,9 +8465,13 @@ int btrfs_read_block_groups(struct btrfs_root *root)
                 * avoid allocating from un-mirrored block group if there are
                 * mirrored block groups.
                 */
-               list_for_each_entry(cache, &space_info->block_groups[3], list)
+               list_for_each_entry(cache,
+                               &space_info->block_groups[BTRFS_RAID_RAID0],
+                               list)
                        set_block_group_ro(cache, 1);
-               list_for_each_entry(cache, &space_info->block_groups[4], list)
+               list_for_each_entry(cache,
+                               &space_info->block_groups[BTRFS_RAID_SINGLE],
+                               list)
                        set_block_group_ro(cache, 1);
        }
 
index fe443fe..09582b8 100644 (file)
@@ -61,9 +61,8 @@ void btrfs_leak_debug_check(void)
                state = list_entry(states.next, struct extent_state, leak_list);
                printk(KERN_ERR "btrfs state leak: start %llu end %llu "
                       "state %lu in tree %p refs %d\n",
-                      (unsigned long long)state->start,
-                      (unsigned long long)state->end,
-                      state->state, state->tree, atomic_read(&state->refs));
+                      state->start, state->end, state->state, state->tree,
+                      atomic_read(&state->refs));
                list_del(&state->leak_list);
                kmem_cache_free(extent_state_cache, state);
        }
@@ -71,8 +70,8 @@ void btrfs_leak_debug_check(void)
        while (!list_empty(&buffers)) {
                eb = list_entry(buffers.next, struct extent_buffer, leak_list);
                printk(KERN_ERR "btrfs buffer leak start %llu len %lu "
-                      "refs %d\n", (unsigned long long)eb->start,
-                      eb->len, atomic_read(&eb->refs));
+                      "refs %d\n",
+                      eb->start, eb->len, atomic_read(&eb->refs));
                list_del(&eb->leak_list);
                kmem_cache_free(extent_buffer_cache, eb);
        }
@@ -88,11 +87,7 @@ static inline void __btrfs_debug_check_extent_io_range(const char *caller,
        if (end >= PAGE_SIZE && (end % 2) == 0 && end != isize - 1) {
                printk_ratelimited(KERN_DEBUG
                    "btrfs: %s: ino %llu isize %llu odd range [%llu,%llu]\n",
-                               caller,
-                               (unsigned long long)btrfs_ino(inode),
-                               (unsigned long long)isize,
-                               (unsigned long long)start,
-                               (unsigned long long)end);
+                               caller, btrfs_ino(inode), isize, start, end);
        }
 }
 #else
@@ -388,8 +383,7 @@ static int insert_state(struct extent_io_tree *tree,
 
        if (end < start)
                WARN(1, KERN_ERR "btrfs end < start %llu %llu\n",
-                      (unsigned long long)end,
-                      (unsigned long long)start);
+                      end, start);
        state->start = start;
        state->end = end;
 
@@ -400,9 +394,8 @@ static int insert_state(struct extent_io_tree *tree,
                struct extent_state *found;
                found = rb_entry(node, struct extent_state, rb_node);
                printk(KERN_ERR "btrfs found node %llu %llu on insert of "
-                      "%llu %llu\n", (unsigned long long)found->start,
-                      (unsigned long long)found->end,
-                      (unsigned long long)start, (unsigned long long)end);
+                      "%llu %llu\n",
+                      found->start, found->end, start, end);
                return -EEXIST;
        }
        state->tree = tree;
@@ -762,15 +755,6 @@ static void cache_state(struct extent_state *state,
        }
 }
 
-static void uncache_state(struct extent_state **cached_ptr)
-{
-       if (cached_ptr && (*cached_ptr)) {
-               struct extent_state *state = *cached_ptr;
-               *cached_ptr = NULL;
-               free_extent_state(state);
-       }
-}
-
 /*
  * set some bits on a range in the tree.  This may require allocations or
  * sleeping, so the gfp mask is used to indicate what is allowed.
@@ -1687,31 +1671,21 @@ out_failed:
        return found;
 }
 
-int extent_clear_unlock_delalloc(struct inode *inode,
-                               struct extent_io_tree *tree,
-                               u64 start, u64 end, struct page *locked_page,
-                               unsigned long op)
+int extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
+                                struct page *locked_page,
+                                unsigned long clear_bits,
+                                unsigned long page_ops)
 {
+       struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
        int ret;
        struct page *pages[16];
        unsigned long index = start >> PAGE_CACHE_SHIFT;
        unsigned long end_index = end >> PAGE_CACHE_SHIFT;
        unsigned long nr_pages = end_index - index + 1;
        int i;
-       unsigned long clear_bits = 0;
-
-       if (op & EXTENT_CLEAR_UNLOCK)
-               clear_bits |= EXTENT_LOCKED;
-       if (op & EXTENT_CLEAR_DIRTY)
-               clear_bits |= EXTENT_DIRTY;
-
-       if (op & EXTENT_CLEAR_DELALLOC)
-               clear_bits |= EXTENT_DELALLOC;
 
        clear_extent_bit(tree, start, end, clear_bits, 1, 0, NULL, GFP_NOFS);
-       if (!(op & (EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY |
-                   EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK |
-                   EXTENT_SET_PRIVATE2)))
+       if (page_ops == 0)
                return 0;
 
        while (nr_pages > 0) {
@@ -1720,20 +1694,20 @@ int extent_clear_unlock_delalloc(struct inode *inode,
                                     nr_pages, ARRAY_SIZE(pages)), pages);
                for (i = 0; i < ret; i++) {
 
-                       if (op & EXTENT_SET_PRIVATE2)
+                       if (page_ops & PAGE_SET_PRIVATE2)
                                SetPagePrivate2(pages[i]);
 
                        if (pages[i] == locked_page) {
                                page_cache_release(pages[i]);
                                continue;
                        }
-                       if (op & EXTENT_CLEAR_DIRTY)
+                       if (page_ops & PAGE_CLEAR_DIRTY)
                                clear_page_dirty_for_io(pages[i]);
-                       if (op & EXTENT_SET_WRITEBACK)
+                       if (page_ops & PAGE_SET_WRITEBACK)
                                set_page_writeback(pages[i]);
-                       if (op & EXTENT_END_WRITEBACK)
+                       if (page_ops & PAGE_END_WRITEBACK)
                                end_page_writeback(pages[i]);
-                       if (op & EXTENT_CLEAR_UNLOCK_PAGE)
+                       if (page_ops & PAGE_UNLOCK)
                                unlock_page(pages[i]);
                        page_cache_release(pages[i]);
                }
@@ -1810,7 +1784,7 @@ out:
  * set the private field for a given byte offset in the tree.  If there isn't
  * an extent_state there already, this does nothing.
  */
-int set_state_private(struct extent_io_tree *tree, u64 start, u64 private)
+static int set_state_private(struct extent_io_tree *tree, u64 start, u64 private)
 {
        struct rb_node *node;
        struct extent_state *state;
@@ -1837,64 +1811,6 @@ out:
        return ret;
 }
 
-void extent_cache_csums_dio(struct extent_io_tree *tree, u64 start, u32 csums[],
-                           int count)
-{
-       struct rb_node *node;
-       struct extent_state *state;
-
-       spin_lock(&tree->lock);
-       /*
-        * this search will find all the extents that end after
-        * our range starts.
-        */
-       node = tree_search(tree, start);
-       BUG_ON(!node);
-
-       state = rb_entry(node, struct extent_state, rb_node);
-       BUG_ON(state->start != start);
-
-       while (count) {
-               state->private = *csums++;
-               count--;
-               state = next_state(state);
-       }
-       spin_unlock(&tree->lock);
-}
-
-static inline u64 __btrfs_get_bio_offset(struct bio *bio, int bio_index)
-{
-       struct bio_vec *bvec = bio->bi_io_vec + bio_index;
-
-       return page_offset(bvec->bv_page) + bvec->bv_offset;
-}
-
-void extent_cache_csums(struct extent_io_tree *tree, struct bio *bio, int bio_index,
-                       u32 csums[], int count)
-{
-       struct rb_node *node;
-       struct extent_state *state = NULL;
-       u64 start;
-
-       spin_lock(&tree->lock);
-       do {
-               start = __btrfs_get_bio_offset(bio, bio_index);
-               if (state == NULL || state->start != start) {
-                       node = tree_search(tree, start);
-                       BUG_ON(!node);
-
-                       state = rb_entry(node, struct extent_state, rb_node);
-                       BUG_ON(state->start != start);
-               }
-               state->private = *csums++;
-               count--;
-               bio_index++;
-
-               state = next_state(state);
-       } while (count);
-       spin_unlock(&tree->lock);
-}
-
 int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private)
 {
        struct rb_node *node;
@@ -2173,7 +2089,8 @@ static int clean_io_failure(u64 start, struct page *page)
                                            EXTENT_LOCKED);
        spin_unlock(&BTRFS_I(inode)->io_tree.lock);
 
-       if (state && state->start == failrec->start) {
+       if (state && state->start <= failrec->start &&
+           state->end >= failrec->start + failrec->len - 1) {
                fs_info = BTRFS_I(inode)->root->fs_info;
                num_copies = btrfs_num_copies(fs_info, failrec->logical,
                                              failrec->len);
@@ -2201,9 +2118,9 @@ out:
  * needed
  */
 
-static int bio_readpage_error(struct bio *failed_bio, struct page *page,
-                               u64 start, u64 end, int failed_mirror,
-                               struct extent_state *state)
+static int bio_readpage_error(struct bio *failed_bio, u64 phy_offset,
+                             struct page *page, u64 start, u64 end,
+                             int failed_mirror)
 {
        struct io_failure_record *failrec = NULL;
        u64 private;
@@ -2213,6 +2130,8 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page,
        struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
        struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
        struct bio *bio;
+       struct btrfs_io_bio *btrfs_failed_bio;
+       struct btrfs_io_bio *btrfs_bio;
        int num_copies;
        int ret;
        int read_mode;
@@ -2296,23 +2215,12 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page,
                 * all the retry and error correction code that follows. no
                 * matter what the error is, it is very likely to persist.
                 */
-               pr_debug("bio_readpage_error: cannot repair, num_copies == 1. "
-                        "state=%p, num_copies=%d, next_mirror %d, "
-                        "failed_mirror %d\n", state, num_copies,
-                        failrec->this_mirror, failed_mirror);
+               pr_debug("bio_readpage_error: cannot repair, num_copies=%d, next_mirror %d, failed_mirror %d\n",
+                        num_copies, failrec->this_mirror, failed_mirror);
                free_io_failure(inode, failrec, 0);
                return -EIO;
        }
 
-       if (!state) {
-               spin_lock(&tree->lock);
-               state = find_first_extent_bit_state(tree, failrec->start,
-                                                   EXTENT_LOCKED);
-               if (state && state->start != failrec->start)
-                       state = NULL;
-               spin_unlock(&tree->lock);
-       }
-
        /*
         * there are two premises:
         *      a) deliver good data to the caller
@@ -2349,9 +2257,8 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page,
                read_mode = READ_SYNC;
        }
 
-       if (!state || failrec->this_mirror > num_copies) {
-               pr_debug("bio_readpage_error: (fail) state=%p, num_copies=%d, "
-                        "next_mirror %d, failed_mirror %d\n", state,
+       if (failrec->this_mirror > num_copies) {
+               pr_debug("bio_readpage_error: (fail) num_copies=%d, next_mirror %d, failed_mirror %d\n",
                         num_copies, failrec->this_mirror, failed_mirror);
                free_io_failure(inode, failrec, 0);
                return -EIO;
@@ -2362,12 +2269,24 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page,
                free_io_failure(inode, failrec, 0);
                return -EIO;
        }
-       bio->bi_private = state;
        bio->bi_end_io = failed_bio->bi_end_io;
        bio->bi_sector = failrec->logical >> 9;
        bio->bi_bdev = BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev;
        bio->bi_size = 0;
 
+       btrfs_failed_bio = btrfs_io_bio(failed_bio);
+       if (btrfs_failed_bio->csum) {
+               struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
+               u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
+
+               btrfs_bio = btrfs_io_bio(bio);
+               btrfs_bio->csum = btrfs_bio->csum_inline;
+               phy_offset >>= inode->i_sb->s_blocksize_bits;
+               phy_offset *= csum_size;
+               memcpy(btrfs_bio->csum, btrfs_failed_bio->csum + phy_offset,
+                      csum_size);
+       }
+
        bio_add_page(bio, page, failrec->len, start - page_offset(page));
 
        pr_debug("bio_readpage_error: submitting new read[%#x] to "
@@ -2450,6 +2369,18 @@ static void end_bio_extent_writepage(struct bio *bio, int err)
        bio_put(bio);
 }
 
+static void
+endio_readpage_release_extent(struct extent_io_tree *tree, u64 start, u64 len,
+                             int uptodate)
+{
+       struct extent_state *cached = NULL;
+       u64 end = start + len - 1;
+
+       if (uptodate && tree->track_uptodate)
+               set_extent_uptodate(tree, start, end, &cached, GFP_ATOMIC);
+       unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);
+}
+
 /*
  * after a readpage IO is done, we need to:
  * clear the uptodate bits on error
@@ -2466,9 +2397,14 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
        int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
        struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1;
        struct bio_vec *bvec = bio->bi_io_vec;
+       struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
        struct extent_io_tree *tree;
+       u64 offset = 0;
        u64 start;
        u64 end;
+       u64 len;
+       u64 extent_start = 0;
+       u64 extent_len = 0;
        int mirror;
        int ret;
 
@@ -2477,9 +2413,6 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
 
        do {
                struct page *page = bvec->bv_page;
-               struct extent_state *cached = NULL;
-               struct extent_state *state;
-               struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
                struct inode *inode = page->mapping->host;
 
                pr_debug("end_bio_extent_readpage: bi_sector=%llu, err=%d, "
@@ -2500,37 +2433,32 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
 
                start = page_offset(page);
                end = start + bvec->bv_offset + bvec->bv_len - 1;
+               len = bvec->bv_len;
 
                if (++bvec <= bvec_end)
                        prefetchw(&bvec->bv_page->flags);
 
-               spin_lock(&tree->lock);
-               state = find_first_extent_bit_state(tree, start, EXTENT_LOCKED);
-               if (state && state->start == start) {
-                       /*
-                        * take a reference on the state, unlock will drop
-                        * the ref
-                        */
-                       cache_state(state, &cached);
-               }
-               spin_unlock(&tree->lock);
-
                mirror = io_bio->mirror_num;
-               if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) {
-                       ret = tree->ops->readpage_end_io_hook(page, start, end,
-                                                             state, mirror);
+               if (likely(uptodate && tree->ops &&
+                          tree->ops->readpage_end_io_hook)) {
+                       ret = tree->ops->readpage_end_io_hook(io_bio, offset,
+                                                             page, start, end,
+                                                             mirror);
                        if (ret)
                                uptodate = 0;
                        else
                                clean_io_failure(start, page);
                }
 
-               if (!uptodate && tree->ops && tree->ops->readpage_io_failed_hook) {
+               if (likely(uptodate))
+                       goto readpage_ok;
+
+               if (tree->ops && tree->ops->readpage_io_failed_hook) {
                        ret = tree->ops->readpage_io_failed_hook(page, mirror);
                        if (!ret && !err &&
                            test_bit(BIO_UPTODATE, &bio->bi_flags))
                                uptodate = 1;
-               } else if (!uptodate) {
+               } else {
                        /*
                         * The generic bio_readpage_error handles errors the
                         * following way: If possible, new read requests are
@@ -2541,24 +2469,18 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
                         * can't handle the error it will return -EIO and we
                         * remain responsible for that page.
                         */
-                       ret = bio_readpage_error(bio, page, start, end, mirror, NULL);
+                       ret = bio_readpage_error(bio, offset, page, start, end,
+                                                mirror);
                        if (ret == 0) {
                                uptodate =
                                        test_bit(BIO_UPTODATE, &bio->bi_flags);
                                if (err)
                                        uptodate = 0;
-                               uncache_state(&cached);
                                continue;
                        }
                }
-
-               if (uptodate && tree->track_uptodate) {
-                       set_extent_uptodate(tree, start, end, &cached,
-                                           GFP_ATOMIC);
-               }
-               unlock_extent_cached(tree, start, end, &cached, GFP_ATOMIC);
-
-               if (uptodate) {
+readpage_ok:
+               if (likely(uptodate)) {
                        loff_t i_size = i_size_read(inode);
                        pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
                        unsigned offset;
@@ -2573,8 +2495,36 @@ static void end_bio_extent_readpage(struct bio *bio, int err)
                        SetPageError(page);
                }
                unlock_page(page);
+               offset += len;
+
+               if (unlikely(!uptodate)) {
+                       if (extent_len) {
+                               endio_readpage_release_extent(tree,
+                                                             extent_start,
+                                                             extent_len, 1);
+                               extent_start = 0;
+                               extent_len = 0;
+                       }
+                       endio_readpage_release_extent(tree, start,
+                                                     end - start + 1, 0);
+               } else if (!extent_len) {
+                       extent_start = start;
+                       extent_len = end + 1 - start;
+               } else if (extent_start + extent_len == start) {
+                       extent_len += end + 1 - start;
+               } else {
+                       endio_readpage_release_extent(tree, extent_start,
+                                                     extent_len, uptodate);
+                       extent_start = start;
+                       extent_len = end + 1 - start;
+               }
        } while (bvec <= bvec_end);
 
+       if (extent_len)
+               endio_readpage_release_extent(tree, extent_start, extent_len,
+                                             uptodate);
+       if (io_bio->end_io)
+               io_bio->end_io(io_bio, err);
        bio_put(bio);
 }
 
@@ -2586,6 +2536,7 @@ struct bio *
 btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
                gfp_t gfp_flags)
 {
+       struct btrfs_io_bio *btrfs_bio;
        struct bio *bio;
 
        bio = bio_alloc_bioset(gfp_flags, nr_vecs, btrfs_bioset);
@@ -2601,6 +2552,10 @@ btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
                bio->bi_size = 0;
                bio->bi_bdev = bdev;
                bio->bi_sector = first_sector;
+               btrfs_bio = btrfs_io_bio(bio);
+               btrfs_bio->csum = NULL;
+               btrfs_bio->csum_allocated = NULL;
+               btrfs_bio->end_io = NULL;
        }
        return bio;
 }
@@ -2614,7 +2569,17 @@ struct bio *btrfs_bio_clone(struct bio *bio, gfp_t gfp_mask)
 /* this also allocates from the btrfs_bioset */
 struct bio *btrfs_io_bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs)
 {
-       return bio_alloc_bioset(gfp_mask, nr_iovecs, btrfs_bioset);
+       struct btrfs_io_bio *btrfs_bio;
+       struct bio *bio;
+
+       bio = bio_alloc_bioset(gfp_mask, nr_iovecs, btrfs_bioset);
+       if (bio) {
+               btrfs_bio = btrfs_io_bio(bio);
+               btrfs_bio->csum = NULL;
+               btrfs_bio->csum_allocated = NULL;
+               btrfs_bio->end_io = NULL;
+       }
+       return bio;
 }
 
 
@@ -2738,17 +2703,45 @@ void set_page_extent_mapped(struct page *page)
        }
 }
 
+static struct extent_map *
+__get_extent_map(struct inode *inode, struct page *page, size_t pg_offset,
+                u64 start, u64 len, get_extent_t *get_extent,
+                struct extent_map **em_cached)
+{
+       struct extent_map *em;
+
+       if (em_cached && *em_cached) {
+               em = *em_cached;
+               if (em->in_tree && start >= em->start &&
+                   start < extent_map_end(em)) {
+                       atomic_inc(&em->refs);
+                       return em;
+               }
+
+               free_extent_map(em);
+               *em_cached = NULL;
+       }
+
+       em = get_extent(inode, page, pg_offset, start, len, 0);
+       if (em_cached && !IS_ERR_OR_NULL(em)) {
+               BUG_ON(*em_cached);
+               atomic_inc(&em->refs);
+               *em_cached = em;
+       }
+       return em;
+}
 /*
  * basic readpage implementation.  Locked extent state structs are inserted
  * into the tree that are removed when the IO is done (by the end_io
  * handlers)
  * XXX JDM: This needs looking at to ensure proper page locking
  */
-static int __extent_read_full_page(struct extent_io_tree *tree,
-                                  struct page *page,
-                                  get_extent_t *get_extent,
-                                  struct bio **bio, int mirror_num,
-                                  unsigned long *bio_flags, int rw)
+static int __do_readpage(struct extent_io_tree *tree,
+                        struct page *page,
+                        get_extent_t *get_extent,
+                        struct extent_map **em_cached,
+                        struct bio **bio, int mirror_num,
+                        unsigned long *bio_flags, int rw)
 {
        struct inode *inode = page->mapping->host;
        u64 start = page_offset(page);
@@ -2762,35 +2755,26 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
        sector_t sector;
        struct extent_map *em;
        struct block_device *bdev;
-       struct btrfs_ordered_extent *ordered;
        int ret;
        int nr = 0;
+       int parent_locked = *bio_flags & EXTENT_BIO_PARENT_LOCKED;
        size_t pg_offset = 0;
        size_t iosize;
        size_t disk_io_size;
        size_t blocksize = inode->i_sb->s_blocksize;
-       unsigned long this_bio_flag = 0;
+       unsigned long this_bio_flag = *bio_flags & EXTENT_BIO_PARENT_LOCKED;
 
        set_page_extent_mapped(page);
 
+       end = page_end;
        if (!PageUptodate(page)) {
                if (cleancache_get_page(page) == 0) {
                        BUG_ON(blocksize != PAGE_SIZE);
+                       unlock_extent(tree, start, end);
                        goto out;
                }
        }
 
-       end = page_end;
-       while (1) {
-               lock_extent(tree, start, end);
-               ordered = btrfs_lookup_ordered_extent(inode, start);
-               if (!ordered)
-                       break;
-               unlock_extent(tree, start, end);
-               btrfs_start_ordered_extent(inode, ordered, 1);
-               btrfs_put_ordered_extent(ordered);
-       }
-
        if (page->index == last_byte >> PAGE_CACHE_SHIFT) {
                char *userpage;
                size_t zero_offset = last_byte & (PAGE_CACHE_SIZE - 1);
@@ -2817,15 +2801,18 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
                        kunmap_atomic(userpage);
                        set_extent_uptodate(tree, cur, cur + iosize - 1,
                                            &cached, GFP_NOFS);
-                       unlock_extent_cached(tree, cur, cur + iosize - 1,
-                                            &cached, GFP_NOFS);
+                       if (!parent_locked)
+                               unlock_extent_cached(tree, cur,
+                                                    cur + iosize - 1,
+                                                    &cached, GFP_NOFS);
                        break;
                }
-               em = get_extent(inode, page, pg_offset, cur,
-                               end - cur + 1, 0);
+               em = __get_extent_map(inode, page, pg_offset, cur,
+                                     end - cur + 1, get_extent, em_cached);
                if (IS_ERR_OR_NULL(em)) {
                        SetPageError(page);
-                       unlock_extent(tree, cur, end);
+                       if (!parent_locked)
+                               unlock_extent(tree, cur, end);
                        break;
                }
                extent_offset = cur - em->start;
@@ -2833,7 +2820,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
                BUG_ON(end < cur);
 
                if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags)) {
-                       this_bio_flag = EXTENT_BIO_COMPRESSED;
+                       this_bio_flag |= EXTENT_BIO_COMPRESSED;
                        extent_set_compress_type(&this_bio_flag,
                                                 em->compress_type);
                }
@@ -2877,7 +2864,8 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
                if (test_range_bit(tree, cur, cur_end,
                                   EXTENT_UPTODATE, 1, NULL)) {
                        check_page_uptodate(tree, page);
-                       unlock_extent(tree, cur, cur + iosize - 1);
+                       if (!parent_locked)
+                               unlock_extent(tree, cur, cur + iosize - 1);
                        cur = cur + iosize;
                        pg_offset += iosize;
                        continue;
@@ -2887,7 +2875,8 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
                 */
                if (block_start == EXTENT_MAP_INLINE) {
                        SetPageError(page);
-                       unlock_extent(tree, cur, cur + iosize - 1);
+                       if (!parent_locked)
+                               unlock_extent(tree, cur, cur + iosize - 1);
                        cur = cur + iosize;
                        pg_offset += iosize;
                        continue;
@@ -2905,7 +2894,8 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
                        *bio_flags = this_bio_flag;
                } else {
                        SetPageError(page);
-                       unlock_extent(tree, cur, cur + iosize - 1);
+                       if (!parent_locked)
+                               unlock_extent(tree, cur, cur + iosize - 1);
                }
                cur = cur + iosize;
                pg_offset += iosize;
@@ -2919,6 +2909,104 @@ out:
        return 0;
 }
 
+static inline void __do_contiguous_readpages(struct extent_io_tree *tree,
+                                            struct page *pages[], int nr_pages,
+                                            u64 start, u64 end,
+                                            get_extent_t *get_extent,
+                                            struct extent_map **em_cached,
+                                            struct bio **bio, int mirror_num,
+                                            unsigned long *bio_flags, int rw)
+{
+       struct inode *inode;
+       struct btrfs_ordered_extent *ordered;
+       int index;
+
+       inode = pages[0]->mapping->host;
+       while (1) {
+               lock_extent(tree, start, end);
+               ordered = btrfs_lookup_ordered_range(inode, start,
+                                                    end - start + 1);
+               if (!ordered)
+                       break;
+               unlock_extent(tree, start, end);
+               btrfs_start_ordered_extent(inode, ordered, 1);
+               btrfs_put_ordered_extent(ordered);
+       }
+
+       for (index = 0; index < nr_pages; index++) {
+               __do_readpage(tree, pages[index], get_extent, em_cached, bio,
+                             mirror_num, bio_flags, rw);
+               page_cache_release(pages[index]);
+       }
+}
+
+static void __extent_readpages(struct extent_io_tree *tree,
+                              struct page *pages[],
+                              int nr_pages, get_extent_t *get_extent,
+                              struct extent_map **em_cached,
+                              struct bio **bio, int mirror_num,
+                              unsigned long *bio_flags, int rw)
+{
+       u64 start = 0;
+       u64 end = 0;
+       u64 page_start;
+       int index;
+       int first_index = 0;
+
+       for (index = 0; index < nr_pages; index++) {
+               page_start = page_offset(pages[index]);
+               if (!end) {
+                       start = page_start;
+                       end = start + PAGE_CACHE_SIZE - 1;
+                       first_index = index;
+               } else if (end + 1 == page_start) {
+                       end += PAGE_CACHE_SIZE;
+               } else {
+                       __do_contiguous_readpages(tree, &pages[first_index],
+                                                 index - first_index, start,
+                                                 end, get_extent, em_cached,
+                                                 bio, mirror_num, bio_flags,
+                                                 rw);
+                       start = page_start;
+                       end = start + PAGE_CACHE_SIZE - 1;
+                       first_index = index;
+               }
+       }
+
+       if (end)
+               __do_contiguous_readpages(tree, &pages[first_index],
+                                         index - first_index, start,
+                                         end, get_extent, em_cached, bio,
+                                         mirror_num, bio_flags, rw);
+}
+
+static int __extent_read_full_page(struct extent_io_tree *tree,
+                                  struct page *page,
+                                  get_extent_t *get_extent,
+                                  struct bio **bio, int mirror_num,
+                                  unsigned long *bio_flags, int rw)
+{
+       struct inode *inode = page->mapping->host;
+       struct btrfs_ordered_extent *ordered;
+       u64 start = page_offset(page);
+       u64 end = start + PAGE_CACHE_SIZE - 1;
+       int ret;
+
+       while (1) {
+               lock_extent(tree, start, end);
+               ordered = btrfs_lookup_ordered_extent(inode, start);
+               if (!ordered)
+                       break;
+               unlock_extent(tree, start, end);
+               btrfs_start_ordered_extent(inode, ordered, 1);
+               btrfs_put_ordered_extent(ordered);
+       }
+
+       ret = __do_readpage(tree, page, get_extent, NULL, bio, mirror_num,
+                           bio_flags, rw);
+       return ret;
+}
+
 int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
                            get_extent_t *get_extent, int mirror_num)
 {
@@ -2933,6 +3021,20 @@ int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
        return ret;
 }
 
+int extent_read_full_page_nolock(struct extent_io_tree *tree, struct page *page,
+                                get_extent_t *get_extent, int mirror_num)
+{
+       struct bio *bio = NULL;
+       unsigned long bio_flags = EXTENT_BIO_PARENT_LOCKED;
+       int ret;
+
+       ret = __do_readpage(tree, page, get_extent, NULL, &bio, mirror_num,
+                                     &bio_flags, READ);
+       if (bio)
+               ret = submit_one_bio(READ, bio, mirror_num, bio_flags);
+       return ret;
+}
+
 static noinline void update_nr_written(struct page *page,
                                      struct writeback_control *wbc,
                                      unsigned long nr_written)
@@ -3189,8 +3291,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
                        if (!PageWriteback(page)) {
                                printk(KERN_ERR "btrfs warning page %lu not "
                                       "writeback, cur %llu end %llu\n",
-                                      page->index, (unsigned long long)cur,
-                                      (unsigned long long)end);
+                                      page->index, cur, end);
                        }
 
                        ret = submit_extent_page(write_flags, tree, page,
@@ -3769,7 +3870,7 @@ int extent_readpages(struct extent_io_tree *tree,
        unsigned long bio_flags = 0;
        struct page *pagepool[16];
        struct page *page;
-       int i = 0;
+       struct extent_map *em_cached = NULL;
        int nr = 0;
 
        for (page_idx = 0; page_idx < nr_pages; page_idx++) {
@@ -3786,18 +3887,16 @@ int extent_readpages(struct extent_io_tree *tree,
                pagepool[nr++] = page;
                if (nr < ARRAY_SIZE(pagepool))
                        continue;
-               for (i = 0; i < nr; i++) {
-                       __extent_read_full_page(tree, pagepool[i], get_extent,
-                                       &bio, 0, &bio_flags, READ);
-                       page_cache_release(pagepool[i]);
-               }
+               __extent_readpages(tree, pagepool, nr, get_extent, &em_cached,
+                                  &bio, 0, &bio_flags, READ);
                nr = 0;
        }
-       for (i = 0; i < nr; i++) {
-               __extent_read_full_page(tree, pagepool[i], get_extent,
-                                       &bio, 0, &bio_flags, READ);
-               page_cache_release(pagepool[i]);
-       }
+       if (nr)
+               __extent_readpages(tree, pagepool, nr, get_extent, &em_cached,
+                                  &bio, 0, &bio_flags, READ);
+
+       if (em_cached)
+               free_extent_map(em_cached);
 
        BUG_ON(!list_empty(pages));
        if (bio)
@@ -4136,6 +4235,76 @@ static void __free_extent_buffer(struct extent_buffer *eb)
        kmem_cache_free(extent_buffer_cache, eb);
 }
 
+static int extent_buffer_under_io(struct extent_buffer *eb)
+{
+       return (atomic_read(&eb->io_pages) ||
+               test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags) ||
+               test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
+}
+
+/*
+ * Helper for releasing extent buffer page.
+ */
+static void btrfs_release_extent_buffer_page(struct extent_buffer *eb,
+                                               unsigned long start_idx)
+{
+       unsigned long index;
+       unsigned long num_pages;
+       struct page *page;
+       int mapped = !test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags);
+
+       BUG_ON(extent_buffer_under_io(eb));
+
+       num_pages = num_extent_pages(eb->start, eb->len);
+       index = start_idx + num_pages;
+       if (start_idx >= index)
+               return;
+
+       do {
+               index--;
+               page = extent_buffer_page(eb, index);
+               if (page && mapped) {
+                       spin_lock(&page->mapping->private_lock);
+                       /*
+                        * We do this since we'll remove the pages after we've
+                        * removed the eb from the radix tree, so we could race
+                        * and have this page now attached to the new eb.  So
+                        * only clear page_private if it's still connected to
+                        * this eb.
+                        */
+                       if (PagePrivate(page) &&
+                           page->private == (unsigned long)eb) {
+                               BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
+                               BUG_ON(PageDirty(page));
+                               BUG_ON(PageWriteback(page));
+                               /*
+                                * We need to make sure we haven't be attached
+                                * to a new eb.
+                                */
+                               ClearPagePrivate(page);
+                               set_page_private(page, 0);
+                               /* One for the page private */
+                               page_cache_release(page);
+                       }
+                       spin_unlock(&page->mapping->private_lock);
+
+               }
+               if (page) {
+                       /* One for when we alloced the page */
+                       page_cache_release(page);
+               }
+       } while (index != start_idx);
+}
+
+/*
+ * Helper for releasing the extent buffer.
+ */
+static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
+{
+       btrfs_release_extent_buffer_page(eb, 0);
+       __free_extent_buffer(eb);
+}
+
 static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree,
                                                   u64 start,
                                                   unsigned long len,
@@ -4184,13 +4353,16 @@ struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src)
        struct extent_buffer *new;
        unsigned long num_pages = num_extent_pages(src->start, src->len);
 
-       new = __alloc_extent_buffer(NULL, src->start, src->len, GFP_ATOMIC);
+       new = __alloc_extent_buffer(NULL, src->start, src->len, GFP_NOFS);
        if (new == NULL)
                return NULL;
 
        for (i = 0; i < num_pages; i++) {
-               p = alloc_page(GFP_ATOMIC);
-               BUG_ON(!p);
+               p = alloc_page(GFP_NOFS);
+               if (!p) {
+                       btrfs_release_extent_buffer(new);
+                       return NULL;
+               }
                attach_extent_buffer_page(new, p);
                WARN_ON(PageDirty(p));
                SetPageUptodate(p);
@@ -4210,12 +4382,12 @@ struct extent_buffer *alloc_dummy_extent_buffer(u64 start, unsigned long len)
        unsigned long num_pages = num_extent_pages(0, len);
        unsigned long i;
 
-       eb = __alloc_extent_buffer(NULL, start, len, GFP_ATOMIC);
+       eb = __alloc_extent_buffer(NULL, start, len, GFP_NOFS);
        if (!eb)
                return NULL;
 
        for (i = 0; i < num_pages; i++) {
-               eb->pages[i] = alloc_page(GFP_ATOMIC);
+               eb->pages[i] = alloc_page(GFP_NOFS);
                if (!eb->pages[i])
                        goto err;
        }
@@ -4231,76 +4403,6 @@ err:
        return NULL;
 }
 
-static int extent_buffer_under_io(struct extent_buffer *eb)
-{
-       return (atomic_read(&eb->io_pages) ||
-               test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags) ||
-               test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
-}
-
-/*
- * Helper for releasing extent buffer page.
- */
-static void btrfs_release_extent_buffer_page(struct extent_buffer *eb,
-                                               unsigned long start_idx)
-{
-       unsigned long index;
-       unsigned long num_pages;
-       struct page *page;
-       int mapped = !test_bit(EXTENT_BUFFER_DUMMY, &eb->bflags);
-
-       BUG_ON(extent_buffer_under_io(eb));
-
-       num_pages = num_extent_pages(eb->start, eb->len);
-       index = start_idx + num_pages;
-       if (start_idx >= index)
-               return;
-
-       do {
-               index--;
-               page = extent_buffer_page(eb, index);
-               if (page && mapped) {
-                       spin_lock(&page->mapping->private_lock);
-                       /*
-                        * We do this since we'll remove the pages after we've
-                        * removed the eb from the radix tree, so we could race
-                        * and have this page now attached to the new eb.  So
-                        * only clear page_private if it's still connected to
-                        * this eb.
-                        */
-                       if (PagePrivate(page) &&
-                           page->private == (unsigned long)eb) {
-                               BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags));
-                               BUG_ON(PageDirty(page));
-                               BUG_ON(PageWriteback(page));
-                               /*
-                                * We need to make sure we haven't be attached
-                                * to a new eb.
-                                */
-                               ClearPagePrivate(page);
-                               set_page_private(page, 0);
-                               /* One for the page private */
-                               page_cache_release(page);
-                       }
-                       spin_unlock(&page->mapping->private_lock);
-
-               }
-               if (page) {
-                       /* One for when we alloced the page */
-                       page_cache_release(page);
-               }
-       } while (index != start_idx);
-}
-
-/*
- * Helper for releasing the extent buffer.
- */
-static inline void btrfs_release_extent_buffer(struct extent_buffer *eb)
-{
-       btrfs_release_extent_buffer_page(eb, 0);
-       __free_extent_buffer(eb);
-}
-
 static void check_buffer_tree_ref(struct extent_buffer *eb)
 {
        int refs;
@@ -4771,7 +4873,7 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv,
        WARN_ON(start > eb->len);
        WARN_ON(start + len > eb->start + eb->len);
 
-       offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
+       offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1);
 
        while (len > 0) {
                page = extent_buffer_page(eb, i);
@@ -4813,8 +4915,8 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
 
        if (start + min_len > eb->len) {
                WARN(1, KERN_ERR "btrfs bad mapping eb start %llu len %lu, "
-                      "wanted %lu %lu\n", (unsigned long long)eb->start,
-                      eb->len, start, min_len);
+                      "wanted %lu %lu\n",
+                      eb->start, eb->len, start, min_len);
                return -EINVAL;
        }
 
@@ -4841,7 +4943,7 @@ int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
        WARN_ON(start > eb->len);
        WARN_ON(start + len > eb->start + eb->len);
 
-       offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
+       offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1);
 
        while (len > 0) {
                page = extent_buffer_page(eb, i);
@@ -4875,7 +4977,7 @@ void write_extent_buffer(struct extent_buffer *eb, const void *srcv,
        WARN_ON(start > eb->len);
        WARN_ON(start + len > eb->start + eb->len);
 
-       offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
+       offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1);
 
        while (len > 0) {
                page = extent_buffer_page(eb, i);
@@ -4905,7 +5007,7 @@ void memset_extent_buffer(struct extent_buffer *eb, char c,
        WARN_ON(start > eb->len);
        WARN_ON(start + len > eb->start + eb->len);
 
-       offset = (start_offset + start) & ((unsigned long)PAGE_CACHE_SIZE - 1);
+       offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1);
 
        while (len > 0) {
                page = extent_buffer_page(eb, i);
@@ -4936,7 +5038,7 @@ void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
        WARN_ON(src->len != dst_len);
 
        offset = (start_offset + dst_offset) &
-               ((unsigned long)PAGE_CACHE_SIZE - 1);
+               (PAGE_CACHE_SIZE - 1);
 
        while (len > 0) {
                page = extent_buffer_page(dst, i);
@@ -5022,9 +5124,9 @@ void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
 
        while (len > 0) {
                dst_off_in_page = (start_offset + dst_offset) &
-                       ((unsigned long)PAGE_CACHE_SIZE - 1);
+                       (PAGE_CACHE_SIZE - 1);
                src_off_in_page = (start_offset + src_offset) &
-                       ((unsigned long)PAGE_CACHE_SIZE - 1);
+                       (PAGE_CACHE_SIZE - 1);
 
                dst_i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT;
                src_i = (start_offset + src_offset) >> PAGE_CACHE_SHIFT;
@@ -5075,9 +5177,9 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
                src_i = (start_offset + src_end) >> PAGE_CACHE_SHIFT;
 
                dst_off_in_page = (start_offset + dst_end) &
-                       ((unsigned long)PAGE_CACHE_SIZE - 1);
+                       (PAGE_CACHE_SIZE - 1);
                src_off_in_page = (start_offset + src_end) &
-                       ((unsigned long)PAGE_CACHE_SIZE - 1);
+                       (PAGE_CACHE_SIZE - 1);
 
                cur = min_t(unsigned long, len, src_off_in_page + 1);
                cur = min(cur, dst_off_in_page + 1);
index 3b8c4e2..6dbc645 100644 (file)
@@ -29,6 +29,7 @@
  */
 #define EXTENT_BIO_COMPRESSED 1
 #define EXTENT_BIO_TREE_LOG 2
+#define EXTENT_BIO_PARENT_LOCKED 4
 #define EXTENT_BIO_FLAG_SHIFT 16
 
 /* these are bit numbers for test/set bit */
 #define EXTENT_BUFFER_DUMMY 9
 
 /* these are flags for extent_clear_unlock_delalloc */
-#define EXTENT_CLEAR_UNLOCK_PAGE 0x1
-#define EXTENT_CLEAR_UNLOCK     0x2
-#define EXTENT_CLEAR_DELALLOC   0x4
-#define EXTENT_CLEAR_DIRTY      0x8
-#define EXTENT_SET_WRITEBACK    0x10
-#define EXTENT_END_WRITEBACK    0x20
-#define EXTENT_SET_PRIVATE2     0x40
-#define EXTENT_CLEAR_ACCOUNTING  0x80
+#define PAGE_UNLOCK            (1 << 0)
+#define PAGE_CLEAR_DIRTY       (1 << 1)
+#define PAGE_SET_WRITEBACK     (1 << 2)
+#define PAGE_END_WRITEBACK     (1 << 3)
+#define PAGE_SET_PRIVATE2      (1 << 4)
 
 /*
  * page->private values.  Every page that is controlled by the extent
@@ -62,6 +60,7 @@
 
 struct extent_state;
 struct btrfs_root;
+struct btrfs_io_bio;
 
 typedef        int (extent_submit_bio_hook_t)(struct inode *inode, int rw,
                                       struct bio *bio, int mirror_num,
@@ -77,8 +76,9 @@ struct extent_io_ops {
                              size_t size, struct bio *bio,
                              unsigned long bio_flags);
        int (*readpage_io_failed_hook)(struct page *page, int failed_mirror);
-       int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end,
-                                   struct extent_state *state, int mirror);
+       int (*readpage_end_io_hook)(struct btrfs_io_bio *io_bio, u64 phy_offset,
+                                   struct page *page, u64 start, u64 end,
+                                   int mirror);
        int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end,
                                      struct extent_state *state, int uptodate);
        void (*set_bit_hook)(struct inode *inode, struct extent_state *state,
@@ -200,6 +200,8 @@ int unlock_extent_cached(struct extent_io_tree *tree, u64 start, u64 end,
 int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end);
 int extent_read_full_page(struct extent_io_tree *tree, struct page *page,
                          get_extent_t *get_extent, int mirror_num);
+int extent_read_full_page_nolock(struct extent_io_tree *tree, struct page *page,
+                                get_extent_t *get_extent, int mirror_num);
 int __init extent_io_init(void);
 void extent_io_exit(void);
 
@@ -261,11 +263,6 @@ int extent_readpages(struct extent_io_tree *tree,
                     get_extent_t get_extent);
 int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                __u64 start, __u64 len, get_extent_t *get_extent);
-int set_state_private(struct extent_io_tree *tree, u64 start, u64 private);
-void extent_cache_csums_dio(struct extent_io_tree *tree, u64 start, u32 csums[],
-                           int count);
-void extent_cache_csums(struct extent_io_tree *tree, struct bio *bio,
-                       int bvec_index, u32 csums[], int count);
 int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private);
 void set_page_extent_mapped(struct page *page);
 
@@ -330,10 +327,10 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset,
                      unsigned long *map_len);
 int extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end);
 int extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end);
-int extent_clear_unlock_delalloc(struct inode *inode,
-                               struct extent_io_tree *tree,
-                               u64 start, u64 end, struct page *locked_page,
-                               unsigned long op);
+int extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
+                                struct page *locked_page,
+                                unsigned long bits_to_clear,
+                                unsigned long page_ops);
 struct bio *
 btrfs_bio_alloc(struct block_device *bdev, u64 first_sector, int nr_vecs,
                gfp_t gfp_flags);
index a7bfc95..4f53159 100644 (file)
@@ -23,6 +23,7 @@
 #include "ctree.h"
 #include "disk-io.h"
 #include "transaction.h"
+#include "volumes.h"
 #include "print-tree.h"
 
 #define __MAX_CSUM_ITEMS(r, size) ((unsigned long)(((BTRFS_LEAF_DATA_SIZE(r) - \
@@ -152,28 +153,54 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans,
        return ret;
 }
 
+static void btrfs_io_bio_endio_readpage(struct btrfs_io_bio *bio, int err)
+{
+       kfree(bio->csum_allocated);
+}
+
 static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
                                   struct inode *inode, struct bio *bio,
                                   u64 logical_offset, u32 *dst, int dio)
 {
-       u32 sum[16];
-       int len;
        struct bio_vec *bvec = bio->bi_io_vec;
-       int bio_index = 0;
+       struct btrfs_io_bio *btrfs_bio = btrfs_io_bio(bio);
+       struct btrfs_csum_item *item = NULL;
+       struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+       struct btrfs_path *path;
+       u8 *csum;
        u64 offset = 0;
        u64 item_start_offset = 0;
        u64 item_last_offset = 0;
        u64 disk_bytenr;
        u32 diff;
-       u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
+       int nblocks;
+       int bio_index = 0;
        int count;
-       struct btrfs_path *path;
-       struct btrfs_csum_item *item = NULL;
-       struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+       u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
 
        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
+
+       nblocks = bio->bi_size >> inode->i_sb->s_blocksize_bits;
+       if (!dst) {
+               if (nblocks * csum_size > BTRFS_BIO_INLINE_CSUM_SIZE) {
+                       btrfs_bio->csum_allocated = kmalloc(nblocks * csum_size,
+                                                           GFP_NOFS);
+                       if (!btrfs_bio->csum_allocated) {
+                               btrfs_free_path(path);
+                               return -ENOMEM;
+                       }
+                       btrfs_bio->csum = btrfs_bio->csum_allocated;
+                       btrfs_bio->end_io = btrfs_io_bio_endio_readpage;
+               } else {
+                       btrfs_bio->csum = btrfs_bio->csum_inline;
+               }
+               csum = btrfs_bio->csum;
+       } else {
+               csum = (u8 *)dst;
+       }
+
        if (bio->bi_size > PAGE_CACHE_SIZE * 8)
                path->reada = 2;
 
@@ -194,11 +221,10 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
        if (dio)
                offset = logical_offset;
        while (bio_index < bio->bi_vcnt) {
-               len = min_t(int, ARRAY_SIZE(sum), bio->bi_vcnt - bio_index);
                if (!dio)
                        offset = page_offset(bvec->bv_page) + bvec->bv_offset;
-               count = btrfs_find_ordered_sum(inode, offset, disk_bytenr, sum,
-                                              len);
+               count = btrfs_find_ordered_sum(inode, offset, disk_bytenr,
+                                              (u32 *)csum, nblocks);
                if (count)
                        goto found;
 
@@ -213,7 +239,7 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
                                                 path, disk_bytenr, 0);
                        if (IS_ERR(item)) {
                                count = 1;
-                               sum[0] = 0;
+                               memset(csum, 0, csum_size);
                                if (BTRFS_I(inode)->root->root_key.objectid ==
                                    BTRFS_DATA_RELOC_TREE_OBJECTID) {
                                        set_extent_bits(io_tree, offset,
@@ -222,9 +248,7 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
                                } else {
                                        printk(KERN_INFO "btrfs no csum found "
                                               "for inode %llu start %llu\n",
-                                              (unsigned long long)
-                                              btrfs_ino(inode),
-                                              (unsigned long long)offset);
+                                              btrfs_ino(inode), offset);
                                }
                                item = NULL;
                                btrfs_release_path(path);
@@ -249,23 +273,14 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
                diff = disk_bytenr - item_start_offset;
                diff = diff / root->sectorsize;
                diff = diff * csum_size;
-               count = min_t(int, len, (item_last_offset - disk_bytenr) >>
-                                       inode->i_sb->s_blocksize_bits);
-               read_extent_buffer(path->nodes[0], sum,
+               count = min_t(int, nblocks, (item_last_offset - disk_bytenr) >>
+                                           inode->i_sb->s_blocksize_bits);
+               read_extent_buffer(path->nodes[0], csum,
                                   ((unsigned long)item) + diff,
                                   csum_size * count);
 found:
-               if (dst) {
-                       memcpy(dst, sum, count * csum_size);
-                       dst += count;
-               } else {
-                       if (dio)
-                               extent_cache_csums_dio(io_tree, offset, sum,
-                                                      count);
-                       else
-                               extent_cache_csums(io_tree, bio, bio_index, sum,
-                                           count);
-               }
+               csum += count * csum_size;
+               nblocks -= count;
                while (count--) {
                        disk_bytenr += bvec->bv_len;
                        offset += bvec->bv_len;
@@ -284,9 +299,19 @@ int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode,
 }
 
 int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode,
-                             struct bio *bio, u64 offset)
+                             struct btrfs_dio_private *dip, struct bio *bio,
+                             u64 offset)
 {
-       return __btrfs_lookup_bio_sums(root, inode, bio, offset, NULL, 1);
+       int len = (bio->bi_sector << 9) - dip->disk_bytenr;
+       u16 csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
+       int ret;
+
+       len >>= inode->i_sb->s_blocksize_bits;
+       len *= csum_size;
+
+       ret = __btrfs_lookup_bio_sums(root, inode, bio, offset,
+                                     (u32 *)(dip->csum + len), 1);
+       return ret;
 }
 
 int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end,
index 4d2eb64..bc5072b 100644 (file)
@@ -1334,7 +1334,6 @@ fail:
 static noinline int check_can_nocow(struct inode *inode, loff_t pos,
                                    size_t *write_bytes)
 {
-       struct btrfs_trans_handle *trans;
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct btrfs_ordered_extent *ordered;
        u64 lockstart, lockend;
@@ -1356,16 +1355,8 @@ static noinline int check_can_nocow(struct inode *inode, loff_t pos,
                btrfs_put_ordered_extent(ordered);
        }
 
-       trans = btrfs_join_transaction(root);
-       if (IS_ERR(trans)) {
-               unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend);
-               return PTR_ERR(trans);
-       }
-
        num_bytes = lockend - lockstart + 1;
-       ret = can_nocow_extent(trans, inode, lockstart, &num_bytes, NULL, NULL,
-                              NULL);
-       btrfs_end_transaction(trans, root);
+       ret = can_nocow_extent(inode, lockstart, &num_bytes, NULL, NULL, NULL);
        if (ret <= 0) {
                ret = 0;
        } else {
index b21a3cd..3f0ddfc 100644 (file)
@@ -221,12 +221,10 @@ int btrfs_truncate_free_space_cache(struct btrfs_root *root,
                                    struct btrfs_path *path,
                                    struct inode *inode)
 {
-       loff_t oldsize;
        int ret = 0;
 
-       oldsize = i_size_read(inode);
        btrfs_i_size_write(inode, 0);
-       truncate_pagecache(inode, oldsize, 0);
+       truncate_pagecache(inode, 0);
 
        /*
         * We don't need an orphan item because truncating the free space cache
@@ -308,7 +306,7 @@ static void io_ctl_unmap_page(struct io_ctl *io_ctl)
 
 static void io_ctl_map_page(struct io_ctl *io_ctl, int clear)
 {
-       BUG_ON(io_ctl->index >= io_ctl->num_pages);
+       ASSERT(io_ctl->index < io_ctl->num_pages);
        io_ctl->page = io_ctl->pages[io_ctl->index++];
        io_ctl->cur = kmap(io_ctl->page);
        io_ctl->orig = io_ctl->cur;
@@ -673,8 +671,7 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
                btrfs_err(root->fs_info,
                        "free space inode generation (%llu) "
                        "did not match free space cache generation (%llu)",
-                       (unsigned long long)BTRFS_I(inode)->generation,
-                       (unsigned long long)generation);
+                       BTRFS_I(inode)->generation, generation);
                return 0;
        }
 
@@ -729,7 +726,7 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
                                goto free_cache;
                        }
                } else {
-                       BUG_ON(!num_bitmaps);
+                       ASSERT(num_bitmaps);
                        num_bitmaps--;
                        e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
                        if (!e->bitmap) {
@@ -1029,7 +1026,7 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode,
        leaf = path->nodes[0];
        if (ret > 0) {
                struct btrfs_key found_key;
-               BUG_ON(!path->slots[0]);
+               ASSERT(path->slots[0]);
                path->slots[0]--;
                btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
                if (found_key.objectid != BTRFS_FREE_SPACE_OBJECTID ||
@@ -1117,7 +1114,7 @@ int btrfs_write_out_cache(struct btrfs_root *root,
 static inline unsigned long offset_to_bit(u64 bitmap_start, u32 unit,
                                          u64 offset)
 {
-       BUG_ON(offset < bitmap_start);
+       ASSERT(offset >= bitmap_start);
        offset -= bitmap_start;
        return (unsigned long)(div_u64(offset, unit));
 }
@@ -1272,7 +1269,7 @@ tree_search_offset(struct btrfs_free_space_ctl *ctl,
                if (n) {
                        entry = rb_entry(n, struct btrfs_free_space,
                                        offset_index);
-                       BUG_ON(entry->offset > offset);
+                       ASSERT(entry->offset <= offset);
                } else {
                        if (fuzzy)
                                return entry;
@@ -1336,7 +1333,7 @@ static int link_free_space(struct btrfs_free_space_ctl *ctl,
 {
        int ret = 0;
 
-       BUG_ON(!info->bitmap && !info->bytes);
+       ASSERT(info->bytes || info->bitmap);
        ret = tree_insert_offset(&ctl->free_space_offset, info->offset,
                                 &info->offset_index, (info->bitmap != NULL));
        if (ret)
@@ -1359,7 +1356,7 @@ static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
 
        max_bitmaps = max(max_bitmaps, 1);
 
-       BUG_ON(ctl->total_bitmaps > max_bitmaps);
+       ASSERT(ctl->total_bitmaps <= max_bitmaps);
 
        /*
         * The goal is to keep the total amount of memory used per 1gb of space
@@ -1403,7 +1400,7 @@ static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
 
        start = offset_to_bit(info->offset, ctl->unit, offset);
        count = bytes_to_bits(bytes, ctl->unit);
-       BUG_ON(start + count > BITS_PER_BITMAP);
+       ASSERT(start + count <= BITS_PER_BITMAP);
 
        bitmap_clear(info->bitmap, start, count);
 
@@ -1426,7 +1423,7 @@ static void bitmap_set_bits(struct btrfs_free_space_ctl *ctl,
 
        start = offset_to_bit(info->offset, ctl->unit, offset);
        count = bytes_to_bits(bytes, ctl->unit);
-       BUG_ON(start + count > BITS_PER_BITMAP);
+       ASSERT(start + count <= BITS_PER_BITMAP);
 
        bitmap_set(info->bitmap, start, count);
 
@@ -1742,7 +1739,7 @@ no_cluster_bitmap:
        bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
                                         1, 0);
        if (!bitmap_info) {
-               BUG_ON(added);
+               ASSERT(added == 0);
                goto new_bitmap;
        }
 
@@ -1882,7 +1879,7 @@ out:
 
        if (ret) {
                printk(KERN_CRIT "btrfs: unable to add free space :%d\n", ret);
-               BUG_ON(ret == -EEXIST);
+               ASSERT(ret != -EEXIST);
        }
 
        return ret;
@@ -1991,8 +1988,7 @@ void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
                if (info->bytes >= bytes && !block_group->ro)
                        count++;
                printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n",
-                      (unsigned long long)info->offset,
-                      (unsigned long long)info->bytes,
+                      info->offset, info->bytes,
                       (info->bitmap) ? "yes" : "no");
        }
        printk(KERN_INFO "block group has cluster?: %s\n",
@@ -2371,7 +2367,7 @@ again:
        rb_erase(&entry->offset_index, &ctl->free_space_offset);
        ret = tree_insert_offset(&cluster->root, entry->offset,
                                 &entry->offset_index, 1);
-       BUG_ON(ret); /* -EEXIST; Logic error */
+       ASSERT(!ret); /* -EEXIST; Logic error */
 
        trace_btrfs_setup_cluster(block_group, cluster,
                                  total_found * ctl->unit, 1);
@@ -2464,7 +2460,7 @@ setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
                ret = tree_insert_offset(&cluster->root, entry->offset,
                                         &entry->offset_index, 0);
                total_size += entry->bytes;
-               BUG_ON(ret); /* -EEXIST; Logic error */
+               ASSERT(!ret); /* -EEXIST; Logic error */
        } while (node && entry != last);
 
        cluster->max_size = max_extent;
@@ -2525,8 +2521,7 @@ setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
  * returns zero and sets up cluster if things worked out, otherwise
  * it returns -enospc
  */
-int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
-                            struct btrfs_root *root,
+int btrfs_find_space_cluster(struct btrfs_root *root,
                             struct btrfs_block_group_cache *block_group,
                             struct btrfs_free_cluster *cluster,
                             u64 offset, u64 bytes, u64 empty_size)
@@ -2856,7 +2851,7 @@ u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root)
 
                ret = search_bitmap(ctl, entry, &offset, &count);
                /* Logic error; Should be empty if it can't find anything */
-               BUG_ON(ret);
+               ASSERT(!ret);
 
                ino = offset;
                bitmap_clear_bits(ctl, entry, offset, 1);
@@ -2973,33 +2968,68 @@ int btrfs_write_out_ino_cache(struct btrfs_root *root,
 }
 
 #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
-static struct btrfs_block_group_cache *init_test_block_group(void)
+/*
+ * Use this if you need to make a bitmap or extent entry specifically, it
+ * doesn't do any of the merging that add_free_space does, this acts a lot like
+ * how the free space cache loading stuff works, so you can get really weird
+ * configurations.
+ */
+int test_add_free_space_entry(struct btrfs_block_group_cache *cache,
+                             u64 offset, u64 bytes, bool bitmap)
 {
-       struct btrfs_block_group_cache *cache;
+       struct btrfs_free_space_ctl *ctl = cache->free_space_ctl;
+       struct btrfs_free_space *info = NULL, *bitmap_info;
+       void *map = NULL;
+       u64 bytes_added;
+       int ret;
 
-       cache = kzalloc(sizeof(*cache), GFP_NOFS);
-       if (!cache)
-               return NULL;
-       cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
-                                       GFP_NOFS);
-       if (!cache->free_space_ctl) {
-               kfree(cache);
-               return NULL;
+again:
+       if (!info) {
+               info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
+               if (!info)
+                       return -ENOMEM;
        }
 
-       cache->key.objectid = 0;
-       cache->key.offset = 1024 * 1024 * 1024;
-       cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
-       cache->sectorsize = 4096;
+       if (!bitmap) {
+               spin_lock(&ctl->tree_lock);
+               info->offset = offset;
+               info->bytes = bytes;
+               ret = link_free_space(ctl, info);
+               spin_unlock(&ctl->tree_lock);
+               if (ret)
+                       kmem_cache_free(btrfs_free_space_cachep, info);
+               return ret;
+       }
+
+       if (!map) {
+               map = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
+               if (!map) {
+                       kmem_cache_free(btrfs_free_space_cachep, info);
+                       return -ENOMEM;
+               }
+       }
+
+       spin_lock(&ctl->tree_lock);
+       bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
+                                        1, 0);
+       if (!bitmap_info) {
+               info->bitmap = map;
+               map = NULL;
+               add_new_bitmap(ctl, info, offset);
+               bitmap_info = info;
+       }
 
-       spin_lock_init(&cache->lock);
-       INIT_LIST_HEAD(&cache->list);
-       INIT_LIST_HEAD(&cache->cluster_list);
-       INIT_LIST_HEAD(&cache->new_bg_list);
+       bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes);
+       bytes -= bytes_added;
+       offset += bytes_added;
+       spin_unlock(&ctl->tree_lock);
 
-       btrfs_init_free_space_ctl(cache);
+       if (bytes)
+               goto again;
 
-       return cache;
+       if (map)
+               kfree(map);
+       return 0;
 }
 
 /*
@@ -3007,8 +3037,8 @@ static struct btrfs_block_group_cache *init_test_block_group(void)
  * just used to check the absence of space, so if there is free space in the
  * range at all we will return 1.
  */
-static int check_exists(struct btrfs_block_group_cache *cache, u64 offset,
-                       u64 bytes)
+int test_check_exists(struct btrfs_block_group_cache *cache,
+                     u64 offset, u64 bytes)
 {
        struct btrfs_free_space_ctl *ctl = cache->free_space_ctl;
        struct btrfs_free_space *info;
@@ -3085,411 +3115,4 @@ out:
        spin_unlock(&ctl->tree_lock);
        return ret;
 }
-
-/*
- * Use this if you need to make a bitmap or extent entry specifically, it
- * doesn't do any of the merging that add_free_space does, this acts a lot like
- * how the free space cache loading stuff works, so you can get really weird
- * configurations.
- */
-static int add_free_space_entry(struct btrfs_block_group_cache *cache,
-                               u64 offset, u64 bytes, bool bitmap)
-{
-       struct btrfs_free_space_ctl *ctl = cache->free_space_ctl;
-       struct btrfs_free_space *info = NULL, *bitmap_info;
-       void *map = NULL;
-       u64 bytes_added;
-       int ret;
-
-again:
-       if (!info) {
-               info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
-               if (!info)
-                       return -ENOMEM;
-       }
-
-       if (!bitmap) {
-               spin_lock(&ctl->tree_lock);
-               info->offset = offset;
-               info->bytes = bytes;
-               ret = link_free_space(ctl, info);
-               spin_unlock(&ctl->tree_lock);
-               if (ret)
-                       kmem_cache_free(btrfs_free_space_cachep, info);
-               return ret;
-       }
-
-       if (!map) {
-               map = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
-               if (!map) {
-                       kmem_cache_free(btrfs_free_space_cachep, info);
-                       return -ENOMEM;
-               }
-       }
-
-       spin_lock(&ctl->tree_lock);
-       bitmap_info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
-                                        1, 0);
-       if (!bitmap_info) {
-               info->bitmap = map;
-               map = NULL;
-               add_new_bitmap(ctl, info, offset);
-               bitmap_info = info;
-       }
-
-       bytes_added = add_bytes_to_bitmap(ctl, bitmap_info, offset, bytes);
-       bytes -= bytes_added;
-       offset += bytes_added;
-       spin_unlock(&ctl->tree_lock);
-
-       if (bytes)
-               goto again;
-
-       if (map)
-               kfree(map);
-       return 0;
-}
-
-#define test_msg(fmt, ...) printk(KERN_INFO "btrfs: selftest: " fmt, ##__VA_ARGS__)
-
-/*
- * This test just does basic sanity checking, making sure we can add an exten
- * entry and remove space from either end and the middle, and make sure we can
- * remove space that covers adjacent extent entries.
- */
-static int test_extents(struct btrfs_block_group_cache *cache)
-{
-       int ret = 0;
-
-       test_msg("Running extent only tests\n");
-
-       /* First just make sure we can remove an entire entry */
-       ret = btrfs_add_free_space(cache, 0, 4 * 1024 * 1024);
-       if (ret) {
-               test_msg("Error adding initial extents %d\n", ret);
-               return ret;
-       }
-
-       ret = btrfs_remove_free_space(cache, 0, 4 * 1024 * 1024);
-       if (ret) {
-               test_msg("Error removing extent %d\n", ret);
-               return ret;
-       }
-
-       if (check_exists(cache, 0, 4 * 1024 * 1024)) {
-               test_msg("Full remove left some lingering space\n");
-               return -1;
-       }
-
-       /* Ok edge and middle cases now */
-       ret = btrfs_add_free_space(cache, 0, 4 * 1024 * 1024);
-       if (ret) {
-               test_msg("Error adding half extent %d\n", ret);
-               return ret;
-       }
-
-       ret = btrfs_remove_free_space(cache, 3 * 1024 * 1024, 1 * 1024 * 1024);
-       if (ret) {
-               test_msg("Error removing tail end %d\n", ret);
-               return ret;
-       }
-
-       ret = btrfs_remove_free_space(cache, 0, 1 * 1024 * 1024);
-       if (ret) {
-               test_msg("Error removing front end %d\n", ret);
-               return ret;
-       }
-
-       ret = btrfs_remove_free_space(cache, 2 * 1024 * 1024, 4096);
-       if (ret) {
-               test_msg("Error removing middle piece %d\n", ret);
-               return ret;
-       }
-
-       if (check_exists(cache, 0, 1 * 1024 * 1024)) {
-               test_msg("Still have space at the front\n");
-               return -1;
-       }
-
-       if (check_exists(cache, 2 * 1024 * 1024, 4096)) {
-               test_msg("Still have space in the middle\n");
-               return -1;
-       }
-
-       if (check_exists(cache, 3 * 1024 * 1024, 1 * 1024 * 1024)) {
-               test_msg("Still have space at the end\n");
-               return -1;
-       }
-
-       /* Cleanup */
-       __btrfs_remove_free_space_cache(cache->free_space_ctl);
-
-       return 0;
-}
-
-static int test_bitmaps(struct btrfs_block_group_cache *cache)
-{
-       u64 next_bitmap_offset;
-       int ret;
-
-       test_msg("Running bitmap only tests\n");
-
-       ret = add_free_space_entry(cache, 0, 4 * 1024 * 1024, 1);
-       if (ret) {
-               test_msg("Couldn't create a bitmap entry %d\n", ret);
-               return ret;
-       }
-
-       ret = btrfs_remove_free_space(cache, 0, 4 * 1024 * 1024);
-       if (ret) {
-               test_msg("Error removing bitmap full range %d\n", ret);
-               return ret;
-       }
-
-       if (check_exists(cache, 0, 4 * 1024 * 1024)) {
-               test_msg("Left some space in bitmap\n");
-               return -1;
-       }
-
-       ret = add_free_space_entry(cache, 0, 4 * 1024 * 1024, 1);
-       if (ret) {
-               test_msg("Couldn't add to our bitmap entry %d\n", ret);
-               return ret;
-       }
-
-       ret = btrfs_remove_free_space(cache, 1 * 1024 * 1024, 2 * 1024 * 1024);
-       if (ret) {
-               test_msg("Couldn't remove middle chunk %d\n", ret);
-               return ret;
-       }
-
-       /*
-        * The first bitmap we have starts at offset 0 so the next one is just
-        * at the end of the first bitmap.
-        */
-       next_bitmap_offset = (u64)(BITS_PER_BITMAP * 4096);
-
-       /* Test a bit straddling two bitmaps */
-       ret = add_free_space_entry(cache, next_bitmap_offset -
-                                  (2 * 1024 * 1024), 4 * 1024 * 1024, 1);
-       if (ret) {
-               test_msg("Couldn't add space that straddles two bitmaps %d\n",
-                               ret);
-               return ret;
-       }
-
-       ret = btrfs_remove_free_space(cache, next_bitmap_offset -
-                                     (1 * 1024 * 1024), 2 * 1024 * 1024);
-       if (ret) {
-               test_msg("Couldn't remove overlapping space %d\n", ret);
-               return ret;
-       }
-
-       if (check_exists(cache, next_bitmap_offset - (1 * 1024 * 1024),
-                        2 * 1024 * 1024)) {
-               test_msg("Left some space when removing overlapping\n");
-               return -1;
-       }
-
-       __btrfs_remove_free_space_cache(cache->free_space_ctl);
-
-       return 0;
-}
-
-/* This is the high grade jackassery */
-static int test_bitmaps_and_extents(struct btrfs_block_group_cache *cache)
-{
-       u64 bitmap_offset = (u64)(BITS_PER_BITMAP * 4096);
-       int ret;
-
-       test_msg("Running bitmap and extent tests\n");
-
-       /*
-        * First let's do something simple, an extent at the same offset as the
-        * bitmap, but the free space completely in the extent and then
-        * completely in the bitmap.
-        */
-       ret = add_free_space_entry(cache, 4 * 1024 * 1024, 1 * 1024 * 1024, 1);
-       if (ret) {
-               test_msg("Couldn't create bitmap entry %d\n", ret);
-               return ret;
-       }
-
-       ret = add_free_space_entry(cache, 0, 1 * 1024 * 1024, 0);
-       if (ret) {
-               test_msg("Couldn't add extent entry %d\n", ret);
-               return ret;
-       }
-
-       ret = btrfs_remove_free_space(cache, 0, 1 * 1024 * 1024);
-       if (ret) {
-               test_msg("Couldn't remove extent entry %d\n", ret);
-               return ret;
-       }
-
-       if (check_exists(cache, 0, 1 * 1024 * 1024)) {
-               test_msg("Left remnants after our remove\n");
-               return -1;
-       }
-
-       /* Now to add back the extent entry and remove from the bitmap */
-       ret = add_free_space_entry(cache, 0, 1 * 1024 * 1024, 0);
-       if (ret) {
-               test_msg("Couldn't re-add extent entry %d\n", ret);
-               return ret;
-       }
-
-       ret = btrfs_remove_free_space(cache, 4 * 1024 * 1024, 1 * 1024 * 1024);
-       if (ret) {
-               test_msg("Couldn't remove from bitmap %d\n", ret);
-               return ret;
-       }
-
-       if (check_exists(cache, 4 * 1024 * 1024, 1 * 1024 * 1024)) {
-               test_msg("Left remnants in the bitmap\n");
-               return -1;
-       }
-
-       /*
-        * Ok so a little more evil, extent entry and bitmap at the same offset,
-        * removing an overlapping chunk.
-        */
-       ret = add_free_space_entry(cache, 1 * 1024 * 1024, 4 * 1024 * 1024, 1);
-       if (ret) {
-               test_msg("Couldn't add to a bitmap %d\n", ret);
-               return ret;
-       }
-
-       ret = btrfs_remove_free_space(cache, 512 * 1024, 3 * 1024 * 1024);
-       if (ret) {
-               test_msg("Couldn't remove overlapping space %d\n", ret);
-               return ret;
-       }
-
-       if (check_exists(cache, 512 * 1024, 3 * 1024 * 1024)) {
-               test_msg("Left over peices after removing overlapping\n");
-               return -1;
-       }
-
-       __btrfs_remove_free_space_cache(cache->free_space_ctl);
-
-       /* Now with the extent entry offset into the bitmap */
-       ret = add_free_space_entry(cache, 4 * 1024 * 1024, 4 * 1024 * 1024, 1);
-       if (ret) {
-               test_msg("Couldn't add space to the bitmap %d\n", ret);
-               return ret;
-       }
-
-       ret = add_free_space_entry(cache, 2 * 1024 * 1024, 2 * 1024 * 1024, 0);
-       if (ret) {
-               test_msg("Couldn't add extent to the cache %d\n", ret);
-               return ret;
-       }
-
-       ret = btrfs_remove_free_space(cache, 3 * 1024 * 1024, 4 * 1024 * 1024);
-       if (ret) {
-               test_msg("Problem removing overlapping space %d\n", ret);
-               return ret;
-       }
-
-       if (check_exists(cache, 3 * 1024 * 1024, 4 * 1024 * 1024)) {
-               test_msg("Left something behind when removing space");
-               return -1;
-       }
-
-       /*
-        * This has blown up in the past, the extent entry starts before the
-        * bitmap entry, but we're trying to remove an offset that falls
-        * completely within the bitmap range and is in both the extent entry
-        * and the bitmap entry, looks like this
-        *
-        *   [ extent ]
-        *      [ bitmap ]
-        *        [ del ]
-        */
-       __btrfs_remove_free_space_cache(cache->free_space_ctl);
-       ret = add_free_space_entry(cache, bitmap_offset + 4 * 1024 * 1024,
-                                  4 * 1024 * 1024, 1);
-       if (ret) {
-               test_msg("Couldn't add bitmap %d\n", ret);
-               return ret;
-       }
-
-       ret = add_free_space_entry(cache, bitmap_offset - 1 * 1024 * 1024,
-                                  5 * 1024 * 1024, 0);
-       if (ret) {
-               test_msg("Couldn't add extent entry %d\n", ret);
-               return ret;
-       }
-
-       ret = btrfs_remove_free_space(cache, bitmap_offset + 1 * 1024 * 1024,
-                                     5 * 1024 * 1024);
-       if (ret) {
-               test_msg("Failed to free our space %d\n", ret);
-               return ret;
-       }
-
-       if (check_exists(cache, bitmap_offset + 1 * 1024 * 1024,
-                        5 * 1024 * 1024)) {
-               test_msg("Left stuff over\n");
-               return -1;
-       }
-
-       __btrfs_remove_free_space_cache(cache->free_space_ctl);
-
-       /*
-        * This blew up before, we have part of the free space in a bitmap and
-        * then the entirety of the rest of the space in an extent.  This used
-        * to return -EAGAIN back from btrfs_remove_extent, make sure this
-        * doesn't happen.
-        */
-       ret = add_free_space_entry(cache, 1 * 1024 * 1024, 2 * 1024 * 1024, 1);
-       if (ret) {
-               test_msg("Couldn't add bitmap entry %d\n", ret);
-               return ret;
-       }
-
-       ret = add_free_space_entry(cache, 3 * 1024 * 1024, 1 * 1024 * 1024, 0);
-       if (ret) {
-               test_msg("Couldn't add extent entry %d\n", ret);
-               return ret;
-       }
-
-       ret = btrfs_remove_free_space(cache, 1 * 1024 * 1024, 3 * 1024 * 1024);
-       if (ret) {
-               test_msg("Error removing bitmap and extent overlapping %d\n", ret);
-               return ret;
-       }
-
-       __btrfs_remove_free_space_cache(cache->free_space_ctl);
-       return 0;
-}
-
-void btrfs_test_free_space_cache(void)
-{
-       struct btrfs_block_group_cache *cache;
-
-       test_msg("Running btrfs free space cache tests\n");
-
-       cache = init_test_block_group();
-       if (!cache) {
-               test_msg("Couldn't run the tests\n");
-               return;
-       }
-
-       if (test_extents(cache))
-               goto out;
-       if (test_bitmaps(cache))
-               goto out;
-       if (test_bitmaps_and_extents(cache))
-               goto out;
-out:
-       __btrfs_remove_free_space_cache(cache->free_space_ctl);
-       kfree(cache->free_space_ctl);
-       kfree(cache);
-       test_msg("Free space cache tests finished\n");
-}
-#undef test_msg
-#else /* !CONFIG_BTRFS_FS_RUN_SANITY_TESTS */
-void btrfs_test_free_space_cache(void) {}
-#endif /* !CONFIG_BTRFS_FS_RUN_SANITY_TESTS */
+#endif /* CONFIG_BTRFS_FS_RUN_SANITY_TESTS */
index 894116b..c749041 100644 (file)
@@ -98,8 +98,7 @@ u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
 u64 btrfs_find_ino_for_alloc(struct btrfs_root *fs_root);
 void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
                           u64 bytes);
-int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
-                            struct btrfs_root *root,
+int btrfs_find_space_cluster(struct btrfs_root *root,
                             struct btrfs_block_group_cache *block_group,
                             struct btrfs_free_cluster *cluster,
                             u64 offset, u64 bytes, u64 empty_size);
@@ -113,6 +112,12 @@ int btrfs_return_cluster_to_free_space(
 int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
                           u64 *trimmed, u64 start, u64 end, u64 minlen);
 
-void btrfs_test_free_space_cache(void);
+/* Support functions for running our sanity tests */
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+int test_add_free_space_entry(struct btrfs_block_group_cache *cache,
+                             u64 offset, u64 bytes, bool bitmap);
+int test_check_exists(struct btrfs_block_group_cache *cache,
+                     u64 offset, u64 bytes);
+#endif
 
 #endif
index 7bdc83d..f338c56 100644 (file)
@@ -230,12 +230,13 @@ fail:
  * does the checks required to make sure the data is small enough
  * to fit as an inline extent.
  */
-static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
-                                struct btrfs_root *root,
-                                struct inode *inode, u64 start, u64 end,
-                                size_t compressed_size, int compress_type,
-                                struct page **compressed_pages)
+static noinline int cow_file_range_inline(struct btrfs_root *root,
+                                         struct inode *inode, u64 start,
+                                         u64 end, size_t compressed_size,
+                                         int compress_type,
+                                         struct page **compressed_pages)
 {
+       struct btrfs_trans_handle *trans;
        u64 isize = i_size_read(inode);
        u64 actual_end = min(end + 1, isize);
        u64 inline_len = actual_end - start;
@@ -256,9 +257,16 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
                return 1;
        }
 
+       trans = btrfs_join_transaction(root);
+       if (IS_ERR(trans))
+               return PTR_ERR(trans);
+       trans->block_rsv = &root->fs_info->delalloc_block_rsv;
+
        ret = btrfs_drop_extents(trans, root, inode, start, aligned_end, 1);
-       if (ret)
-               return ret;
+       if (ret) {
+               btrfs_abort_transaction(trans, root, ret);
+               goto out;
+       }
 
        if (isize > actual_end)
                inline_len = min_t(u64, isize, actual_end);
@@ -267,15 +275,18 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
                                   compress_type, compressed_pages);
        if (ret && ret != -ENOSPC) {
                btrfs_abort_transaction(trans, root, ret);
-               return ret;
+               goto out;
        } else if (ret == -ENOSPC) {
-               return 1;
+               ret = 1;
+               goto out;
        }
 
        set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &BTRFS_I(inode)->runtime_flags);
        btrfs_delalloc_release_metadata(inode, end + 1 - start);
        btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0);
-       return 0;
+out:
+       btrfs_end_transaction(trans, root);
+       return ret;
 }
 
 struct async_extent {
@@ -343,7 +354,6 @@ static noinline int compress_file_range(struct inode *inode,
                                        int *num_added)
 {
        struct btrfs_root *root = BTRFS_I(inode)->root;
-       struct btrfs_trans_handle *trans;
        u64 num_bytes;
        u64 blocksize = root->sectorsize;
        u64 actual_end;
@@ -461,45 +471,36 @@ again:
        }
 cont:
        if (start == 0) {
-               trans = btrfs_join_transaction(root);
-               if (IS_ERR(trans)) {
-                       ret = PTR_ERR(trans);
-                       trans = NULL;
-                       goto cleanup_and_out;
-               }
-               trans->block_rsv = &root->fs_info->delalloc_block_rsv;
-
                /* lets try to make an inline extent */
                if (ret || total_in < (actual_end - start)) {
                        /* we didn't compress the entire range, try
                         * to make an uncompressed inline extent.
                         */
-                       ret = cow_file_range_inline(trans, root, inode,
-                                                   start, end, 0, 0, NULL);
+                       ret = cow_file_range_inline(root, inode, start, end,
+                                                   0, 0, NULL);
                } else {
                        /* try making a compressed inline extent */
-                       ret = cow_file_range_inline(trans, root, inode,
-                                                   start, end,
+                       ret = cow_file_range_inline(root, inode, start, end,
                                                    total_compressed,
                                                    compress_type, pages);
                }
                if (ret <= 0) {
+                       unsigned long clear_flags = EXTENT_DELALLOC |
+                               EXTENT_DEFRAG;
+                       clear_flags |= (ret < 0) ? EXTENT_DO_ACCOUNTING : 0;
+
                        /*
                         * inline extent creation worked or returned error,
                         * we don't need to create any more async work items.
                         * Unlock and free up our temp pages.
                         */
-                       extent_clear_unlock_delalloc(inode,
-                            &BTRFS_I(inode)->io_tree,
-                            start, end, NULL,
-                            EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY |
-                            EXTENT_CLEAR_DELALLOC |
-                            EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK);
-
-                       btrfs_end_transaction(trans, root);
+                       extent_clear_unlock_delalloc(inode, start, end, NULL,
+                                                    clear_flags, PAGE_UNLOCK |
+                                                    PAGE_CLEAR_DIRTY |
+                                                    PAGE_SET_WRITEBACK |
+                                                    PAGE_END_WRITEBACK);
                        goto free_pages_out;
                }
-               btrfs_end_transaction(trans, root);
        }
 
        if (will_compress) {
@@ -590,20 +591,6 @@ free_pages_out:
        kfree(pages);
 
        goto out;
-
-cleanup_and_out:
-       extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
-                                    start, end, NULL,
-                                    EXTENT_CLEAR_UNLOCK_PAGE |
-                                    EXTENT_CLEAR_DIRTY |
-                                    EXTENT_CLEAR_DELALLOC |
-                                    EXTENT_SET_WRITEBACK |
-                                    EXTENT_END_WRITEBACK);
-       if (!trans || IS_ERR(trans))
-               btrfs_error(root->fs_info, ret, "Failed to join transaction");
-       else
-               btrfs_abort_transaction(trans, root, ret);
-       goto free_pages_out;
 }
 
 /*
@@ -617,7 +604,6 @@ static noinline int submit_compressed_extents(struct inode *inode,
 {
        struct async_extent *async_extent;
        u64 alloc_hint = 0;
-       struct btrfs_trans_handle *trans;
        struct btrfs_key ins;
        struct extent_map *em;
        struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -678,20 +664,10 @@ retry:
                lock_extent(io_tree, async_extent->start,
                            async_extent->start + async_extent->ram_size - 1);
 
-               trans = btrfs_join_transaction(root);
-               if (IS_ERR(trans)) {
-                       ret = PTR_ERR(trans);
-               } else {
-                       trans->block_rsv = &root->fs_info->delalloc_block_rsv;
-                       ret = btrfs_reserve_extent(trans, root,
+               ret = btrfs_reserve_extent(root,
                                           async_extent->compressed_size,
                                           async_extent->compressed_size,
                                           0, alloc_hint, &ins, 1);
-                       if (ret && ret != -ENOSPC)
-                               btrfs_abort_transaction(trans, root, ret);
-                       btrfs_end_transaction(trans, root);
-               }
-
                if (ret) {
                        int i;
 
@@ -770,16 +746,12 @@ retry:
                /*
                 * clear dirty, set writeback and unlock the pages.
                 */
-               extent_clear_unlock_delalloc(inode,
-                               &BTRFS_I(inode)->io_tree,
-                               async_extent->start,
+               extent_clear_unlock_delalloc(inode, async_extent->start,
                                async_extent->start +
                                async_extent->ram_size - 1,
-                               NULL, EXTENT_CLEAR_UNLOCK_PAGE |
-                               EXTENT_CLEAR_UNLOCK |
-                               EXTENT_CLEAR_DELALLOC |
-                               EXTENT_CLEAR_DIRTY | EXTENT_SET_WRITEBACK);
-
+                               NULL, EXTENT_LOCKED | EXTENT_DELALLOC,
+                               PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
+                               PAGE_SET_WRITEBACK);
                ret = btrfs_submit_compressed_write(inode,
                                    async_extent->start,
                                    async_extent->ram_size,
@@ -798,16 +770,13 @@ out:
 out_free_reserve:
        btrfs_free_reserved_extent(root, ins.objectid, ins.offset);
 out_free:
-       extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
-                                    async_extent->start,
+       extent_clear_unlock_delalloc(inode, async_extent->start,
                                     async_extent->start +
                                     async_extent->ram_size - 1,
-                                    NULL, EXTENT_CLEAR_UNLOCK_PAGE |
-                                    EXTENT_CLEAR_UNLOCK |
-                                    EXTENT_CLEAR_DELALLOC |
-                                    EXTENT_CLEAR_DIRTY |
-                                    EXTENT_SET_WRITEBACK |
-                                    EXTENT_END_WRITEBACK);
+                                    NULL, EXTENT_LOCKED | EXTENT_DELALLOC |
+                                    EXTENT_DEFRAG | EXTENT_DO_ACCOUNTING,
+                                    PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
+                                    PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK);
        kfree(async_extent);
        goto again;
 }
@@ -857,14 +826,13 @@ static u64 get_extent_allocation_hint(struct inode *inode, u64 start,
  * required to start IO on it.  It may be clean and already done with
  * IO when we return.
  */
-static noinline int __cow_file_range(struct btrfs_trans_handle *trans,
-                                    struct inode *inode,
-                                    struct btrfs_root *root,
-                                    struct page *locked_page,
-                                    u64 start, u64 end, int *page_started,
-                                    unsigned long *nr_written,
-                                    int unlock)
+static noinline int cow_file_range(struct inode *inode,
+                                  struct page *locked_page,
+                                  u64 start, u64 end, int *page_started,
+                                  unsigned long *nr_written,
+                                  int unlock)
 {
+       struct btrfs_root *root = BTRFS_I(inode)->root;
        u64 alloc_hint = 0;
        u64 num_bytes;
        unsigned long ram_size;
@@ -885,29 +853,24 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans,
        /* if this is a small write inside eof, kick off defrag */
        if (num_bytes < 64 * 1024 &&
            (start > 0 || end + 1 < BTRFS_I(inode)->disk_i_size))
-               btrfs_add_inode_defrag(trans, inode);
+               btrfs_add_inode_defrag(NULL, inode);
 
        if (start == 0) {
                /* lets try to make an inline extent */
-               ret = cow_file_range_inline(trans, root, inode,
-                                           start, end, 0, 0, NULL);
+               ret = cow_file_range_inline(root, inode, start, end, 0, 0,
+                                           NULL);
                if (ret == 0) {
-                       extent_clear_unlock_delalloc(inode,
-                                    &BTRFS_I(inode)->io_tree,
-                                    start, end, NULL,
-                                    EXTENT_CLEAR_UNLOCK_PAGE |
-                                    EXTENT_CLEAR_UNLOCK |
-                                    EXTENT_CLEAR_DELALLOC |
-                                    EXTENT_CLEAR_DIRTY |
-                                    EXTENT_SET_WRITEBACK |
-                                    EXTENT_END_WRITEBACK);
+                       extent_clear_unlock_delalloc(inode, start, end, NULL,
+                                    EXTENT_LOCKED | EXTENT_DELALLOC |
+                                    EXTENT_DEFRAG, PAGE_UNLOCK |
+                                    PAGE_CLEAR_DIRTY | PAGE_SET_WRITEBACK |
+                                    PAGE_END_WRITEBACK);
 
                        *nr_written = *nr_written +
                             (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE;
                        *page_started = 1;
                        goto out;
                } else if (ret < 0) {
-                       btrfs_abort_transaction(trans, root, ret);
                        goto out_unlock;
                }
        }
@@ -922,13 +885,11 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans,
                unsigned long op;
 
                cur_alloc_size = disk_num_bytes;
-               ret = btrfs_reserve_extent(trans, root, cur_alloc_size,
+               ret = btrfs_reserve_extent(root, cur_alloc_size,
                                           root->sectorsize, 0, alloc_hint,
                                           &ins, 1);
-               if (ret < 0) {
-                       btrfs_abort_transaction(trans, root, ret);
+               if (ret < 0)
                        goto out_unlock;
-               }
 
                em = alloc_extent_map();
                if (!em) {
@@ -974,10 +935,8 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans,
                    BTRFS_DATA_RELOC_TREE_OBJECTID) {
                        ret = btrfs_reloc_clone_csums(inode, start,
                                                      cur_alloc_size);
-                       if (ret) {
-                               btrfs_abort_transaction(trans, root, ret);
+                       if (ret)
                                goto out_reserve;
-                       }
                }
 
                if (disk_num_bytes < cur_alloc_size)
@@ -990,13 +949,13 @@ static noinline int __cow_file_range(struct btrfs_trans_handle *trans,
                 * Do set the Private2 bit so we know this page was properly
                 * setup for writepage
                 */
-               op = unlock ? EXTENT_CLEAR_UNLOCK_PAGE : 0;
-               op |= EXTENT_CLEAR_UNLOCK | EXTENT_CLEAR_DELALLOC |
-                       EXTENT_SET_PRIVATE2;
+               op = unlock ? PAGE_UNLOCK : 0;
+               op |= PAGE_SET_PRIVATE2;
 
-               extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
-                                            start, start + ram_size - 1,
-                                            locked_page, op);
+               extent_clear_unlock_delalloc(inode, start,
+                                            start + ram_size - 1, locked_page,
+                                            EXTENT_LOCKED | EXTENT_DELALLOC,
+                                            op);
                disk_num_bytes -= cur_alloc_size;
                num_bytes -= cur_alloc_size;
                alloc_hint = ins.objectid + ins.offset;
@@ -1008,52 +967,14 @@ out:
 out_reserve:
        btrfs_free_reserved_extent(root, ins.objectid, ins.offset);
 out_unlock:
-       extent_clear_unlock_delalloc(inode,
-                    &BTRFS_I(inode)->io_tree,
-                    start, end, locked_page,
-                    EXTENT_CLEAR_UNLOCK_PAGE |
-                    EXTENT_CLEAR_UNLOCK |
-                    EXTENT_CLEAR_DELALLOC |
-                    EXTENT_CLEAR_DIRTY |
-                    EXTENT_SET_WRITEBACK |
-                    EXTENT_END_WRITEBACK);
-
+       extent_clear_unlock_delalloc(inode, start, end, locked_page,
+                                    EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
+                                    EXTENT_DELALLOC | EXTENT_DEFRAG,
+                                    PAGE_UNLOCK | PAGE_CLEAR_DIRTY |
+                                    PAGE_SET_WRITEBACK | PAGE_END_WRITEBACK);
        goto out;
 }
 
-static noinline int cow_file_range(struct inode *inode,
-                                  struct page *locked_page,
-                                  u64 start, u64 end, int *page_started,
-                                  unsigned long *nr_written,
-                                  int unlock)
-{
-       struct btrfs_trans_handle *trans;
-       struct btrfs_root *root = BTRFS_I(inode)->root;
-       int ret;
-
-       trans = btrfs_join_transaction(root);
-       if (IS_ERR(trans)) {
-               extent_clear_unlock_delalloc(inode,
-                            &BTRFS_I(inode)->io_tree,
-                            start, end, locked_page,
-                            EXTENT_CLEAR_UNLOCK_PAGE |
-                            EXTENT_CLEAR_UNLOCK |
-                            EXTENT_CLEAR_DELALLOC |
-                            EXTENT_CLEAR_DIRTY |
-                            EXTENT_SET_WRITEBACK |
-                            EXTENT_END_WRITEBACK);
-               return PTR_ERR(trans);
-       }
-       trans->block_rsv = &root->fs_info->delalloc_block_rsv;
-
-       ret = __cow_file_range(trans, inode, root, locked_page, start, end,
-                              page_started, nr_written, unlock);
-
-       btrfs_end_transaction(trans, root);
-
-       return ret;
-}
-
 /*
  * work queue call back to started compression on a file and pages
  */
@@ -1221,15 +1142,13 @@ static noinline int run_delalloc_nocow(struct inode *inode,
 
        path = btrfs_alloc_path();
        if (!path) {
-               extent_clear_unlock_delalloc(inode,
-                            &BTRFS_I(inode)->io_tree,
-                            start, end, locked_page,
-                            EXTENT_CLEAR_UNLOCK_PAGE |
-                            EXTENT_CLEAR_UNLOCK |
-                            EXTENT_CLEAR_DELALLOC |
-                            EXTENT_CLEAR_DIRTY |
-                            EXTENT_SET_WRITEBACK |
-                            EXTENT_END_WRITEBACK);
+               extent_clear_unlock_delalloc(inode, start, end, locked_page,
+                                            EXTENT_LOCKED | EXTENT_DELALLOC |
+                                            EXTENT_DO_ACCOUNTING |
+                                            EXTENT_DEFRAG, PAGE_UNLOCK |
+                                            PAGE_CLEAR_DIRTY |
+                                            PAGE_SET_WRITEBACK |
+                                            PAGE_END_WRITEBACK);
                return -ENOMEM;
        }
 
@@ -1241,15 +1160,13 @@ static noinline int run_delalloc_nocow(struct inode *inode,
                trans = btrfs_join_transaction(root);
 
        if (IS_ERR(trans)) {
-               extent_clear_unlock_delalloc(inode,
-                            &BTRFS_I(inode)->io_tree,
-                            start, end, locked_page,
-                            EXTENT_CLEAR_UNLOCK_PAGE |
-                            EXTENT_CLEAR_UNLOCK |
-                            EXTENT_CLEAR_DELALLOC |
-                            EXTENT_CLEAR_DIRTY |
-                            EXTENT_SET_WRITEBACK |
-                            EXTENT_END_WRITEBACK);
+               extent_clear_unlock_delalloc(inode, start, end, locked_page,
+                                            EXTENT_LOCKED | EXTENT_DELALLOC |
+                                            EXTENT_DO_ACCOUNTING |
+                                            EXTENT_DEFRAG, PAGE_UNLOCK |
+                                            PAGE_CLEAR_DIRTY |
+                                            PAGE_SET_WRITEBACK |
+                                            PAGE_END_WRITEBACK);
                btrfs_free_path(path);
                return PTR_ERR(trans);
        }
@@ -1369,9 +1286,9 @@ out_check:
 
                btrfs_release_path(path);
                if (cow_start != (u64)-1) {
-                       ret = __cow_file_range(trans, inode, root, locked_page,
-                                              cow_start, found_key.offset - 1,
-                                              page_started, nr_written, 1);
+                       ret = cow_file_range(inode, locked_page,
+                                            cow_start, found_key.offset - 1,
+                                            page_started, nr_written, 1);
                        if (ret) {
                                btrfs_abort_transaction(trans, root, ret);
                                goto error;
@@ -1428,11 +1345,11 @@ out_check:
                        }
                }
 
-               extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree,
-                               cur_offset, cur_offset + num_bytes - 1,
-                               locked_page, EXTENT_CLEAR_UNLOCK_PAGE |
-                               EXTENT_CLEAR_UNLOCK | EXTENT_CLEAR_DELALLOC |
-                               EXTENT_SET_PRIVATE2);
+               extent_clear_unlock_delalloc(inode, cur_offset,
+                                            cur_offset + num_bytes - 1,
+                                            locked_page, EXTENT_LOCKED |
+                                            EXTENT_DELALLOC, PAGE_UNLOCK |
+                                            PAGE_SET_PRIVATE2);
                cur_offset = extent_end;
                if (cur_offset > end)
                        break;
@@ -1445,9 +1362,8 @@ out_check:
        }
 
        if (cow_start != (u64)-1) {
-               ret = __cow_file_range(trans, inode, root, locked_page,
-                                      cow_start, end,
-                                      page_started, nr_written, 1);
+               ret = cow_file_range(inode, locked_page, cow_start, end,
+                                    page_started, nr_written, 1);
                if (ret) {
                        btrfs_abort_transaction(trans, root, ret);
                        goto error;
@@ -1460,16 +1376,13 @@ error:
                ret = err;
 
        if (ret && cur_offset < end)
-               extent_clear_unlock_delalloc(inode,
-                            &BTRFS_I(inode)->io_tree,
-                            cur_offset, end, locked_page,
-                            EXTENT_CLEAR_UNLOCK_PAGE |
-                            EXTENT_CLEAR_UNLOCK |
-                            EXTENT_CLEAR_DELALLOC |
-                            EXTENT_CLEAR_DIRTY |
-                            EXTENT_SET_WRITEBACK |
-                            EXTENT_END_WRITEBACK);
-
+               extent_clear_unlock_delalloc(inode, cur_offset, end,
+                                            locked_page, EXTENT_LOCKED |
+                                            EXTENT_DELALLOC | EXTENT_DEFRAG |
+                                            EXTENT_DO_ACCOUNTING, PAGE_UNLOCK |
+                                            PAGE_CLEAR_DIRTY |
+                                            PAGE_SET_WRITEBACK |
+                                            PAGE_END_WRITEBACK);
        btrfs_free_path(path);
        return ret;
 }
@@ -2132,6 +2045,7 @@ static noinline int record_one_backref(u64 inum, u64 offset, u64 root_id,
                WARN_ON(1);
                return ret;
        }
+       ret = 0;
 
        while (1) {
                cond_resched();
@@ -2181,8 +2095,6 @@ static noinline int record_one_backref(u64 inum, u64 offset, u64 root_id,
                    old->len || extent_offset + num_bytes <=
                    old->extent_offset + old->offset)
                        continue;
-
-               ret = 0;
                break;
        }
 
@@ -2238,16 +2150,18 @@ static noinline bool record_extent_backrefs(struct btrfs_path *path,
 
 static int relink_is_mergable(struct extent_buffer *leaf,
                              struct btrfs_file_extent_item *fi,
-                             u64 disk_bytenr)
+                             struct new_sa_defrag_extent *new)
 {
-       if (btrfs_file_extent_disk_bytenr(leaf, fi) != disk_bytenr)
+       if (btrfs_file_extent_disk_bytenr(leaf, fi) != new->bytenr)
                return 0;
 
        if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG)
                return 0;
 
-       if (btrfs_file_extent_compression(leaf, fi) ||
-           btrfs_file_extent_encryption(leaf, fi) ||
+       if (btrfs_file_extent_compression(leaf, fi) != new->compress_type)
+               return 0;
+
+       if (btrfs_file_extent_encryption(leaf, fi) ||
            btrfs_file_extent_other_encoding(leaf, fi))
                return 0;
 
@@ -2391,8 +2305,8 @@ again:
                                    struct btrfs_file_extent_item);
                extent_len = btrfs_file_extent_num_bytes(leaf, fi);
 
-               if (relink_is_mergable(leaf, fi, new->bytenr) &&
-                   extent_len + found_key.offset == start) {
+               if (extent_len + found_key.offset == start &&
+                   relink_is_mergable(leaf, fi, new)) {
                        btrfs_set_file_extent_num_bytes(leaf, fi,
                                                        extent_len + len);
                        btrfs_mark_buffer_dirty(leaf);
@@ -2648,8 +2562,10 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
        struct extent_state *cached_state = NULL;
        struct new_sa_defrag_extent *new = NULL;
        int compress_type = 0;
-       int ret;
+       int ret = 0;
+       u64 logical_len = ordered_extent->len;
        bool nolock;
+       bool truncated = false;
 
        nolock = btrfs_is_free_space_inode(inode);
 
@@ -2658,6 +2574,14 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
                goto out;
        }
 
+       if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered_extent->flags)) {
+               truncated = true;
+               logical_len = ordered_extent->truncated_len;
+               /* Truncated the entire extent, don't bother adding */
+               if (!logical_len)
+                       goto out;
+       }
+
        if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
                BUG_ON(!list_empty(&ordered_extent->list)); /* Logic error */
                btrfs_ordered_update_i_size(inode, 0, ordered_extent);
@@ -2713,15 +2637,14 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
                ret = btrfs_mark_extent_written(trans, inode,
                                                ordered_extent->file_offset,
                                                ordered_extent->file_offset +
-                                               ordered_extent->len);
+                                               logical_len);
        } else {
                BUG_ON(root == root->fs_info->tree_root);
                ret = insert_reserved_file_extent(trans, inode,
                                                ordered_extent->file_offset,
                                                ordered_extent->start,
                                                ordered_extent->disk_len,
-                                               ordered_extent->len,
-                                               ordered_extent->len,
+                                               logical_len, logical_len,
                                                compress_type, 0, 0,
                                                BTRFS_FILE_EXTENT_REG);
        }
@@ -2753,17 +2676,27 @@ out:
        if (trans)
                btrfs_end_transaction(trans, root);
 
-       if (ret) {
-               clear_extent_uptodate(io_tree, ordered_extent->file_offset,
-                                     ordered_extent->file_offset +
-                                     ordered_extent->len - 1, NULL, GFP_NOFS);
+       if (ret || truncated) {
+               u64 start, end;
+
+               if (truncated)
+                       start = ordered_extent->file_offset + logical_len;
+               else
+                       start = ordered_extent->file_offset;
+               end = ordered_extent->file_offset + ordered_extent->len - 1;
+               clear_extent_uptodate(io_tree, start, end, NULL, GFP_NOFS);
+
+               /* Drop the cache for the part of the extent we didn't write. */
+               btrfs_drop_extent_cache(inode, start, end, 0);
 
                /*
                 * If the ordered extent had an IOERR or something else went
                 * wrong we need to return the space for this ordered extent
-                * back to the allocator.
+                * back to the allocator.  We only free the extent in the
+                * truncated case if we didn't write out the extent at all.
                 */
-               if (!test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) &&
+               if ((ret || !logical_len) &&
+                   !test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags) &&
                    !test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags))
                        btrfs_free_reserved_extent(root, ordered_extent->start,
                                                   ordered_extent->disk_len);
@@ -2827,16 +2760,16 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end,
  * if there's a match, we allow the bio to finish.  If not, the code in
  * extent_io.c will try to find good copies for us.
  */
-static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
-                              struct extent_state *state, int mirror)
+static int btrfs_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
+                                     u64 phy_offset, struct page *page,
+                                     u64 start, u64 end, int mirror)
 {
        size_t offset = start - page_offset(page);
        struct inode *inode = page->mapping->host;
        struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
        char *kaddr;
-       u64 private = ~(u32)0;
-       int ret;
        struct btrfs_root *root = BTRFS_I(inode)->root;
+       u32 csum_expected;
        u32 csum = ~(u32)0;
        static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
                                      DEFAULT_RATELIMIT_BURST);
@@ -2856,19 +2789,13 @@ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
                return 0;
        }
 
-       if (state && state->start == start) {
-               private = state->private;
-               ret = 0;
-       } else {
-               ret = get_state_private(io_tree, start, &private);
-       }
-       kaddr = kmap_atomic(page);
-       if (ret)
-               goto zeroit;
+       phy_offset >>= inode->i_sb->s_blocksize_bits;
+       csum_expected = *(((u32 *)io_bio->csum) + phy_offset);
 
+       kaddr = kmap_atomic(page);
        csum = btrfs_csum_data(kaddr + offset, csum,  end - start + 1);
        btrfs_csum_final(csum, (char *)&csum);
-       if (csum != private)
+       if (csum != csum_expected)
                goto zeroit;
 
        kunmap_atomic(kaddr);
@@ -2877,14 +2804,12 @@ good:
 
 zeroit:
        if (__ratelimit(&_rs))
-               btrfs_info(root->fs_info, "csum failed ino %llu off %llu csum %u private %llu",
-                       (unsigned long long)btrfs_ino(page->mapping->host),
-                       (unsigned long long)start, csum,
-                       (unsigned long long)private);
+               btrfs_info(root->fs_info, "csum failed ino %llu off %llu csum %u expected csum %u",
+                       btrfs_ino(page->mapping->host), start, csum, csum_expected);
        memset(kaddr + offset, 1, end - start + 1);
        flush_dcache_page(page);
        kunmap_atomic(kaddr);
-       if (private == 0)
+       if (csum_expected == 0)
                return 0;
        return -EIO;
 }
@@ -2971,8 +2896,10 @@ void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans,
            btrfs_root_refs(&root->root_item) > 0) {
                ret = btrfs_del_orphan_item(trans, root->fs_info->tree_root,
                                            root->root_key.objectid);
-               BUG_ON(ret);
-               root->orphan_item_inserted = 0;
+               if (ret)
+                       btrfs_abort_transaction(trans, root, ret);
+               else
+                       root->orphan_item_inserted = 0;
        }
 
        if (block_rsv) {
@@ -3041,11 +2968,18 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
        /* insert an orphan item to track this unlinked/truncated file */
        if (insert >= 1) {
                ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode));
-               if (ret && ret != -EEXIST) {
-                       clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
-                                 &BTRFS_I(inode)->runtime_flags);
-                       btrfs_abort_transaction(trans, root, ret);
-                       return ret;
+               if (ret) {
+                       if (reserve) {
+                               clear_bit(BTRFS_INODE_ORPHAN_META_RESERVED,
+                                         &BTRFS_I(inode)->runtime_flags);
+                               btrfs_orphan_release_metadata(inode);
+                       }
+                       if (ret != -EEXIST) {
+                               clear_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
+                                         &BTRFS_I(inode)->runtime_flags);
+                               btrfs_abort_transaction(trans, root, ret);
+                               return ret;
+                       }
                }
                ret = 0;
        }
@@ -3084,17 +3018,15 @@ static int btrfs_orphan_del(struct btrfs_trans_handle *trans,
                release_rsv = 1;
        spin_unlock(&root->orphan_lock);
 
-       if (trans && delete_item) {
+       if (trans && delete_item)
                ret = btrfs_del_orphan_item(trans, root, btrfs_ino(inode));
-               BUG_ON(ret); /* -ENOMEM or corruption (JDM: Recheck) */
-       }
 
        if (release_rsv) {
                btrfs_orphan_release_metadata(inode);
                atomic_dec(&root->orphan_inodes);
        }
 
-       return 0;
+       return ret;
 }
 
 /*
@@ -3224,8 +3156,9 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
                                found_key.objectid);
                        ret = btrfs_del_orphan_item(trans, root,
                                                    found_key.objectid);
-                       BUG_ON(ret); /* -ENOMEM or corruption (JDM: Recheck) */
                        btrfs_end_transaction(trans, root);
+                       if (ret)
+                               goto out;
                        continue;
                }
 
@@ -3657,8 +3590,7 @@ static int __btrfs_unlink_inode(struct btrfs_trans_handle *trans,
        if (ret) {
                btrfs_info(root->fs_info,
                        "failed to delete reference to %.*s, inode %llu parent %llu",
-                       name_len, name,
-                       (unsigned long long)ino, (unsigned long long)dir_ino);
+                       name_len, name, ino, dir_ino);
                btrfs_abort_transaction(trans, root, ret);
                goto err;
        }
@@ -3929,6 +3861,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
        u64 extent_num_bytes = 0;
        u64 extent_offset = 0;
        u64 item_end = 0;
+       u64 last_size = (u64)-1;
        u32 found_type = (u8)-1;
        int found_extent;
        int del_item;
@@ -4026,6 +3959,11 @@ search_again:
                if (found_type != BTRFS_EXTENT_DATA_KEY)
                        goto delete;
 
+               if (del_item)
+                       last_size = found_key.offset;
+               else
+                       last_size = new_size;
+
                if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
                        u64 num_dec;
                        extent_start = btrfs_file_extent_disk_bytenr(leaf, fi);
@@ -4137,6 +4075,8 @@ out:
                        btrfs_abort_transaction(trans, root, ret);
        }
 error:
+       if (last_size != (u64)-1)
+               btrfs_ordered_update_i_size(inode, last_size, NULL);
        btrfs_free_path(path);
        return err;
 }
@@ -4409,7 +4349,7 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
                inode->i_ctime = inode->i_mtime = current_fs_time(inode->i_sb);
 
        if (newsize > oldsize) {
-               truncate_pagecache(inode, oldsize, newsize);
+               truncate_pagecache(inode, newsize);
                ret = btrfs_cont_expand(inode, oldsize, newsize);
                if (ret)
                        return ret;
@@ -4465,8 +4405,26 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
                btrfs_inode_resume_unlocked_dio(inode);
 
                ret = btrfs_truncate(inode);
-               if (ret && inode->i_nlink)
-                       btrfs_orphan_del(NULL, inode);
+               if (ret && inode->i_nlink) {
+                       int err;
+
+                       /*
+                        * failed to truncate, disk_i_size is only adjusted down
+                        * as we remove extents, so it should represent the true
+                        * size of the inode, so reset the in memory size and
+                        * delete our orphan entry.
+                        */
+                       trans = btrfs_join_transaction(root);
+                       if (IS_ERR(trans)) {
+                               btrfs_orphan_del(NULL, inode);
+                               return ret;
+                       }
+                       i_size_write(inode, BTRFS_I(inode)->disk_i_size);
+                       err = btrfs_orphan_del(trans, inode);
+                       if (err)
+                               btrfs_abort_transaction(trans, root, err);
+                       btrfs_end_transaction(trans, root);
+               }
        }
 
        return ret;
@@ -4601,10 +4559,15 @@ void btrfs_evict_inode(struct inode *inode)
 
        btrfs_free_block_rsv(root, rsv);
 
+       /*
+        * Errors here aren't a big deal, it just means we leave orphan items
+        * in the tree.  They will be cleaned up on the next mount.
+        */
        if (ret == 0) {
                trans->block_rsv = root->orphan_block_rsv;
-               ret = btrfs_orphan_del(trans, inode);
-               BUG_ON(ret);
+               btrfs_orphan_del(trans, inode);
+       } else {
+               btrfs_orphan_del(NULL, inode);
        }
 
        trans->block_rsv = &root->fs_info->trans_block_rsv;
@@ -6161,10 +6124,7 @@ insert:
        btrfs_release_path(path);
        if (em->start > start || extent_map_end(em) <= start) {
                btrfs_err(root->fs_info, "bad extent! em: [%llu %llu] passed [%llu %llu]",
-                       (unsigned long long)em->start,
-                       (unsigned long long)em->len,
-                       (unsigned long long)start,
-                       (unsigned long long)len);
+                       em->start, em->len, start, len);
                err = -EIO;
                goto out;
        }
@@ -6362,39 +6322,32 @@ static struct extent_map *btrfs_new_extent_direct(struct inode *inode,
                                                  u64 start, u64 len)
 {
        struct btrfs_root *root = BTRFS_I(inode)->root;
-       struct btrfs_trans_handle *trans;
        struct extent_map *em;
        struct btrfs_key ins;
        u64 alloc_hint;
        int ret;
 
-       trans = btrfs_join_transaction(root);
-       if (IS_ERR(trans))
-               return ERR_CAST(trans);
-
-       trans->block_rsv = &root->fs_info->delalloc_block_rsv;
-
        alloc_hint = get_extent_allocation_hint(inode, start, len);
-       ret = btrfs_reserve_extent(trans, root, len, root->sectorsize, 0,
+       ret = btrfs_reserve_extent(root, len, root->sectorsize, 0,
                                   alloc_hint, &ins, 1);
-       if (ret) {
-               em = ERR_PTR(ret);
-               goto out;
-       }
+       if (ret)
+               return ERR_PTR(ret);
 
        em = create_pinned_em(inode, start, ins.offset, start, ins.objectid,
                              ins.offset, ins.offset, ins.offset, 0);
-       if (IS_ERR(em))
-               goto out;
+       if (IS_ERR(em)) {
+               btrfs_free_reserved_extent(root, ins.objectid, ins.offset);
+               return em;
+       }
 
        ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid,
                                           ins.offset, ins.offset, 0);
        if (ret) {
                btrfs_free_reserved_extent(root, ins.objectid, ins.offset);
-               em = ERR_PTR(ret);
+               free_extent_map(em);
+               return ERR_PTR(ret);
        }
-out:
-       btrfs_end_transaction(trans, root);
+
        return em;
 }
 
@@ -6402,11 +6355,11 @@ out:
  * returns 1 when the nocow is safe, < 1 on error, 0 if the
  * block must be cow'd
  */
-noinline int can_nocow_extent(struct btrfs_trans_handle *trans,
-                             struct inode *inode, u64 offset, u64 *len,
+noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len,
                              u64 *orig_start, u64 *orig_block_len,
                              u64 *ram_bytes)
 {
+       struct btrfs_trans_handle *trans;
        struct btrfs_path *path;
        int ret;
        struct extent_buffer *leaf;
@@ -6424,7 +6377,7 @@ noinline int can_nocow_extent(struct btrfs_trans_handle *trans,
        if (!path)
                return -ENOMEM;
 
-       ret = btrfs_lookup_file_extent(trans, root, path, btrfs_ino(inode),
+       ret = btrfs_lookup_file_extent(NULL, root, path, btrfs_ino(inode),
                                       offset, 0);
        if (ret < 0)
                goto out;
@@ -6489,9 +6442,19 @@ noinline int can_nocow_extent(struct btrfs_trans_handle *trans,
         * look for other files referencing this extent, if we
         * find any we must cow
         */
-       if (btrfs_cross_ref_exist(trans, root, btrfs_ino(inode),
-                                 key.offset - backref_offset, disk_bytenr))
+       trans = btrfs_join_transaction(root);
+       if (IS_ERR(trans)) {
+               ret = 0;
                goto out;
+       }
+
+       ret = btrfs_cross_ref_exist(trans, root, btrfs_ino(inode),
+                                   key.offset - backref_offset, disk_bytenr);
+       btrfs_end_transaction(trans, root);
+       if (ret) {
+               ret = 0;
+               goto out;
+       }
 
        /*
         * adjust disk_bytenr and num_bytes to cover just the bytes
@@ -6633,7 +6596,6 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
        u64 start = iblock << inode->i_blkbits;
        u64 lockstart, lockend;
        u64 len = bh_result->b_size;
-       struct btrfs_trans_handle *trans;
        int unlock_bits = EXTENT_LOCKED;
        int ret = 0;
 
@@ -6715,16 +6677,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
                len = min(len, em->len - (start - em->start));
                block_start = em->block_start + (start - em->start);
 
-               /*
-                * we're not going to log anything, but we do need
-                * to make sure the current transaction stays open
-                * while we look for nocow cross refs
-                */
-               trans = btrfs_join_transaction(root);
-               if (IS_ERR(trans))
-                       goto must_cow;
-
-               if (can_nocow_extent(trans, inode, start, &len, &orig_start,
+               if (can_nocow_extent(inode, start, &len, &orig_start,
                                     &orig_block_len, &ram_bytes) == 1) {
                        if (type == BTRFS_ORDERED_PREALLOC) {
                                free_extent_map(em);
@@ -6733,24 +6686,20 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
                                                       block_start, len,
                                                       orig_block_len,
                                                       ram_bytes, type);
-                               if (IS_ERR(em)) {
-                                       btrfs_end_transaction(trans, root);
+                               if (IS_ERR(em))
                                        goto unlock_err;
-                               }
                        }
 
                        ret = btrfs_add_ordered_extent_dio(inode, start,
                                           block_start, len, len, type);
-                       btrfs_end_transaction(trans, root);
                        if (ret) {
                                free_extent_map(em);
                                goto unlock_err;
                        }
                        goto unlock;
                }
-               btrfs_end_transaction(trans, root);
        }
-must_cow:
+
        /*
         * this will cow the extent, reset the len in case we changed
         * it above
@@ -6813,26 +6762,6 @@ unlock_err:
        return ret;
 }
 
-struct btrfs_dio_private {
-       struct inode *inode;
-       u64 logical_offset;
-       u64 disk_bytenr;
-       u64 bytes;
-       void *private;
-
-       /* number of bios pending for this dio */
-       atomic_t pending_bios;
-
-       /* IO errors */
-       int errors;
-
-       /* orig_bio is our btrfs_io_bio */
-       struct bio *orig_bio;
-
-       /* dio_bio came from fs/direct-io.c */
-       struct bio *dio_bio;
-};
-
 static void btrfs_endio_direct_read(struct bio *bio, int err)
 {
        struct btrfs_dio_private *dip = bio->bi_private;
@@ -6841,6 +6770,8 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
        struct inode *inode = dip->inode;
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct bio *dio_bio;
+       u32 *csums = (u32 *)dip->csum;
+       int index = 0;
        u64 start;
 
        start = dip->logical_offset;
@@ -6849,12 +6780,8 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
                        struct page *page = bvec->bv_page;
                        char *kaddr;
                        u32 csum = ~(u32)0;
-                       u64 private = ~(u32)0;
                        unsigned long flags;
 
-                       if (get_state_private(&BTRFS_I(inode)->io_tree,
-                                             start, &private))
-                               goto failed;
                        local_irq_save(flags);
                        kaddr = kmap_atomic(page);
                        csum = btrfs_csum_data(kaddr + bvec->bv_offset,
@@ -6864,18 +6791,17 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
                        local_irq_restore(flags);
 
                        flush_dcache_page(bvec->bv_page);
-                       if (csum != private) {
-failed:
-                               btrfs_err(root->fs_info, "csum failed ino %llu off %llu csum %u private %u",
-                                       (unsigned long long)btrfs_ino(inode),
-                                       (unsigned long long)start,
-                                       csum, (unsigned)private);
+                       if (csum != csums[index]) {
+                               btrfs_err(root->fs_info, "csum failed ino %llu off %llu csum %u expected csum %u",
+                                         btrfs_ino(inode), start, csum,
+                                         csums[index]);
                                err = -EIO;
                        }
                }
 
                start += bvec->bv_len;
                bvec++;
+               index++;
        } while (bvec <= bvec_end);
 
        unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset,
@@ -6956,7 +6882,7 @@ static void btrfs_end_dio_bio(struct bio *bio, int err)
        if (err) {
                printk(KERN_ERR "btrfs direct IO failed ino %llu rw %lu "
                      "sector %#Lx len %u err no %d\n",
-                     (unsigned long long)btrfs_ino(dip->inode), bio->bi_rw,
+                     btrfs_ino(dip->inode), bio->bi_rw,
                      (unsigned long long)bio->bi_sector, bio->bi_size, err);
                dip->errors = 1;
 
@@ -6992,6 +6918,7 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
                                         int rw, u64 file_offset, int skip_sum,
                                         int async_submit)
 {
+       struct btrfs_dio_private *dip = bio->bi_private;
        int write = rw & REQ_WRITE;
        struct btrfs_root *root = BTRFS_I(inode)->root;
        int ret;
@@ -7026,7 +6953,8 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
                if (ret)
                        goto err;
        } else if (!skip_sum) {
-               ret = btrfs_lookup_bio_sums_dio(root, inode, bio, file_offset);
+               ret = btrfs_lookup_bio_sums_dio(root, inode, dip, bio,
+                                               file_offset);
                if (ret)
                        goto err;
        }
@@ -7061,6 +6989,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
                bio_put(orig_bio);
                return -EIO;
        }
+
        if (map_length >= orig_bio->bi_size) {
                bio = orig_bio;
                goto submit;
@@ -7156,19 +7085,28 @@ static void btrfs_submit_direct(int rw, struct bio *dio_bio,
        struct btrfs_dio_private *dip;
        struct bio *io_bio;
        int skip_sum;
+       int sum_len;
        int write = rw & REQ_WRITE;
        int ret = 0;
+       u16 csum_size;
 
        skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
 
        io_bio = btrfs_bio_clone(dio_bio, GFP_NOFS);
-
        if (!io_bio) {
                ret = -ENOMEM;
                goto free_ordered;
        }
 
-       dip = kmalloc(sizeof(*dip), GFP_NOFS);
+       if (!skip_sum && !write) {
+               csum_size = btrfs_super_csum_size(root->fs_info->super_copy);
+               sum_len = dio_bio->bi_size >> inode->i_sb->s_blocksize_bits;
+               sum_len *= csum_size;
+       } else {
+               sum_len = 0;
+       }
+
+       dip = kmalloc(sizeof(*dip) + sum_len, GFP_NOFS);
        if (!dip) {
                ret = -ENOMEM;
                goto free_io_bio;
@@ -7443,10 +7381,23 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
                 * whoever cleared the private bit is responsible
                 * for the finish_ordered_io
                 */
-               if (TestClearPagePrivate2(page) &&
-                   btrfs_dec_test_ordered_pending(inode, &ordered, page_start,
-                                                  PAGE_CACHE_SIZE, 1)) {
-                       btrfs_finish_ordered_io(ordered);
+               if (TestClearPagePrivate2(page)) {
+                       struct btrfs_ordered_inode_tree *tree;
+                       u64 new_len;
+
+                       tree = &BTRFS_I(inode)->ordered_tree;
+
+                       spin_lock_irq(&tree->lock);
+                       set_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags);
+                       new_len = page_start - ordered->file_offset;
+                       if (new_len < ordered->truncated_len)
+                               ordered->truncated_len = new_len;
+                       spin_unlock_irq(&tree->lock);
+
+                       if (btrfs_dec_test_ordered_pending(inode, &ordered,
+                                                          page_start,
+                                                          PAGE_CACHE_SIZE, 1))
+                               btrfs_finish_ordered_io(ordered);
                }
                btrfs_put_ordered_extent(ordered);
                cached_state = NULL;
@@ -7612,7 +7563,6 @@ static int btrfs_truncate(struct inode *inode)
        u64 min_size = btrfs_calc_trunc_metadata_size(root, 1);
 
        btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1);
-       btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
 
        /*
         * Yes ladies and gentelment, this is indeed ugly.  The fact is we have
@@ -7876,7 +7826,7 @@ void btrfs_destroy_inode(struct inode *inode)
        if (test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
                     &BTRFS_I(inode)->runtime_flags)) {
                btrfs_info(root->fs_info, "inode %llu still on the orphan list",
-                       (unsigned long long)btrfs_ino(inode));
+                       btrfs_ino(inode));
                atomic_dec(&root->orphan_inodes);
        }
 
@@ -7886,8 +7836,7 @@ void btrfs_destroy_inode(struct inode *inode)
                        break;
                else {
                        btrfs_err(root->fs_info, "found ordered extent %llu %llu on inode cleanup",
-                               (unsigned long long)ordered->file_offset,
-                               (unsigned long long)ordered->len);
+                               ordered->file_offset, ordered->len);
                        btrfs_remove_ordered_extent(inode, ordered);
                        btrfs_put_ordered_extent(ordered);
                        btrfs_put_ordered_extent(ordered);
@@ -8161,10 +8110,8 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
                                                 new_dentry->d_name.name,
                                                 new_dentry->d_name.len);
                }
-               if (!ret && new_inode->i_nlink == 0) {
+               if (!ret && new_inode->i_nlink == 0)
                        ret = btrfs_orphan_add(trans, new_dentry->d_inode);
-                       BUG_ON(ret);
-               }
                if (ret) {
                        btrfs_abort_transaction(trans, root, ret);
                        goto out_fail;
@@ -8525,8 +8472,8 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
 
                cur_bytes = min(num_bytes, 256ULL * 1024 * 1024);
                cur_bytes = max(cur_bytes, min_size);
-               ret = btrfs_reserve_extent(trans, root, cur_bytes,
-                                          min_size, 0, *alloc_hint, &ins, 1);
+               ret = btrfs_reserve_extent(root, cur_bytes, min_size, 0,
+                                          *alloc_hint, &ins, 1);
                if (ret) {
                        if (own_trans)
                                btrfs_end_transaction(trans, root);
index 238a055..1a5b946 100644 (file)
@@ -43,6 +43,7 @@
 #include <linux/blkdev.h>
 #include <linux/uuid.h>
 #include <linux/btrfs.h>
+#include <linux/uaccess.h>
 #include "compat.h"
 #include "ctree.h"
 #include "disk-io.h"
@@ -57,6 +58,9 @@
 #include "send.h"
 #include "dev-replace.h"
 
+static int btrfs_clone(struct inode *src, struct inode *inode,
+                      u64 off, u64 olen, u64 olen_aligned, u64 destoff);
+
 /* Mask out flags that are inappropriate for the given type of inode. */
 static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags)
 {
@@ -363,6 +367,13 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg)
        return 0;
 }
 
+int btrfs_is_empty_uuid(u8 *uuid)
+{
+       static char empty_uuid[BTRFS_UUID_SIZE] = {0};
+
+       return !memcmp(uuid, empty_uuid, BTRFS_UUID_SIZE);
+}
+
 static noinline int create_subvol(struct inode *dir,
                                  struct dentry *dentry,
                                  char *name, int namelen,
@@ -396,7 +407,7 @@ static noinline int create_subvol(struct inode *dir,
         * of create_snapshot().
         */
        ret = btrfs_subvolume_reserve_metadata(root, &block_rsv,
-                                              7, &qgroup_reserved);
+                                              8, &qgroup_reserved, false);
        if (ret)
                return ret;
 
@@ -425,26 +436,25 @@ static noinline int create_subvol(struct inode *dir,
        btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV);
        btrfs_set_header_owner(leaf, objectid);
 
-       write_extent_buffer(leaf, root->fs_info->fsid,
-                           (unsigned long)btrfs_header_fsid(leaf),
+       write_extent_buffer(leaf, root->fs_info->fsid, btrfs_header_fsid(leaf),
                            BTRFS_FSID_SIZE);
        write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid,
-                           (unsigned long)btrfs_header_chunk_tree_uuid(leaf),
+                           btrfs_header_chunk_tree_uuid(leaf),
                            BTRFS_UUID_SIZE);
        btrfs_mark_buffer_dirty(leaf);
 
        memset(&root_item, 0, sizeof(root_item));
 
        inode_item = &root_item.inode;
-       inode_item->generation = cpu_to_le64(1);
-       inode_item->size = cpu_to_le64(3);
-       inode_item->nlink = cpu_to_le32(1);
-       inode_item->nbytes = cpu_to_le64(root->leafsize);
-       inode_item->mode = cpu_to_le32(S_IFDIR | 0755);
+       btrfs_set_stack_inode_generation(inode_item, 1);
+       btrfs_set_stack_inode_size(inode_item, 3);
+       btrfs_set_stack_inode_nlink(inode_item, 1);
+       btrfs_set_stack_inode_nbytes(inode_item, root->leafsize);
+       btrfs_set_stack_inode_mode(inode_item, S_IFDIR | 0755);
 
-       root_item.flags = 0;
-       root_item.byte_limit = 0;
-       inode_item->flags = cpu_to_le64(BTRFS_INODE_ROOT_ITEM_INIT);
+       btrfs_set_root_flags(&root_item, 0);
+       btrfs_set_root_limit(&root_item, 0);
+       btrfs_set_stack_inode_flags(inode_item, BTRFS_INODE_ROOT_ITEM_INIT);
 
        btrfs_set_root_bytenr(&root_item, leaf->start);
        btrfs_set_root_generation(&root_item, trans->transid);
@@ -457,8 +467,8 @@ static noinline int create_subvol(struct inode *dir,
                        btrfs_root_generation(&root_item));
        uuid_le_gen(&new_uuid);
        memcpy(root_item.uuid, new_uuid.b, BTRFS_UUID_SIZE);
-       root_item.otime.sec = cpu_to_le64(cur_time.tv_sec);
-       root_item.otime.nsec = cpu_to_le32(cur_time.tv_nsec);
+       btrfs_set_stack_timespec_sec(&root_item.otime, cur_time.tv_sec);
+       btrfs_set_stack_timespec_nsec(&root_item.otime, cur_time.tv_nsec);
        root_item.ctime = root_item.otime;
        btrfs_set_root_ctransid(&root_item, trans->transid);
        btrfs_set_root_otransid(&root_item, trans->transid);
@@ -518,9 +528,14 @@ static noinline int create_subvol(struct inode *dir,
        ret = btrfs_add_root_ref(trans, root->fs_info->tree_root,
                                 objectid, root->root_key.objectid,
                                 btrfs_ino(dir), index, name, namelen);
-
        BUG_ON(ret);
 
+       ret = btrfs_uuid_tree_add(trans, root->fs_info->uuid_root,
+                                 root_item.uuid, BTRFS_UUID_KEY_SUBVOL,
+                                 objectid);
+       if (ret)
+               btrfs_abort_transaction(trans, root, ret);
+
 fail:
        trans->block_rsv = NULL;
        trans->bytes_reserved = 0;
@@ -573,10 +588,12 @@ static int create_snapshot(struct btrfs_root *root, struct inode *dir,
         * 1 - root item
         * 2 - root ref/backref
         * 1 - root of snapshot
+        * 1 - UUID item
         */
        ret = btrfs_subvolume_reserve_metadata(BTRFS_I(dir)->root,
-                                       &pending_snapshot->block_rsv, 7,
-                                       &pending_snapshot->qgroup_reserved);
+                                       &pending_snapshot->block_rsv, 8,
+                                       &pending_snapshot->qgroup_reserved,
+                                       false);
        if (ret)
                goto out;
 
@@ -1267,9 +1284,6 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
                        cluster = max_cluster;
                }
 
-               if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)
-                       BTRFS_I(inode)->force_compress = compress_type;
-
                if (i + cluster > ra_index) {
                        ra_index = max(i, ra_index);
                        btrfs_force_ra(inode->i_mapping, ra, file, ra_index,
@@ -1278,6 +1292,8 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
                }
 
                mutex_lock(&inode->i_mutex);
+               if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)
+                       BTRFS_I(inode)->force_compress = compress_type;
                ret = cluster_pages_for_defrag(inode, pages, i, cluster);
                if (ret < 0) {
                        mutex_unlock(&inode->i_mutex);
@@ -1334,10 +1350,6 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
                            atomic_read(&root->fs_info->async_delalloc_pages) == 0));
                }
                atomic_dec(&root->fs_info->async_submit_draining);
-
-               mutex_lock(&inode->i_mutex);
-               BTRFS_I(inode)->force_compress = BTRFS_COMPRESS_NONE;
-               mutex_unlock(&inode->i_mutex);
        }
 
        if (range->compress_type == BTRFS_COMPRESS_LZO) {
@@ -1347,6 +1359,11 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
        ret = defrag_count;
 
 out_ra:
+       if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) {
+               mutex_lock(&inode->i_mutex);
+               BTRFS_I(inode)->force_compress = BTRFS_COMPRESS_NONE;
+               mutex_unlock(&inode->i_mutex);
+       }
        if (!file)
                kfree(ra);
        kfree(pages);
@@ -1377,9 +1394,8 @@ static noinline int btrfs_ioctl_resize(struct file *file,
 
        if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running,
                        1)) {
-               pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
                mnt_drop_write_file(file);
-               return -EINVAL;
+               return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
        }
 
        mutex_lock(&root->fs_info->volume_mutex);
@@ -1403,14 +1419,13 @@ static noinline int btrfs_ioctl_resize(struct file *file,
                        ret = -EINVAL;
                        goto out_free;
                }
-               printk(KERN_INFO "btrfs: resizing devid %llu\n",
-                      (unsigned long long)devid);
+               printk(KERN_INFO "btrfs: resizing devid %llu\n", devid);
        }
 
        device = btrfs_find_device(root->fs_info, devid, NULL, NULL);
        if (!device) {
                printk(KERN_INFO "btrfs: resizer unable to find device %llu\n",
-                      (unsigned long long)devid);
+                      devid);
                ret = -ENODEV;
                goto out_free;
        }
@@ -1418,7 +1433,7 @@ static noinline int btrfs_ioctl_resize(struct file *file,
        if (!device->writeable) {
                printk(KERN_INFO "btrfs: resizer unable to apply on "
                       "readonly device %llu\n",
-                      (unsigned long long)devid);
+                      devid);
                ret = -EPERM;
                goto out_free;
        }
@@ -1470,8 +1485,7 @@ static noinline int btrfs_ioctl_resize(struct file *file,
        new_size *= root->sectorsize;
 
        printk_in_rcu(KERN_INFO "btrfs: new size for %s is %llu\n",
-                     rcu_str_deref(device->name),
-                     (unsigned long long)new_size);
+                     rcu_str_deref(device->name), new_size);
 
        if (new_size > old_size) {
                trans = btrfs_start_transaction(root, 0);
@@ -1721,13 +1735,28 @@ out:
 static noinline int may_destroy_subvol(struct btrfs_root *root)
 {
        struct btrfs_path *path;
+       struct btrfs_dir_item *di;
        struct btrfs_key key;
+       u64 dir_id;
        int ret;
 
        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
 
+       /* Make sure this root isn't set as the default subvol */
+       dir_id = btrfs_super_root_dir(root->fs_info->super_copy);
+       di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root, path,
+                                  dir_id, "default", 7, 0);
+       if (di && !IS_ERR(di)) {
+               btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key);
+               if (key.objectid == root->root_key.objectid) {
+                       ret = -ENOTEMPTY;
+                       goto out;
+               }
+               btrfs_release_path(path);
+       }
+
        key.objectid = root->root_key.objectid;
        key.type = BTRFS_ROOT_REF_KEY;
        key.offset = (u64)-1;
@@ -1993,25 +2022,29 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info,
                ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
                if (ret < 0)
                        goto out;
+               else if (ret > 0) {
+                       ret = btrfs_previous_item(root, path, dirid,
+                                                 BTRFS_INODE_REF_KEY);
+                       if (ret < 0)
+                               goto out;
+                       else if (ret > 0) {
+                               ret = -ENOENT;
+                               goto out;
+                       }
+               }
 
                l = path->nodes[0];
                slot = path->slots[0];
-               if (ret > 0 && slot > 0)
-                       slot--;
                btrfs_item_key_to_cpu(l, &key, slot);
 
-               if (ret > 0 && (key.objectid != dirid ||
-                               key.type != BTRFS_INODE_REF_KEY)) {
-                       ret = -ENOENT;
-                       goto out;
-               }
-
                iref = btrfs_item_ptr(l, slot, struct btrfs_inode_ref);
                len = btrfs_inode_ref_name_len(l, iref);
                ptr -= len + 1;
                total_len += len + 1;
-               if (ptr < name)
+               if (ptr < name) {
+                       ret = -ENAMETOOLONG;
                        goto out;
+               }
 
                *(ptr + len) = '/';
                read_extent_buffer(l, ptr,(unsigned long)(iref + 1), len);
@@ -2024,8 +2057,6 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info,
                key.offset = (u64)-1;
                dirid = key.objectid;
        }
-       if (ptr < name)
-               goto out;
        memmove(name, ptr, total_len);
        name[total_len]='\0';
        ret = 0;
@@ -2174,7 +2205,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
         * ref/backref.
         */
        err = btrfs_subvolume_reserve_metadata(root, &block_rsv,
-                                              5, &qgroup_reserved);
+                                              5, &qgroup_reserved, true);
        if (err)
                goto out_up_write;
 
@@ -2213,6 +2244,27 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
                        goto out_end_trans;
                }
        }
+
+       ret = btrfs_uuid_tree_rem(trans, root->fs_info->uuid_root,
+                                 dest->root_item.uuid, BTRFS_UUID_KEY_SUBVOL,
+                                 dest->root_key.objectid);
+       if (ret && ret != -ENOENT) {
+               btrfs_abort_transaction(trans, root, ret);
+               err = ret;
+               goto out_end_trans;
+       }
+       if (!btrfs_is_empty_uuid(dest->root_item.received_uuid)) {
+               ret = btrfs_uuid_tree_rem(trans, root->fs_info->uuid_root,
+                                         dest->root_item.received_uuid,
+                                         BTRFS_UUID_KEY_RECEIVED_SUBVOL,
+                                         dest->root_key.objectid);
+               if (ret && ret != -ENOENT) {
+                       btrfs_abort_transaction(trans, root, ret);
+                       err = ret;
+                       goto out_end_trans;
+               }
+       }
+
 out_end_trans:
        trans->block_rsv = NULL;
        trans->bytes_reserved = 0;
@@ -2326,8 +2378,7 @@ static long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg)
 
        if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running,
                        1)) {
-               pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
-               return -EINVAL;
+               return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
        }
 
        mutex_lock(&root->fs_info->volume_mutex);
@@ -2400,10 +2451,10 @@ static long btrfs_ioctl_fs_info(struct btrfs_root *root, void __user *arg)
        if (!fi_args)
                return -ENOMEM;
 
+       mutex_lock(&fs_devices->device_list_mutex);
        fi_args->num_devices = fs_devices->num_devices;
        memcpy(&fi_args->fsid, root->fs_info->fsid, sizeof(fi_args->fsid));
 
-       mutex_lock(&fs_devices->device_list_mutex);
        list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) {
                if (device->devid > fi_args->max_id)
                        fi_args->max_id = device->devid;
@@ -2424,7 +2475,6 @@ static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg)
        struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
        int ret = 0;
        char *s_uuid = NULL;
-       char empty_uuid[BTRFS_UUID_SIZE] = {0};
 
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
@@ -2433,7 +2483,7 @@ static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg)
        if (IS_ERR(di_args))
                return PTR_ERR(di_args);
 
-       if (memcmp(empty_uuid, di_args->uuid, BTRFS_UUID_SIZE) != 0)
+       if (!btrfs_is_empty_uuid(di_args->uuid))
                s_uuid = di_args->uuid;
 
        mutex_lock(&fs_devices->device_list_mutex);
@@ -2469,150 +2519,336 @@ out:
        return ret;
 }
 
-static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
-                                      u64 off, u64 olen, u64 destoff)
+static struct page *extent_same_get_page(struct inode *inode, u64 off)
+{
+       struct page *page;
+       pgoff_t index;
+       struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
+
+       index = off >> PAGE_CACHE_SHIFT;
+
+       page = grab_cache_page(inode->i_mapping, index);
+       if (!page)
+               return NULL;
+
+       if (!PageUptodate(page)) {
+               if (extent_read_full_page_nolock(tree, page, btrfs_get_extent,
+                                                0))
+                       return NULL;
+               lock_page(page);
+               if (!PageUptodate(page)) {
+                       unlock_page(page);
+                       page_cache_release(page);
+                       return NULL;
+               }
+       }
+       unlock_page(page);
+
+       return page;
+}
+
+static inline void lock_extent_range(struct inode *inode, u64 off, u64 len)
+{
+       /* do any pending delalloc/csum calc on src, one way or
+          another, and lock file content */
+       while (1) {
+               struct btrfs_ordered_extent *ordered;
+               lock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1);
+               ordered = btrfs_lookup_first_ordered_extent(inode,
+                                                           off + len - 1);
+               if (!ordered &&
+                   !test_range_bit(&BTRFS_I(inode)->io_tree, off,
+                                   off + len - 1, EXTENT_DELALLOC, 0, NULL))
+                       break;
+               unlock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1);
+               if (ordered)
+                       btrfs_put_ordered_extent(ordered);
+               btrfs_wait_ordered_range(inode, off, len);
+       }
+}
+
+static void btrfs_double_unlock(struct inode *inode1, u64 loff1,
+                               struct inode *inode2, u64 loff2, u64 len)
+{
+       unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1);
+       unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1);
+
+       mutex_unlock(&inode1->i_mutex);
+       mutex_unlock(&inode2->i_mutex);
+}
+
+static void btrfs_double_lock(struct inode *inode1, u64 loff1,
+                             struct inode *inode2, u64 loff2, u64 len)
+{
+       if (inode1 < inode2) {
+               swap(inode1, inode2);
+               swap(loff1, loff2);
+       }
+
+       mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT);
+       lock_extent_range(inode1, loff1, len);
+       if (inode1 != inode2) {
+               mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD);
+               lock_extent_range(inode2, loff2, len);
+       }
+}
+
+static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst,
+                         u64 dst_loff, u64 len)
+{
+       int ret = 0;
+       struct page *src_page, *dst_page;
+       unsigned int cmp_len = PAGE_CACHE_SIZE;
+       void *addr, *dst_addr;
+
+       while (len) {
+               if (len < PAGE_CACHE_SIZE)
+                       cmp_len = len;
+
+               src_page = extent_same_get_page(src, loff);
+               if (!src_page)
+                       return -EINVAL;
+               dst_page = extent_same_get_page(dst, dst_loff);
+               if (!dst_page) {
+                       page_cache_release(src_page);
+                       return -EINVAL;
+               }
+               addr = kmap_atomic(src_page);
+               dst_addr = kmap_atomic(dst_page);
+
+               flush_dcache_page(src_page);
+               flush_dcache_page(dst_page);
+
+               if (memcmp(addr, dst_addr, cmp_len))
+                       ret = BTRFS_SAME_DATA_DIFFERS;
+
+               kunmap_atomic(addr);
+               kunmap_atomic(dst_addr);
+               page_cache_release(src_page);
+               page_cache_release(dst_page);
+
+               if (ret)
+                       break;
+
+               loff += cmp_len;
+               dst_loff += cmp_len;
+               len -= cmp_len;
+       }
+
+       return ret;
+}
+
+static int extent_same_check_offsets(struct inode *inode, u64 off, u64 len)
+{
+       u64 bs = BTRFS_I(inode)->root->fs_info->sb->s_blocksize;
+
+       if (off + len > inode->i_size || off + len < off)
+               return -EINVAL;
+       /* Check that we are block aligned - btrfs_clone() requires this */
+       if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs))
+               return -EINVAL;
+
+       return 0;
+}
+
+static int btrfs_extent_same(struct inode *src, u64 loff, u64 len,
+                            struct inode *dst, u64 dst_loff)
 {
-       struct inode *inode = file_inode(file);
-       struct btrfs_root *root = BTRFS_I(inode)->root;
-       struct fd src_file;
-       struct inode *src;
-       struct btrfs_trans_handle *trans;
-       struct btrfs_path *path;
-       struct extent_buffer *leaf;
-       char *buf;
-       struct btrfs_key key;
-       u32 nritems;
-       int slot;
        int ret;
-       u64 len = olen;
-       u64 bs = root->fs_info->sb->s_blocksize;
-       int same_inode = 0;
 
        /*
-        * TODO:
-        * - split compressed inline extents.  annoying: we need to
-        *   decompress into destination's address_space (the file offset
-        *   may change, so source mapping won't do), then recompress (or
-        *   otherwise reinsert) a subrange.
-        * - allow ranges within the same file to be cloned (provided
-        *   they don't overlap)?
+        * btrfs_clone() can't handle extents in the same file
+        * yet. Once that works, we can drop this check and replace it
+        * with a check for the same inode, but overlapping extents.
         */
-
-       /* the destination must be opened for writing */
-       if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND))
+       if (src == dst)
                return -EINVAL;
 
-       if (btrfs_root_readonly(root))
-               return -EROFS;
+       btrfs_double_lock(src, loff, dst, dst_loff, len);
+
+       ret = extent_same_check_offsets(src, loff, len);
+       if (ret)
+               goto out_unlock;
+
+       ret = extent_same_check_offsets(dst, dst_loff, len);
+       if (ret)
+               goto out_unlock;
+
+       /* don't make the dst file partly checksummed */
+       if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) !=
+           (BTRFS_I(dst)->flags & BTRFS_INODE_NODATASUM)) {
+               ret = -EINVAL;
+               goto out_unlock;
+       }
+
+       ret = btrfs_cmp_data(src, loff, dst, dst_loff, len);
+       if (ret == 0)
+               ret = btrfs_clone(src, dst, loff, len, len, dst_loff);
+
+out_unlock:
+       btrfs_double_unlock(src, loff, dst, dst_loff, len);
+
+       return ret;
+}
+
+#define BTRFS_MAX_DEDUPE_LEN   (16 * 1024 * 1024)
+
+static long btrfs_ioctl_file_extent_same(struct file *file,
+                                        void __user *argp)
+{
+       struct btrfs_ioctl_same_args *args = argp;
+       struct btrfs_ioctl_same_args same;
+       struct btrfs_ioctl_same_extent_info info;
+       struct inode *src = file->f_dentry->d_inode;
+       struct file *dst_file = NULL;
+       struct inode *dst;
+       u64 off;
+       u64 len;
+       int i;
+       int ret;
+       u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize;
+       bool is_admin = capable(CAP_SYS_ADMIN);
+
+       if (!(file->f_mode & FMODE_READ))
+               return -EINVAL;
 
        ret = mnt_want_write_file(file);
        if (ret)
                return ret;
 
-       src_file = fdget(srcfd);
-       if (!src_file.file) {
-               ret = -EBADF;
-               goto out_drop_write;
+       if (copy_from_user(&same,
+                          (struct btrfs_ioctl_same_args __user *)argp,
+                          sizeof(same))) {
+               ret = -EFAULT;
+               goto out;
        }
 
-       ret = -EXDEV;
-       if (src_file.file->f_path.mnt != file->f_path.mnt)
-               goto out_fput;
+       off = same.logical_offset;
+       len = same.length;
 
-       src = file_inode(src_file.file);
+       /*
+        * Limit the total length we will dedupe for each operation.
+        * This is intended to bound the total time spent in this
+        * ioctl to something sane.
+        */
+       if (len > BTRFS_MAX_DEDUPE_LEN)
+               len = BTRFS_MAX_DEDUPE_LEN;
 
-       ret = -EINVAL;
-       if (src == inode)
-               same_inode = 1;
+       if (WARN_ON_ONCE(bs < PAGE_CACHE_SIZE)) {
+               /*
+                * Btrfs does not support blocksize < page_size. As a
+                * result, btrfs_cmp_data() won't correctly handle
+                * this situation without an update.
+                */
+               ret = -EINVAL;
+               goto out;
+       }
 
-       /* the src must be open for reading */
-       if (!(src_file.file->f_mode & FMODE_READ))
-               goto out_fput;
+       ret = -EISDIR;
+       if (S_ISDIR(src->i_mode))
+               goto out;
 
-       /* don't make the dst file partly checksummed */
-       if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) !=
-           (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM))
-               goto out_fput;
+       ret = -EACCES;
+       if (!S_ISREG(src->i_mode))
+               goto out;
 
-       ret = -EISDIR;
-       if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode))
-               goto out_fput;
+       ret = 0;
+       for (i = 0; i < same.dest_count; i++) {
+               if (copy_from_user(&info, &args->info[i], sizeof(info))) {
+                       ret = -EFAULT;
+                       goto out;
+               }
 
-       ret = -EXDEV;
-       if (src->i_sb != inode->i_sb)
-               goto out_fput;
+               info.bytes_deduped = 0;
 
-       ret = -ENOMEM;
-       buf = vmalloc(btrfs_level_size(root, 0));
-       if (!buf)
-               goto out_fput;
+               dst_file = fget(info.fd);
+               if (!dst_file) {
+                       info.status = -EBADF;
+                       goto next;
+               }
 
-       path = btrfs_alloc_path();
-       if (!path) {
-               vfree(buf);
-               goto out_fput;
-       }
-       path->reada = 2;
+               if (!(is_admin || (dst_file->f_mode & FMODE_WRITE))) {
+                       info.status = -EINVAL;
+                       goto next;
+               }
 
-       if (!same_inode) {
-               if (inode < src) {
-                       mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
-                       mutex_lock_nested(&src->i_mutex, I_MUTEX_CHILD);
-               } else {
-                       mutex_lock_nested(&src->i_mutex, I_MUTEX_PARENT);
-                       mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
+               info.status = -EXDEV;
+               if (file->f_path.mnt != dst_file->f_path.mnt)
+                       goto next;
+
+               dst = dst_file->f_dentry->d_inode;
+               if (src->i_sb != dst->i_sb)
+                       goto next;
+
+               if (S_ISDIR(dst->i_mode)) {
+                       info.status = -EISDIR;
+                       goto next;
                }
-       } else {
-               mutex_lock(&src->i_mutex);
-       }
 
-       /* determine range to clone */
-       ret = -EINVAL;
-       if (off + len > src->i_size || off + len < off)
-               goto out_unlock;
-       if (len == 0)
-               olen = len = src->i_size - off;
-       /* if we extend to eof, continue to block boundary */
-       if (off + len == src->i_size)
-               len = ALIGN(src->i_size, bs) - off;
+               if (!S_ISREG(dst->i_mode)) {
+                       info.status = -EACCES;
+                       goto next;
+               }
 
-       /* verify the end result is block aligned */
-       if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs) ||
-           !IS_ALIGNED(destoff, bs))
-               goto out_unlock;
+               info.status = btrfs_extent_same(src, off, len, dst,
+                                               info.logical_offset);
+               if (info.status == 0)
+                       info.bytes_deduped += len;
 
-       /* verify if ranges are overlapped within the same file */
-       if (same_inode) {
-               if (destoff + len > off && destoff < off + len)
-                       goto out_unlock;
-       }
+next:
+               if (dst_file)
+                       fput(dst_file);
 
-       if (destoff > inode->i_size) {
-               ret = btrfs_cont_expand(inode, inode->i_size, destoff);
-               if (ret)
-                       goto out_unlock;
+               if (__put_user_unaligned(info.status, &args->info[i].status) ||
+                   __put_user_unaligned(info.bytes_deduped,
+                                        &args->info[i].bytes_deduped)) {
+                       ret = -EFAULT;
+                       goto out;
+               }                                                               
        }
 
-       /* truncate page cache pages from target inode range */
-       truncate_inode_pages_range(&inode->i_data, destoff,
-                                  PAGE_CACHE_ALIGN(destoff + len) - 1);
+out:
+       mnt_drop_write_file(file);
+       return ret;
+}
 
-       /* do any pending delalloc/csum calc on src, one way or
-          another, and lock file content */
-       while (1) {
-               struct btrfs_ordered_extent *ordered;
-               lock_extent(&BTRFS_I(src)->io_tree, off, off + len - 1);
-               ordered = btrfs_lookup_first_ordered_extent(src, off + len - 1);
-               if (!ordered &&
-                   !test_range_bit(&BTRFS_I(src)->io_tree, off, off + len - 1,
-                                   EXTENT_DELALLOC, 0, NULL))
-                       break;
-               unlock_extent(&BTRFS_I(src)->io_tree, off, off + len - 1);
-               if (ordered)
-                       btrfs_put_ordered_extent(ordered);
-               btrfs_wait_ordered_range(src, off, len);
+/**
+ * btrfs_clone() - clone a range from inode file to another
+ *
+ * @src: Inode to clone from
+ * @inode: Inode to clone to
+ * @off: Offset within source to start clone from
+ * @olen: Original length, passed by user, of range to clone
+ * @olen_aligned: Block-aligned value of olen, extent_same uses
+ *               identical values here
+ * @destoff: Offset within @inode to start clone
+ */
+static int btrfs_clone(struct inode *src, struct inode *inode,
+                      u64 off, u64 olen, u64 olen_aligned, u64 destoff)
+{
+       struct btrfs_root *root = BTRFS_I(inode)->root;
+       struct btrfs_path *path = NULL;
+       struct extent_buffer *leaf;
+       struct btrfs_trans_handle *trans;
+       char *buf = NULL;
+       struct btrfs_key key;
+       u32 nritems;
+       int slot;
+       int ret;
+       u64 len = olen_aligned;
+
+       ret = -ENOMEM;
+       buf = vmalloc(btrfs_level_size(root, 0));
+       if (!buf)
+               return ret;
+
+       path = btrfs_alloc_path();
+       if (!path) {
+               vfree(buf);
+               return ret;
        }
 
+       path->reada = 2;
        /* clone data */
        key.objectid = btrfs_ino(src);
        key.type = BTRFS_EXTENT_DATA_KEY;
@@ -2858,15 +3094,132 @@ next:
                key.offset++;
        }
        ret = 0;
+
 out:
        btrfs_release_path(path);
+       btrfs_free_path(path);
+       vfree(buf);
+       return ret;
+}
+
+static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
+                                      u64 off, u64 olen, u64 destoff)
+{
+       struct inode *inode = fdentry(file)->d_inode;
+       struct btrfs_root *root = BTRFS_I(inode)->root;
+       struct fd src_file;
+       struct inode *src;
+       int ret;
+       u64 len = olen;
+       u64 bs = root->fs_info->sb->s_blocksize;
+       int same_inode = 0;
+
+       /*
+        * TODO:
+        * - split compressed inline extents.  annoying: we need to
+        *   decompress into destination's address_space (the file offset
+        *   may change, so source mapping won't do), then recompress (or
+        *   otherwise reinsert) a subrange.
+        * - allow ranges within the same file to be cloned (provided
+        *   they don't overlap)?
+        */
+
+       /* the destination must be opened for writing */
+       if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND))
+               return -EINVAL;
+
+       if (btrfs_root_readonly(root))
+               return -EROFS;
+
+       ret = mnt_want_write_file(file);
+       if (ret)
+               return ret;
+
+       src_file = fdget(srcfd);
+       if (!src_file.file) {
+               ret = -EBADF;
+               goto out_drop_write;
+       }
+
+       ret = -EXDEV;
+       if (src_file.file->f_path.mnt != file->f_path.mnt)
+               goto out_fput;
+
+       src = file_inode(src_file.file);
+
+       ret = -EINVAL;
+       if (src == inode)
+               same_inode = 1;
+
+       /* the src must be open for reading */
+       if (!(src_file.file->f_mode & FMODE_READ))
+               goto out_fput;
+
+       /* don't make the dst file partly checksummed */
+       if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) !=
+           (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM))
+               goto out_fput;
+
+       ret = -EISDIR;
+       if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode))
+               goto out_fput;
+
+       ret = -EXDEV;
+       if (src->i_sb != inode->i_sb)
+               goto out_fput;
+
+       if (!same_inode) {
+               if (inode < src) {
+                       mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
+                       mutex_lock_nested(&src->i_mutex, I_MUTEX_CHILD);
+               } else {
+                       mutex_lock_nested(&src->i_mutex, I_MUTEX_PARENT);
+                       mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
+               }
+       } else {
+               mutex_lock(&src->i_mutex);
+       }
+
+       /* determine range to clone */
+       ret = -EINVAL;
+       if (off + len > src->i_size || off + len < off)
+               goto out_unlock;
+       if (len == 0)
+               olen = len = src->i_size - off;
+       /* if we extend to eof, continue to block boundary */
+       if (off + len == src->i_size)
+               len = ALIGN(src->i_size, bs) - off;
+
+       /* verify the end result is block aligned */
+       if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs) ||
+           !IS_ALIGNED(destoff, bs))
+               goto out_unlock;
+
+       /* verify if ranges are overlapped within the same file */
+       if (same_inode) {
+               if (destoff + len > off && destoff < off + len)
+                       goto out_unlock;
+       }
+
+       if (destoff > inode->i_size) {
+               ret = btrfs_cont_expand(inode, inode->i_size, destoff);
+               if (ret)
+                       goto out_unlock;
+       }
+
+       /* truncate page cache pages from target inode range */
+       truncate_inode_pages_range(&inode->i_data, destoff,
+                                  PAGE_CACHE_ALIGN(destoff + len) - 1);
+
+       lock_extent_range(src, off, len);
+
+       ret = btrfs_clone(src, inode, off, olen, len, destoff);
+
        unlock_extent(&BTRFS_I(src)->io_tree, off, off + len - 1);
 out_unlock:
        mutex_unlock(&src->i_mutex);
        if (!same_inode)
                mutex_unlock(&inode->i_mutex);
-       vfree(buf);
-       btrfs_free_path(path);
 out_fput:
        fdput(src_file);
 out_drop_write:
@@ -3312,11 +3665,13 @@ static long btrfs_ioctl_dev_replace(struct btrfs_root *root, void __user *arg)
 
        switch (p->cmd) {
        case BTRFS_IOCTL_DEV_REPLACE_CMD_START:
+               if (root->fs_info->sb->s_flags & MS_RDONLY)
+                       return -EROFS;
+
                if (atomic_xchg(
                        &root->fs_info->mutually_exclusive_operation_running,
                        1)) {
-                       pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
-                       ret = -EINPROGRESS;
+                       ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
                } else {
                        ret = btrfs_dev_replace_start(root, p);
                        atomic_set(
@@ -3560,8 +3915,7 @@ again:
        } else {
                /* this is (1) */
                mutex_unlock(&fs_info->balance_mutex);
-               pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
-               ret = -EINVAL;
+               ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
                goto out;
        }
 
@@ -3967,6 +4321,7 @@ static long btrfs_ioctl_set_received_subvol(struct file *file,
        struct btrfs_trans_handle *trans;
        struct timespec ct = CURRENT_TIME;
        int ret = 0;
+       int received_uuid_changed;
 
        ret = mnt_want_write_file(file);
        if (ret < 0)
@@ -3996,7 +4351,11 @@ static long btrfs_ioctl_set_received_subvol(struct file *file,
                goto out;
        }
 
-       trans = btrfs_start_transaction(root, 1);
+       /*
+        * 1 - root item
+        * 2 - uuid items (received uuid + subvol uuid)
+        */
+       trans = btrfs_start_transaction(root, 3);
        if (IS_ERR(trans)) {
                ret = PTR_ERR(trans);
                trans = NULL;
@@ -4007,24 +4366,42 @@ static long btrfs_ioctl_set_received_subvol(struct file *file,
        sa->rtime.sec = ct.tv_sec;
        sa->rtime.nsec = ct.tv_nsec;
 
+       received_uuid_changed = memcmp(root_item->received_uuid, sa->uuid,
+                                      BTRFS_UUID_SIZE);
+       if (received_uuid_changed &&
+           !btrfs_is_empty_uuid(root_item->received_uuid))
+               btrfs_uuid_tree_rem(trans, root->fs_info->uuid_root,
+                                   root_item->received_uuid,
+                                   BTRFS_UUID_KEY_RECEIVED_SUBVOL,
+                                   root->root_key.objectid);
        memcpy(root_item->received_uuid, sa->uuid, BTRFS_UUID_SIZE);
        btrfs_set_root_stransid(root_item, sa->stransid);
        btrfs_set_root_rtransid(root_item, sa->rtransid);
-       root_item->stime.sec = cpu_to_le64(sa->stime.sec);
-       root_item->stime.nsec = cpu_to_le32(sa->stime.nsec);
-       root_item->rtime.sec = cpu_to_le64(sa->rtime.sec);
-       root_item->rtime.nsec = cpu_to_le32(sa->rtime.nsec);
+       btrfs_set_stack_timespec_sec(&root_item->stime, sa->stime.sec);
+       btrfs_set_stack_timespec_nsec(&root_item->stime, sa->stime.nsec);
+       btrfs_set_stack_timespec_sec(&root_item->rtime, sa->rtime.sec);
+       btrfs_set_stack_timespec_nsec(&root_item->rtime, sa->rtime.nsec);
 
        ret = btrfs_update_root(trans, root->fs_info->tree_root,
                                &root->root_key, &root->root_item);
        if (ret < 0) {
                btrfs_end_transaction(trans, root);
-               trans = NULL;
                goto out;
-       } else {
-               ret = btrfs_commit_transaction(trans, root);
-               if (ret < 0)
+       }
+       if (received_uuid_changed && !btrfs_is_empty_uuid(sa->uuid)) {
+               ret = btrfs_uuid_tree_add(trans, root->fs_info->uuid_root,
+                                         sa->uuid,
+                                         BTRFS_UUID_KEY_RECEIVED_SUBVOL,
+                                         root->root_key.objectid);
+               if (ret < 0 && ret != -EEXIST) {
+                       btrfs_abort_transaction(trans, root, ret);
                        goto out;
+               }
+       }
+       ret = btrfs_commit_transaction(trans, root);
+       if (ret < 0) {
+               btrfs_abort_transaction(trans, root, ret);
+               goto out;
        }
 
        ret = copy_to_user(arg, sa, sizeof(*sa));
@@ -4041,18 +4418,22 @@ out:
 static int btrfs_ioctl_get_fslabel(struct file *file, void __user *arg)
 {
        struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
-       const char *label = root->fs_info->super_copy->label;
-       size_t len = strnlen(label, BTRFS_LABEL_SIZE);
+       size_t len;
        int ret;
+       char label[BTRFS_LABEL_SIZE];
+
+       spin_lock(&root->fs_info->super_lock);
+       memcpy(label, root->fs_info->super_copy->label, BTRFS_LABEL_SIZE);
+       spin_unlock(&root->fs_info->super_lock);
+
+       len = strnlen(label, BTRFS_LABEL_SIZE);
 
        if (len == BTRFS_LABEL_SIZE) {
                pr_warn("btrfs: label is too long, return the first %zu bytes\n",
                        --len);
        }
 
-       mutex_lock(&root->fs_info->volume_mutex);
        ret = copy_to_user(arg, label, len);
-       mutex_unlock(&root->fs_info->volume_mutex);
 
        return ret ? -EFAULT : 0;
 }
@@ -4081,18 +4462,18 @@ static int btrfs_ioctl_set_fslabel(struct file *file, void __user *arg)
        if (ret)
                return ret;
 
-       mutex_lock(&root->fs_info->volume_mutex);
        trans = btrfs_start_transaction(root, 0);
        if (IS_ERR(trans)) {
                ret = PTR_ERR(trans);
                goto out_unlock;
        }
 
+       spin_lock(&root->fs_info->super_lock);
        strcpy(super_block->label, label);
+       spin_unlock(&root->fs_info->super_lock);
        ret = btrfs_end_transaction(trans, root);
 
 out_unlock:
-       mutex_unlock(&root->fs_info->volume_mutex);
        mnt_drop_write_file(file);
        return ret;
 }
@@ -4207,6 +4588,8 @@ long btrfs_ioctl(struct file *file, unsigned int
                return btrfs_ioctl_get_fslabel(file, argp);
        case BTRFS_IOC_SET_FSLABEL:
                return btrfs_ioctl_set_fslabel(file, argp);
+       case BTRFS_IOC_FILE_EXTENT_SAME:
+               return btrfs_ioctl_file_extent_same(file, argp);
        }
 
        return -ENOTTY;
index f93151a..b6a6f07 100644 (file)
@@ -207,8 +207,10 @@ static int lzo_compress_pages(struct list_head *ws,
                }
 
                /* we're making it bigger, give up */
-               if (tot_in > 8192 && tot_in < tot_out)
+               if (tot_in > 8192 && tot_in < tot_out) {
+                       ret = -1;
                        goto out;
+               }
 
                /* we're all done */
                if (tot_in >= len)
index 8136982..966b413 100644 (file)
@@ -67,7 +67,7 @@ static void ordered_data_tree_panic(struct inode *inode, int errno,
 {
        struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
        btrfs_panic(fs_info, errno, "Inconsistency in ordered tree at offset "
-                   "%llu\n", (unsigned long long)offset);
+                   "%llu\n", offset);
 }
 
 /*
@@ -205,6 +205,7 @@ static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset,
        entry->bytes_left = len;
        entry->inode = igrab(inode);
        entry->compress_type = compress_type;
+       entry->truncated_len = (u64)-1;
        if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE)
                set_bit(type, &entry->flags);
 
@@ -336,14 +337,12 @@ int btrfs_dec_test_first_ordered_pending(struct inode *inode,
        *file_offset = dec_end;
        if (dec_start > dec_end) {
                printk(KERN_CRIT "bad ordering dec_start %llu end %llu\n",
-                      (unsigned long long)dec_start,
-                      (unsigned long long)dec_end);
+                      dec_start, dec_end);
        }
        to_dec = dec_end - dec_start;
        if (to_dec > entry->bytes_left) {
                printk(KERN_CRIT "bad ordered accounting left %llu size %llu\n",
-                      (unsigned long long)entry->bytes_left,
-                      (unsigned long long)to_dec);
+                      entry->bytes_left, to_dec);
        }
        entry->bytes_left -= to_dec;
        if (!uptodate)
@@ -403,8 +402,7 @@ have_entry:
 
        if (io_size > entry->bytes_left) {
                printk(KERN_CRIT "bad ordered accounting left %llu size %llu\n",
-                      (unsigned long long)entry->bytes_left,
-                      (unsigned long long)io_size);
+                      entry->bytes_left, io_size);
        }
        entry->bytes_left -= io_size;
        if (!uptodate)
@@ -671,7 +669,7 @@ int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans,
        INIT_LIST_HEAD(&splice);
        INIT_LIST_HEAD(&works);
 
-       mutex_lock(&root->fs_info->ordered_operations_mutex);
+       mutex_lock(&root->fs_info->ordered_extent_flush_mutex);
        spin_lock(&root->fs_info->ordered_root_lock);
        list_splice_init(&cur_trans->ordered_operations, &splice);
        while (!list_empty(&splice)) {
@@ -718,7 +716,7 @@ out:
                list_del_init(&work->list);
                btrfs_wait_and_free_delalloc_work(work);
        }
-       mutex_unlock(&root->fs_info->ordered_operations_mutex);
+       mutex_unlock(&root->fs_info->ordered_extent_flush_mutex);
        return ret;
 }
 
@@ -923,12 +921,16 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
        struct btrfs_ordered_extent *test;
        int ret = 1;
 
-       if (ordered)
+       spin_lock_irq(&tree->lock);
+       if (ordered) {
                offset = entry_end(ordered);
-       else
+               if (test_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags))
+                       offset = min(offset,
+                                    ordered->file_offset +
+                                    ordered->truncated_len);
+       } else {
                offset = ALIGN(offset, BTRFS_I(inode)->root->sectorsize);
-
-       spin_lock_irq(&tree->lock);
+       }
        disk_i_size = BTRFS_I(inode)->disk_i_size;
 
        /* truncate file */
index 68844d5..d9a5aa0 100644 (file)
@@ -69,6 +69,7 @@ struct btrfs_ordered_sum {
                                       * the isize. */
 #define BTRFS_ORDERED_LOGGED_CSUM 8 /* We've logged the csums on this ordered
                                       ordered extent */
+#define BTRFS_ORDERED_TRUNCATED 9 /* Set when we have to truncate an extent */
 
 struct btrfs_ordered_extent {
        /* logical offset in the file */
@@ -96,6 +97,12 @@ struct btrfs_ordered_extent {
         */
        u64 outstanding_isize;
 
+       /*
+        * If we get truncated we need to adjust the file extent we enter for
+        * this ordered extent so that we do not expose stale data.
+        */
+       u64 truncated_len;
+
        /* flags (described above) */
        unsigned long flags;
 
index dc0024f..0088bed 100644 (file)
@@ -26,14 +26,12 @@ static void print_chunk(struct extent_buffer *eb, struct btrfs_chunk *chunk)
        int i;
        printk(KERN_INFO "\t\tchunk length %llu owner %llu type %llu "
               "num_stripes %d\n",
-              (unsigned long long)btrfs_chunk_length(eb, chunk),
-              (unsigned long long)btrfs_chunk_owner(eb, chunk),
-              (unsigned long long)btrfs_chunk_type(eb, chunk),
-              num_stripes);
+              btrfs_chunk_length(eb, chunk), btrfs_chunk_owner(eb, chunk),
+              btrfs_chunk_type(eb, chunk), num_stripes);
        for (i = 0 ; i < num_stripes ; i++) {
                printk(KERN_INFO "\t\t\tstripe %d devid %llu offset %llu\n", i,
-                     (unsigned long long)btrfs_stripe_devid_nr(eb, chunk, i),
-                     (unsigned long long)btrfs_stripe_offset_nr(eb, chunk, i));
+                     btrfs_stripe_devid_nr(eb, chunk, i),
+                     btrfs_stripe_offset_nr(eb, chunk, i));
        }
 }
 static void print_dev_item(struct extent_buffer *eb,
@@ -41,18 +39,18 @@ static void print_dev_item(struct extent_buffer *eb,
 {
        printk(KERN_INFO "\t\tdev item devid %llu "
               "total_bytes %llu bytes used %llu\n",
-              (unsigned long long)btrfs_device_id(eb, dev_item),
-              (unsigned long long)btrfs_device_total_bytes(eb, dev_item),
-              (unsigned long long)btrfs_device_bytes_used(eb, dev_item));
+              btrfs_device_id(eb, dev_item),
+              btrfs_device_total_bytes(eb, dev_item),
+              btrfs_device_bytes_used(eb, dev_item));
 }
 static void print_extent_data_ref(struct extent_buffer *eb,
                                  struct btrfs_extent_data_ref *ref)
 {
        printk(KERN_INFO "\t\textent data backref root %llu "
               "objectid %llu offset %llu count %u\n",
-              (unsigned long long)btrfs_extent_data_ref_root(eb, ref),
-              (unsigned long long)btrfs_extent_data_ref_objectid(eb, ref),
-              (unsigned long long)btrfs_extent_data_ref_offset(eb, ref),
+              btrfs_extent_data_ref_root(eb, ref),
+              btrfs_extent_data_ref_objectid(eb, ref),
+              btrfs_extent_data_ref_offset(eb, ref),
               btrfs_extent_data_ref_count(eb, ref));
 }
 
@@ -87,19 +85,17 @@ static void print_extent_item(struct extent_buffer *eb, int slot)
        flags = btrfs_extent_flags(eb, ei);
 
        printk(KERN_INFO "\t\textent refs %llu gen %llu flags %llu\n",
-              (unsigned long long)btrfs_extent_refs(eb, ei),
-              (unsigned long long)btrfs_extent_generation(eb, ei),
-              (unsigned long long)flags);
+              btrfs_extent_refs(eb, ei), btrfs_extent_generation(eb, ei),
+              flags);
 
        if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
                struct btrfs_tree_block_info *info;
                info = (struct btrfs_tree_block_info *)(ei + 1);
                btrfs_tree_block_key(eb, info, &key);
-               printk(KERN_INFO "\t\ttree block key (%llu %x %llu) "
+               printk(KERN_INFO "\t\ttree block key (%llu %u %llu) "
                       "level %d\n",
-                      (unsigned long long)btrfs_disk_key_objectid(&key),
-                      key.type,
-                      (unsigned long long)btrfs_disk_key_offset(&key),
+                      btrfs_disk_key_objectid(&key), key.type,
+                      btrfs_disk_key_offset(&key),
                       btrfs_tree_block_level(eb, info));
                iref = (struct btrfs_extent_inline_ref *)(info + 1);
        } else {
@@ -115,11 +111,11 @@ static void print_extent_item(struct extent_buffer *eb, int slot)
                switch (type) {
                case BTRFS_TREE_BLOCK_REF_KEY:
                        printk(KERN_INFO "\t\ttree block backref "
-                               "root %llu\n", (unsigned long long)offset);
+                               "root %llu\n", offset);
                        break;
                case BTRFS_SHARED_BLOCK_REF_KEY:
                        printk(KERN_INFO "\t\tshared block backref "
-                               "parent %llu\n", (unsigned long long)offset);
+                               "parent %llu\n", offset);
                        break;
                case BTRFS_EXTENT_DATA_REF_KEY:
                        dref = (struct btrfs_extent_data_ref *)(&iref->offset);
@@ -129,8 +125,7 @@ static void print_extent_item(struct extent_buffer *eb, int slot)
                        sref = (struct btrfs_shared_data_ref *)(iref + 1);
                        printk(KERN_INFO "\t\tshared data backref "
                               "parent %llu count %u\n",
-                              (unsigned long long)offset,
-                              btrfs_shared_data_ref_count(eb, sref));
+                              offset, btrfs_shared_data_ref_count(eb, sref));
                        break;
                default:
                        BUG();
@@ -148,13 +143,32 @@ static void print_extent_ref_v0(struct extent_buffer *eb, int slot)
        ref0 = btrfs_item_ptr(eb, slot, struct btrfs_extent_ref_v0);
        printk("\t\textent back ref root %llu gen %llu "
                "owner %llu num_refs %lu\n",
-               (unsigned long long)btrfs_ref_root_v0(eb, ref0),
-               (unsigned long long)btrfs_ref_generation_v0(eb, ref0),
-               (unsigned long long)btrfs_ref_objectid_v0(eb, ref0),
+               btrfs_ref_root_v0(eb, ref0),
+               btrfs_ref_generation_v0(eb, ref0),
+               btrfs_ref_objectid_v0(eb, ref0),
                (unsigned long)btrfs_ref_count_v0(eb, ref0));
 }
 #endif
 
+static void print_uuid_item(struct extent_buffer *l, unsigned long offset,
+                           u32 item_size)
+{
+       if (!IS_ALIGNED(item_size, sizeof(u64))) {
+               pr_warn("btrfs: uuid item with illegal size %lu!\n",
+                       (unsigned long)item_size);
+               return;
+       }
+       while (item_size) {
+               __le64 subvol_id;
+
+               read_extent_buffer(l, &subvol_id, offset, sizeof(subvol_id));
+               printk(KERN_INFO "\t\tsubvol_id %llu\n",
+                      (unsigned long long)le64_to_cpu(subvol_id));
+               item_size -= sizeof(u64);
+               offset += sizeof(u64);
+       }
+}
+
 void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
 {
        int i;
@@ -177,39 +191,34 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
        nr = btrfs_header_nritems(l);
 
        btrfs_info(root->fs_info, "leaf %llu total ptrs %d free space %d",
-               (unsigned long long)btrfs_header_bytenr(l), nr,
-               btrfs_leaf_free_space(root, l));
+                  btrfs_header_bytenr(l), nr, btrfs_leaf_free_space(root, l));
        for (i = 0 ; i < nr ; i++) {
                item = btrfs_item_nr(l, i);
                btrfs_item_key_to_cpu(l, &key, i);
                type = btrfs_key_type(&key);
-               printk(KERN_INFO "\titem %d key (%llu %x %llu) itemoff %d "
+               printk(KERN_INFO "\titem %d key (%llu %u %llu) itemoff %d "
                       "itemsize %d\n",
-                       i,
-                       (unsigned long long)key.objectid, type,
-                       (unsigned long long)key.offset,
+                       i, key.objectid, type, key.offset,
                        btrfs_item_offset(l, item), btrfs_item_size(l, item));
                switch (type) {
                case BTRFS_INODE_ITEM_KEY:
                        ii = btrfs_item_ptr(l, i, struct btrfs_inode_item);
                        printk(KERN_INFO "\t\tinode generation %llu size %llu "
                               "mode %o\n",
-                              (unsigned long long)
                               btrfs_inode_generation(l, ii),
-                             (unsigned long long)btrfs_inode_size(l, ii),
+                              btrfs_inode_size(l, ii),
                               btrfs_inode_mode(l, ii));
                        break;
                case BTRFS_DIR_ITEM_KEY:
                        di = btrfs_item_ptr(l, i, struct btrfs_dir_item);
                        btrfs_dir_item_key_to_cpu(l, di, &found_key);
                        printk(KERN_INFO "\t\tdir oid %llu type %u\n",
-                               (unsigned long long)found_key.objectid,
+                               found_key.objectid,
                                btrfs_dir_type(l, di));
                        break;
                case BTRFS_ROOT_ITEM_KEY:
                        ri = btrfs_item_ptr(l, i, struct btrfs_root_item);
                        printk(KERN_INFO "\t\troot data bytenr %llu refs %u\n",
-                               (unsigned long long)
                                btrfs_disk_root_bytenr(l, ri),
                                btrfs_disk_root_refs(l, ri));
                        break;
@@ -245,17 +254,12 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
                        }
                        printk(KERN_INFO "\t\textent data disk bytenr %llu "
                               "nr %llu\n",
-                              (unsigned long long)
                               btrfs_file_extent_disk_bytenr(l, fi),
-                              (unsigned long long)
                               btrfs_file_extent_disk_num_bytes(l, fi));
                        printk(KERN_INFO "\t\textent data offset %llu "
                               "nr %llu ram %llu\n",
-                              (unsigned long long)
                               btrfs_file_extent_offset(l, fi),
-                              (unsigned long long)
                               btrfs_file_extent_num_bytes(l, fi),
-                              (unsigned long long)
                               btrfs_file_extent_ram_bytes(l, fi));
                        break;
                case BTRFS_EXTENT_REF_V0_KEY:
@@ -269,7 +273,6 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
                        bi = btrfs_item_ptr(l, i,
                                            struct btrfs_block_group_item);
                        printk(KERN_INFO "\t\tblock group used %llu\n",
-                              (unsigned long long)
                               btrfs_disk_block_group_used(l, bi));
                        break;
                case BTRFS_CHUNK_ITEM_KEY:
@@ -286,13 +289,9 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
                        printk(KERN_INFO "\t\tdev extent chunk_tree %llu\n"
                               "\t\tchunk objectid %llu chunk offset %llu "
                               "length %llu\n",
-                              (unsigned long long)
                               btrfs_dev_extent_chunk_tree(l, dev_extent),
-                              (unsigned long long)
                               btrfs_dev_extent_chunk_objectid(l, dev_extent),
-                              (unsigned long long)
                               btrfs_dev_extent_chunk_offset(l, dev_extent),
-                              (unsigned long long)
                               btrfs_dev_extent_length(l, dev_extent));
                        break;
                case BTRFS_DEV_STATS_KEY:
@@ -301,6 +300,11 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
                case BTRFS_DEV_REPLACE_KEY:
                        printk(KERN_INFO "\t\tdev replace\n");
                        break;
+               case BTRFS_UUID_KEY_SUBVOL:
+               case BTRFS_UUID_KEY_RECEIVED_SUBVOL:
+                       print_uuid_item(l, btrfs_item_ptr_offset(l, i),
+                                       btrfs_item_size_nr(l, i));
+                       break;
                };
        }
 }
@@ -320,16 +324,13 @@ void btrfs_print_tree(struct btrfs_root *root, struct extent_buffer *c)
                return;
        }
        btrfs_info(root->fs_info, "node %llu level %d total ptrs %d free spc %u",
-               (unsigned long long)btrfs_header_bytenr(c),
-               level, nr, (u32)BTRFS_NODEPTRS_PER_BLOCK(root) - nr);
+               btrfs_header_bytenr(c), level, nr,
+               (u32)BTRFS_NODEPTRS_PER_BLOCK(root) - nr);
        for (i = 0; i < nr; i++) {
                btrfs_node_key_to_cpu(c, &key, i);
                printk(KERN_INFO "\tkey %d (%llu %u %llu) block %llu\n",
-                      i,
-                      (unsigned long long)key.objectid,
-                      key.type,
-                      (unsigned long long)key.offset,
-                      (unsigned long long)btrfs_node_blockptr(c, i));
+                      i, key.objectid, key.type, key.offset,
+                      btrfs_node_blockptr(c, i));
        }
        for (i = 0; i < nr; i++) {
                struct extent_buffer *next = read_tree_block(root,
index 1280eff..4e6ef49 100644 (file)
@@ -157,18 +157,11 @@ static struct btrfs_qgroup *add_qgroup_rb(struct btrfs_fs_info *fs_info,
        return qgroup;
 }
 
-/* must be called with qgroup_lock held */
-static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid)
+static void __del_qgroup_rb(struct btrfs_qgroup *qgroup)
 {
-       struct btrfs_qgroup *qgroup = find_qgroup_rb(fs_info, qgroupid);
        struct btrfs_qgroup_list *list;
 
-       if (!qgroup)
-               return -ENOENT;
-
-       rb_erase(&qgroup->node, &fs_info->qgroup_tree);
        list_del(&qgroup->dirty);
-
        while (!list_empty(&qgroup->groups)) {
                list = list_first_entry(&qgroup->groups,
                                        struct btrfs_qgroup_list, next_group);
@@ -185,7 +178,18 @@ static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid)
                kfree(list);
        }
        kfree(qgroup);
+}
 
+/* must be called with qgroup_lock held */
+static int del_qgroup_rb(struct btrfs_fs_info *fs_info, u64 qgroupid)
+{
+       struct btrfs_qgroup *qgroup = find_qgroup_rb(fs_info, qgroupid);
+
+       if (!qgroup)
+               return -ENOENT;
+
+       rb_erase(&qgroup->node, &fs_info->qgroup_tree);
+       __del_qgroup_rb(qgroup);
        return 0;
 }
 
@@ -394,8 +398,7 @@ next1:
                if (ret == -ENOENT) {
                        printk(KERN_WARNING
                                "btrfs: orphan qgroup relation 0x%llx->0x%llx\n",
-                               (unsigned long long)found_key.objectid,
-                               (unsigned long long)found_key.offset);
+                               found_key.objectid, found_key.offset);
                        ret = 0;        /* ignore the error */
                }
                if (ret)
@@ -428,39 +431,28 @@ out:
 }
 
 /*
- * This is only called from close_ctree() or open_ctree(), both in single-
- * treaded paths. Clean up the in-memory structures. No locking needed.
+ * This is called from close_ctree() or open_ctree() or btrfs_quota_disable(),
+ * the first two are in single-threaded paths. And for the third one, we have set
+ * quota_root to be null with qgroup_lock held before, so it is safe to clean
+ * up the in-memory structures without qgroup_lock held.
  */
 void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info)
 {
        struct rb_node *n;
        struct btrfs_qgroup *qgroup;
-       struct btrfs_qgroup_list *list;
 
        while ((n = rb_first(&fs_info->qgroup_tree))) {
                qgroup = rb_entry(n, struct btrfs_qgroup, node);
                rb_erase(n, &fs_info->qgroup_tree);
-
-               while (!list_empty(&qgroup->groups)) {
-                       list = list_first_entry(&qgroup->groups,
-                                               struct btrfs_qgroup_list,
-                                               next_group);
-                       list_del(&list->next_group);
-                       list_del(&list->next_member);
-                       kfree(list);
-               }
-
-               while (!list_empty(&qgroup->members)) {
-                       list = list_first_entry(&qgroup->members,
-                                               struct btrfs_qgroup_list,
-                                               next_member);
-                       list_del(&list->next_group);
-                       list_del(&list->next_member);
-                       kfree(list);
-               }
-               kfree(qgroup);
+               __del_qgroup_rb(qgroup);
        }
+       /*
+        * we call btrfs_free_qgroup_config() when umounting
+        * filesystem and disabling quota, so we set qgroup_ulist
+        * to be null here to avoid double free.
+        */
        ulist_free(fs_info->qgroup_ulist);
+       fs_info->qgroup_ulist = NULL;
 }
 
 static int add_qgroup_relation_item(struct btrfs_trans_handle *trans,
@@ -946,13 +938,9 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans,
        fs_info->pending_quota_state = 0;
        quota_root = fs_info->quota_root;
        fs_info->quota_root = NULL;
-       btrfs_free_qgroup_config(fs_info);
        spin_unlock(&fs_info->qgroup_lock);
 
-       if (!quota_root) {
-               ret = -EINVAL;
-               goto out;
-       }
+       btrfs_free_qgroup_config(fs_info);
 
        ret = btrfs_clean_quota_tree(trans, quota_root);
        if (ret)
@@ -1174,7 +1162,7 @@ int btrfs_limit_qgroup(struct btrfs_trans_handle *trans,
        if (ret) {
                fs_info->qgroup_flags |= BTRFS_QGROUP_STATUS_FLAG_INCONSISTENT;
                printk(KERN_INFO "unable to update quota limit for %llu\n",
-                      (unsigned long long)qgroupid);
+                      qgroupid);
        }
 
        spin_lock(&fs_info->qgroup_lock);
@@ -1884,10 +1872,9 @@ qgroup_rescan_leaf(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
                                         path, 1, 0);
 
        pr_debug("current progress key (%llu %u %llu), search_slot ret %d\n",
-                (unsigned long long)fs_info->qgroup_rescan_progress.objectid,
+                fs_info->qgroup_rescan_progress.objectid,
                 fs_info->qgroup_rescan_progress.type,
-                (unsigned long long)fs_info->qgroup_rescan_progress.offset,
-                ret);
+                fs_info->qgroup_rescan_progress.offset, ret);
 
        if (ret) {
                /*
index 0525e13..d0ecfbd 100644 (file)
@@ -1540,8 +1540,10 @@ static int full_stripe_write(struct btrfs_raid_bio *rbio)
        int ret;
 
        ret = alloc_rbio_parity_pages(rbio);
-       if (ret)
+       if (ret) {
+               __free_raid_bio(rbio);
                return ret;
+       }
 
        ret = lock_stripe_add(rbio);
        if (ret == 0)
@@ -1687,11 +1689,8 @@ int raid56_parity_write(struct btrfs_root *root, struct bio *bio,
        struct blk_plug_cb *cb;
 
        rbio = alloc_rbio(root, bbio, raid_map, stripe_len);
-       if (IS_ERR(rbio)) {
-               kfree(raid_map);
-               kfree(bbio);
+       if (IS_ERR(rbio))
                return PTR_ERR(rbio);
-       }
        bio_list_add(&rbio->bio_list, bio);
        rbio->bio_list_bytes = bio->bi_size;
 
@@ -2041,9 +2040,8 @@ int raid56_parity_recover(struct btrfs_root *root, struct bio *bio,
        int ret;
 
        rbio = alloc_rbio(root, bbio, raid_map, stripe_len);
-       if (IS_ERR(rbio)) {
+       if (IS_ERR(rbio))
                return PTR_ERR(rbio);
-       }
 
        rbio->read_rebuild = 1;
        bio_list_add(&rbio->bio_list, bio);
@@ -2052,6 +2050,8 @@ int raid56_parity_recover(struct btrfs_root *root, struct bio *bio,
        rbio->faila = find_logical_bio_stripe(rbio, bio);
        if (rbio->faila == -1) {
                BUG();
+               kfree(raid_map);
+               kfree(bbio);
                kfree(rbio);
                return -EIO;
        }
index 1209649..aacc212 100644 (file)
@@ -335,7 +335,7 @@ static void backref_tree_panic(struct rb_node *rb_node, int errno, u64 bytenr)
        if (bnode->root)
                fs_info = bnode->root->fs_info;
        btrfs_panic(fs_info, errno, "Inconsistency in backref cache "
-                   "found at offset %llu\n", (unsigned long long)bytenr);
+                   "found at offset %llu\n", bytenr);
 }
 
 /*
@@ -641,6 +641,11 @@ int find_inline_backref(struct extent_buffer *leaf, int slot,
                WARN_ON(item_size < sizeof(*ei) + sizeof(*bi));
                return 1;
        }
+       if (key.type == BTRFS_METADATA_ITEM_KEY &&
+           item_size <= sizeof(*ei)) {
+               WARN_ON(item_size < sizeof(*ei));
+               return 1;
+       }
 
        if (key.type == BTRFS_EXTENT_ITEM_KEY) {
                bi = (struct btrfs_tree_block_info *)(ei + 1);
@@ -691,6 +696,7 @@ struct backref_node *build_backref_tree(struct reloc_control *rc,
        int cowonly;
        int ret;
        int err = 0;
+       bool need_check = true;
 
        path1 = btrfs_alloc_path();
        path2 = btrfs_alloc_path();
@@ -914,6 +920,7 @@ again:
                        cur->bytenr);
 
                lower = cur;
+               need_check = true;
                for (; level < BTRFS_MAX_LEVEL; level++) {
                        if (!path2->nodes[level]) {
                                BUG_ON(btrfs_root_bytenr(&root->root_item) !=
@@ -957,14 +964,12 @@ again:
 
                                /*
                                 * add the block to pending list if we
-                                * need check its backrefs. only block
-                                * at 'cur->level + 1' is added to the
-                                * tail of pending list. this guarantees
-                                * we check backrefs from lower level
-                                * blocks to upper level blocks.
+                                * need to check its backrefs; we only do this once
+                                * while walking up a tree as we will catch
+                                * anything else later on.
                                 */
-                               if (!upper->checked &&
-                                   level == cur->level + 1) {
+                               if (!upper->checked && need_check) {
+                                       need_check = false;
                                        list_add_tail(&edge->list[UPPER],
                                                      &list);
                                } else
@@ -2314,8 +2319,13 @@ again:
                        BUG_ON(root->reloc_root != reloc_root);
 
                        ret = merge_reloc_root(rc, root);
-                       if (ret)
+                       if (ret) {
+                               __update_reloc_root(reloc_root, 1);
+                               free_extent_buffer(reloc_root->node);
+                               free_extent_buffer(reloc_root->commit_root);
+                               kfree(reloc_root);
                                goto out;
+                       }
                } else {
                        list_del_init(&reloc_root->root_list);
                }
@@ -2344,9 +2354,6 @@ again:
                        if (IS_ERR(root))
                                continue;
 
-                       if (btrfs_root_refs(&root->root_item) == 0)
-                               continue;
-
                        trans = btrfs_join_transaction(root);
                        BUG_ON(IS_ERR(trans));
 
@@ -3628,7 +3635,7 @@ int add_data_references(struct reloc_control *rc,
        unsigned long ptr;
        unsigned long end;
        u32 blocksize = btrfs_level_size(rc->extent_root, 0);
-       int ret;
+       int ret = 0;
        int err = 0;
 
        eb = path->nodes[0];
@@ -3655,6 +3662,10 @@ int add_data_references(struct reloc_control *rc,
                } else {
                        BUG();
                }
+               if (ret) {
+                       err = ret;
+                       goto out;
+               }
                ptr += btrfs_extent_inline_ref_size(key.type);
        }
        WARN_ON(ptr > end);
@@ -3700,6 +3711,7 @@ int add_data_references(struct reloc_control *rc,
                }
                path->slots[0]++;
        }
+out:
        btrfs_release_path(path);
        if (err)
                free_block_list(blocks);
@@ -4219,8 +4231,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
        }
 
        printk(KERN_INFO "btrfs: relocating block group %llu flags %llu\n",
-              (unsigned long long)rc->block_group->key.objectid,
-              (unsigned long long)rc->block_group->flags);
+              rc->block_group->key.objectid, rc->block_group->flags);
 
        ret = btrfs_start_all_delalloc_inodes(fs_info, 0);
        if (ret < 0) {
@@ -4242,7 +4253,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
                        break;
 
                printk(KERN_INFO "btrfs: found %llu extents\n",
-                       (unsigned long long)rc->extents_found);
+                       rc->extents_found);
 
                if (rc->stage == MOVE_DATA_EXTENTS && rc->found_file_extent) {
                        btrfs_wait_ordered_range(rc->data_inode, 0, (u64)-1);
index ffb1036..0b1f4ef 100644 (file)
@@ -29,8 +29,8 @@
  * generation numbers as then we know the root was once mounted with an older
  * kernel that was not aware of the root item structure change.
  */
-void btrfs_read_root_item(struct extent_buffer *eb, int slot,
-                         struct btrfs_root_item *item)
+static void btrfs_read_root_item(struct extent_buffer *eb, int slot,
+                               struct btrfs_root_item *item)
 {
        uuid_le uuid;
        int len;
@@ -155,8 +155,7 @@ int btrfs_update_root(struct btrfs_trans_handle *trans, struct btrfs_root
        if (ret != 0) {
                btrfs_print_leaf(root, path->nodes[0]);
                printk(KERN_CRIT "unable to update root key %llu %u %llu\n",
-                      (unsigned long long)key->objectid, key->type,
-                      (unsigned long long)key->offset);
+                      key->objectid, key->type, key->offset);
                BUG_ON(1);
        }
 
@@ -490,13 +489,13 @@ again:
  */
 void btrfs_check_and_init_root_item(struct btrfs_root_item *root_item)
 {
-       u64 inode_flags = le64_to_cpu(root_item->inode.flags);
+       u64 inode_flags = btrfs_stack_inode_flags(&root_item->inode);
 
        if (!(inode_flags & BTRFS_INODE_ROOT_ITEM_INIT)) {
                inode_flags |= BTRFS_INODE_ROOT_ITEM_INIT;
-               root_item->inode.flags = cpu_to_le64(inode_flags);
-               root_item->flags = 0;
-               root_item->byte_limit = 0;
+               btrfs_set_stack_inode_flags(&root_item->inode, inode_flags);
+               btrfs_set_root_flags(root_item, 0);
+               btrfs_set_root_limit(root_item, 0);
        }
 }
 
@@ -507,8 +506,8 @@ void btrfs_update_root_times(struct btrfs_trans_handle *trans,
        struct timespec ct = CURRENT_TIME;
 
        spin_lock(&root->root_item_lock);
-       item->ctransid = cpu_to_le64(trans->transid);
-       item->ctime.sec = cpu_to_le64(ct.tv_sec);
-       item->ctime.nsec = cpu_to_le32(ct.tv_nsec);
+       btrfs_set_root_ctransid(item, trans->transid);
+       btrfs_set_stack_timespec_sec(&item->ctime, ct.tv_sec);
+       btrfs_set_stack_timespec_nsec(&item->ctime, ct.tv_nsec);
        spin_unlock(&root->root_item_lock);
 }
index 64a157b..0afcd45 100644 (file)
@@ -754,8 +754,7 @@ out:
                        num_uncorrectable_read_errors);
                printk_ratelimited_in_rcu(KERN_ERR
                        "btrfs: unable to fixup (nodatasum) error at logical %llu on dev %s\n",
-                       (unsigned long long)fixup->logical,
-                       rcu_str_deref(fixup->dev->name));
+                       fixup->logical, rcu_str_deref(fixup->dev->name));
        }
 
        btrfs_free_path(path);
@@ -1154,8 +1153,7 @@ corrected_error:
                        spin_unlock(&sctx->stat_lock);
                        printk_ratelimited_in_rcu(KERN_ERR
                                "btrfs: fixed up error at logical %llu on dev %s\n",
-                               (unsigned long long)logical,
-                               rcu_str_deref(dev->name));
+                               logical, rcu_str_deref(dev->name));
                }
        } else {
 did_not_correct_error:
@@ -1164,8 +1162,7 @@ did_not_correct_error:
                spin_unlock(&sctx->stat_lock);
                printk_ratelimited_in_rcu(KERN_ERR
                        "btrfs: unable to fixup (regular) error at logical %llu on dev %s\n",
-                       (unsigned long long)logical,
-                       rcu_str_deref(dev->name));
+                       logical, rcu_str_deref(dev->name));
        }
 
 out:
@@ -1345,12 +1342,12 @@ static void scrub_recheck_block_checksum(struct btrfs_fs_info *fs_info,
                mapped_buffer = kmap_atomic(sblock->pagev[0]->page);
                h = (struct btrfs_header *)mapped_buffer;
 
-               if (sblock->pagev[0]->logical != le64_to_cpu(h->bytenr) ||
+               if (sblock->pagev[0]->logical != btrfs_stack_header_bytenr(h) ||
                    memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE) ||
                    memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid,
                           BTRFS_UUID_SIZE)) {
                        sblock->header_error = 1;
-               } else if (generation != le64_to_cpu(h->generation)) {
+               } else if (generation != btrfs_stack_header_generation(h)) {
                        sblock->header_error = 1;
                        sblock->generation_error = 1;
                }
@@ -1720,10 +1717,10 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock)
         * b) the page is already kmapped
         */
 
-       if (sblock->pagev[0]->logical != le64_to_cpu(h->bytenr))
+       if (sblock->pagev[0]->logical != btrfs_stack_header_bytenr(h))
                ++fail;
 
-       if (sblock->pagev[0]->generation != le64_to_cpu(h->generation))
+       if (sblock->pagev[0]->generation != btrfs_stack_header_generation(h))
                ++fail;
 
        if (memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE))
@@ -1786,10 +1783,10 @@ static int scrub_checksum_super(struct scrub_block *sblock)
        s = (struct btrfs_super_block *)mapped_buffer;
        memcpy(on_disk_csum, s->csum, sctx->csum_size);
 
-       if (sblock->pagev[0]->logical != le64_to_cpu(s->bytenr))
+       if (sblock->pagev[0]->logical != btrfs_super_bytenr(s))
                ++fail_cor;
 
-       if (sblock->pagev[0]->generation != le64_to_cpu(s->generation))
+       if (sblock->pagev[0]->generation != btrfs_super_generation(s))
                ++fail_gen;
 
        if (memcmp(s->fsid, fs_info->fsid, BTRFS_UUID_SIZE))
@@ -2455,8 +2452,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx,
                                printk(KERN_ERR
                                       "btrfs scrub: tree block %llu spanning "
                                       "stripes, ignored. logical=%llu\n",
-                                      (unsigned long long)key.objectid,
-                                      (unsigned long long)logical);
+                                      key.objectid, logical);
                                goto next;
                        }
 
@@ -2863,9 +2859,8 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
        if (fs_info->chunk_root->sectorsize != PAGE_SIZE) {
                /* not supported for data w/o checksums */
                printk(KERN_ERR
-                      "btrfs_scrub: size assumption sectorsize != PAGE_SIZE (%d != %lld) fails\n",
-                      fs_info->chunk_root->sectorsize,
-                      (unsigned long long)PAGE_SIZE);
+                      "btrfs_scrub: size assumption sectorsize != PAGE_SIZE (%d != %lu) fails\n",
+                      fs_info->chunk_root->sectorsize, PAGE_SIZE);
                return -EINVAL;
        }
 
@@ -3175,11 +3170,9 @@ static void copy_nocow_pages_worker(struct btrfs_work *work)
                                          copy_nocow_pages_for_inode,
                                          nocow_ctx);
        if (ret != 0 && ret != -ENOENT) {
-               pr_warn("iterate_inodes_from_logical() failed: log %llu, phys %llu, len %llu, mir %llu, ret %d\n",
-                       (unsigned long long)logical,
-                       (unsigned long long)physical_for_dev_replace,
-                       (unsigned long long)len,
-                       (unsigned long long)mirror_num, ret);
+               pr_warn("iterate_inodes_from_logical() failed: log %llu, phys %llu, len %llu, mir %u, ret %d\n",
+                       logical, physical_for_dev_replace, len, mirror_num,
+                       ret);
                not_written = 1;
                goto out;
        }
@@ -3224,11 +3217,6 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, void *ctx)
                return PTR_ERR(local_root);
        }
 
-       if (btrfs_root_refs(&local_root->root_item) == 0) {
-               srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
-               return -ENOENT;
-       }
-
        key.type = BTRFS_INODE_ITEM_KEY;
        key.objectid = inum;
        key.offset = 0;
index 2e14fd8..e46e0ed 100644 (file)
@@ -26,6 +26,7 @@
 #include <linux/radix-tree.h>
 #include <linux/crc32c.h>
 #include <linux/vmalloc.h>
+#include <linux/string.h>
 
 #include "send.h"
 #include "backref.h"
@@ -54,8 +55,8 @@ struct fs_path {
 
                        char *buf;
                        int buf_len;
-                       int reversed:1;
-                       int virtual_mem:1;
+                       unsigned int reversed:1;
+                       unsigned int virtual_mem:1;
                        char inline_buf[];
                };
                char pad[PAGE_SIZE];
@@ -1668,6 +1669,7 @@ static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen,
                              u64 *who_ino, u64 *who_gen)
 {
        int ret = 0;
+       u64 gen;
        u64 other_inode = 0;
        u8 other_type = 0;
 
@@ -1678,6 +1680,24 @@ static int will_overwrite_ref(struct send_ctx *sctx, u64 dir, u64 dir_gen,
        if (ret <= 0)
                goto out;
 
+       /*
+        * If we have a parent root we need to verify that the parent dir was
+        * not deleted and then re-created; if it was, then we have no overwrite
+        * and we can just unlink this entry.
+        */
+       if (sctx->parent_root) {
+               ret = get_inode_info(sctx->parent_root, dir, NULL, &gen, NULL,
+                                    NULL, NULL, NULL);
+               if (ret < 0 && ret != -ENOENT)
+                       goto out;
+               if (ret) {
+                       ret = 0;
+                       goto out;
+               }
+               if (gen != dir_gen)
+                       goto out;
+       }
+
        ret = lookup_dir_item_inode(sctx->parent_root, dir, name, name_len,
                        &other_inode, &other_type);
        if (ret < 0 && ret != -ENOENT)
@@ -2519,7 +2539,8 @@ static int did_create_dir(struct send_ctx *sctx, u64 dir)
                di = btrfs_item_ptr(eb, slot, struct btrfs_dir_item);
                btrfs_dir_item_key_to_cpu(eb, di, &di_key);
 
-               if (di_key.objectid < sctx->send_progress) {
+               if (di_key.type != BTRFS_ROOT_ITEM_KEY &&
+                   di_key.objectid < sctx->send_progress) {
                        ret = 1;
                        goto out;
                }
@@ -2581,7 +2602,6 @@ static int record_ref(struct list_head *head, u64 dir,
                      u64 dir_gen, struct fs_path *path)
 {
        struct recorded_ref *ref;
-       char *tmp;
 
        ref = kmalloc(sizeof(*ref), GFP_NOFS);
        if (!ref)
@@ -2591,25 +2611,35 @@ static int record_ref(struct list_head *head, u64 dir,
        ref->dir_gen = dir_gen;
        ref->full_path = path;
 
-       tmp = strrchr(ref->full_path->start, '/');
-       if (!tmp) {
-               ref->name_len = ref->full_path->end - ref->full_path->start;
-               ref->name = ref->full_path->start;
+       ref->name = (char *)kbasename(ref->full_path->start);
+       ref->name_len = ref->full_path->end - ref->name;
+       ref->dir_path = ref->full_path->start;
+       if (ref->name == ref->full_path->start)
                ref->dir_path_len = 0;
-               ref->dir_path = ref->full_path->start;
-       } else {
-               tmp++;
-               ref->name_len = ref->full_path->end - tmp;
-               ref->name = tmp;
-               ref->dir_path = ref->full_path->start;
+       else
                ref->dir_path_len = ref->full_path->end -
                                ref->full_path->start - 1 - ref->name_len;
-       }
 
        list_add_tail(&ref->list, head);
        return 0;
 }
 
+static int dup_ref(struct recorded_ref *ref, struct list_head *list)
+{
+       struct recorded_ref *new;
+
+       new = kmalloc(sizeof(*ref), GFP_NOFS);
+       if (!new)
+               return -ENOMEM;
+
+       new->dir = ref->dir;
+       new->dir_gen = ref->dir_gen;
+       new->full_path = NULL;
+       INIT_LIST_HEAD(&new->list);
+       list_add_tail(&new->list, list);
+       return 0;
+}
+
 static void __free_recorded_refs(struct list_head *head)
 {
        struct recorded_ref *cur;
@@ -2724,9 +2754,7 @@ static int process_recorded_refs(struct send_ctx *sctx)
        int ret = 0;
        struct recorded_ref *cur;
        struct recorded_ref *cur2;
-       struct ulist *check_dirs = NULL;
-       struct ulist_iterator uit;
-       struct ulist_node *un;
+       struct list_head check_dirs;
        struct fs_path *valid_path = NULL;
        u64 ow_inode = 0;
        u64 ow_gen;
@@ -2740,6 +2768,7 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
         * which is always '..'
         */
        BUG_ON(sctx->cur_ino <= BTRFS_FIRST_FREE_OBJECTID);
+       INIT_LIST_HEAD(&check_dirs);
 
        valid_path = fs_path_alloc();
        if (!valid_path) {
@@ -2747,12 +2776,6 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
                goto out;
        }
 
-       check_dirs = ulist_alloc(GFP_NOFS);
-       if (!check_dirs) {
-               ret = -ENOMEM;
-               goto out;
-       }
-
        /*
         * First, check if the first ref of the current inode was overwritten
         * before. If yes, we know that the current inode was already orphanized
@@ -2889,8 +2912,7 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
                                        goto out;
                        }
                }
-               ret = ulist_add(check_dirs, cur->dir, cur->dir_gen,
-                               GFP_NOFS);
+               ret = dup_ref(cur, &check_dirs);
                if (ret < 0)
                        goto out;
        }
@@ -2918,8 +2940,7 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
                }
 
                list_for_each_entry(cur, &sctx->deleted_refs, list) {
-                       ret = ulist_add(check_dirs, cur->dir, cur->dir_gen,
-                                       GFP_NOFS);
+                       ret = dup_ref(cur, &check_dirs);
                        if (ret < 0)
                                goto out;
                }
@@ -2930,8 +2951,7 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
                 */
                cur = list_entry(sctx->deleted_refs.next, struct recorded_ref,
                                list);
-               ret = ulist_add(check_dirs, cur->dir, cur->dir_gen,
-                               GFP_NOFS);
+               ret = dup_ref(cur, &check_dirs);
                if (ret < 0)
                        goto out;
        } else if (!S_ISDIR(sctx->cur_inode_mode)) {
@@ -2951,12 +2971,10 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
                                if (ret < 0)
                                        goto out;
                        }
-                       ret = ulist_add(check_dirs, cur->dir, cur->dir_gen,
-                                       GFP_NOFS);
+                       ret = dup_ref(cur, &check_dirs);
                        if (ret < 0)
                                goto out;
                }
-
                /*
                 * If the inode is still orphan, unlink the orphan. This may
                 * happen when a previous inode did overwrite the first ref
@@ -2978,33 +2996,32 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
         * deletion and if it's finally possible to perform the rmdir now.
         * We also update the inode stats of the parent dirs here.
         */
-       ULIST_ITER_INIT(&uit);
-       while ((un = ulist_next(check_dirs, &uit))) {
+       list_for_each_entry(cur, &check_dirs, list) {
                /*
                 * In case we had refs into dirs that were not processed yet,
                 * we don't need to do the utime and rmdir logic for these dirs.
                 * The dir will be processed later.
                 */
-               if (un->val > sctx->cur_ino)
+               if (cur->dir > sctx->cur_ino)
                        continue;
 
-               ret = get_cur_inode_state(sctx, un->val, un->aux);
+               ret = get_cur_inode_state(sctx, cur->dir, cur->dir_gen);
                if (ret < 0)
                        goto out;
 
                if (ret == inode_state_did_create ||
                    ret == inode_state_no_change) {
                        /* TODO delayed utimes */
-                       ret = send_utimes(sctx, un->val, un->aux);
+                       ret = send_utimes(sctx, cur->dir, cur->dir_gen);
                        if (ret < 0)
                                goto out;
                } else if (ret == inode_state_did_delete) {
-                       ret = can_rmdir(sctx, un->val, sctx->cur_ino);
+                       ret = can_rmdir(sctx, cur->dir, sctx->cur_ino);
                        if (ret < 0)
                                goto out;
                        if (ret) {
-                               ret = get_cur_path(sctx, un->val, un->aux,
-                                               valid_path);
+                               ret = get_cur_path(sctx, cur->dir,
+                                                  cur->dir_gen, valid_path);
                                if (ret < 0)
                                        goto out;
                                ret = send_rmdir(sctx, valid_path);
@@ -3017,8 +3034,8 @@ verbose_printk("btrfs: process_recorded_refs %llu\n", sctx->cur_ino);
        ret = 0;
 
 out:
+       __free_recorded_refs(&check_dirs);
        free_recorded_refs(sctx);
-       ulist_free(check_dirs);
        fs_path_free(valid_path);
        return ret;
 }
@@ -3119,6 +3136,8 @@ out:
 
 struct find_ref_ctx {
        u64 dir;
+       u64 dir_gen;
+       struct btrfs_root *root;
        struct fs_path *name;
        int found_idx;
 };
@@ -3128,9 +3147,21 @@ static int __find_iref(int num, u64 dir, int index,
                       void *ctx_)
 {
        struct find_ref_ctx *ctx = ctx_;
+       u64 dir_gen;
+       int ret;
 
        if (dir == ctx->dir && fs_path_len(name) == fs_path_len(ctx->name) &&
            strncmp(name->start, ctx->name->start, fs_path_len(name)) == 0) {
+               /*
+                * To avoid doing extra lookups we'll only do this if everything
+                * else matches.
+                */
+               ret = get_inode_info(ctx->root, dir, NULL, &dir_gen, NULL,
+                                    NULL, NULL, NULL);
+               if (ret)
+                       return ret;
+               if (dir_gen != ctx->dir_gen)
+                       return 0;
                ctx->found_idx = num;
                return 1;
        }
@@ -3140,14 +3171,16 @@ static int __find_iref(int num, u64 dir, int index,
 static int find_iref(struct btrfs_root *root,
                     struct btrfs_path *path,
                     struct btrfs_key *key,
-                    u64 dir, struct fs_path *name)
+                    u64 dir, u64 dir_gen, struct fs_path *name)
 {
        int ret;
        struct find_ref_ctx ctx;
 
        ctx.dir = dir;
        ctx.name = name;
+       ctx.dir_gen = dir_gen;
        ctx.found_idx = -1;
+       ctx.root = root;
 
        ret = iterate_inode_ref(root, path, key, 0, __find_iref, &ctx);
        if (ret < 0)
@@ -3163,11 +3196,17 @@ static int __record_changed_new_ref(int num, u64 dir, int index,
                                    struct fs_path *name,
                                    void *ctx)
 {
+       u64 dir_gen;
        int ret;
        struct send_ctx *sctx = ctx;
 
+       ret = get_inode_info(sctx->send_root, dir, NULL, &dir_gen, NULL,
+                            NULL, NULL, NULL);
+       if (ret)
+               return ret;
+
        ret = find_iref(sctx->parent_root, sctx->right_path,
-                       sctx->cmp_key, dir, name);
+                       sctx->cmp_key, dir, dir_gen, name);
        if (ret == -ENOENT)
                ret = __record_new_ref(num, dir, index, name, sctx);
        else if (ret > 0)
@@ -3180,11 +3219,17 @@ static int __record_changed_deleted_ref(int num, u64 dir, int index,
                                        struct fs_path *name,
                                        void *ctx)
 {
+       u64 dir_gen;
        int ret;
        struct send_ctx *sctx = ctx;
 
+       ret = get_inode_info(sctx->parent_root, dir, NULL, &dir_gen, NULL,
+                            NULL, NULL, NULL);
+       if (ret)
+               return ret;
+
        ret = find_iref(sctx->send_root, sctx->left_path, sctx->cmp_key,
-                       dir, name);
+                       dir, dir_gen, name);
        if (ret == -ENOENT)
                ret = __record_deleted_ref(num, dir, index, name, sctx);
        else if (ret > 0)
@@ -3869,7 +3914,8 @@ static int is_extent_unchanged(struct send_ctx *sctx,
        btrfs_item_key_to_cpu(eb, &found_key, slot);
        if (found_key.objectid != key.objectid ||
            found_key.type != key.type) {
-               ret = 0;
+               /* If we're a hole then just pretend nothing changed */
+               ret = (left_disknr) ? 0 : 1;
                goto out;
        }
 
@@ -3895,7 +3941,8 @@ static int is_extent_unchanged(struct send_ctx *sctx,
                 * This may only happen on the first iteration.
                 */
                if (found_key.offset + right_len <= ekey->offset) {
-                       ret = 0;
+                       /* If we're a hole just pretend nothing changed */
+                       ret = (left_disknr) ? 0 : 1;
                        goto out;
                }
 
@@ -3960,8 +4007,8 @@ static int process_extent(struct send_ctx *sctx,
                          struct btrfs_path *path,
                          struct btrfs_key *key)
 {
-       int ret = 0;
        struct clone_root *found_clone = NULL;
+       int ret = 0;
 
        if (S_ISLNK(sctx->cur_inode_mode))
                return 0;
@@ -3974,6 +4021,32 @@ static int process_extent(struct send_ctx *sctx,
                        ret = 0;
                        goto out;
                }
+       } else {
+               struct btrfs_file_extent_item *ei;
+               u8 type;
+
+               ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
+                                   struct btrfs_file_extent_item);
+               type = btrfs_file_extent_type(path->nodes[0], ei);
+               if (type == BTRFS_FILE_EXTENT_PREALLOC ||
+                   type == BTRFS_FILE_EXTENT_REG) {
+                       /*
+                        * The send spec does not have a prealloc command yet,
+                        * so just leave a hole for prealloc'ed extents until
+                        * we have enough commands queued up to justify rev'ing
+                        * the send spec.
+                        */
+                       if (type == BTRFS_FILE_EXTENT_PREALLOC) {
+                               ret = 0;
+                               goto out;
+                       }
+
+                       /* Have a hole, just skip it. */
+                       if (btrfs_file_extent_disk_bytenr(path->nodes[0], ei) == 0) {
+                               ret = 0;
+                               goto out;
+                       }
+               }
        }
 
        ret = find_extent_clone(sctx, path, key->objectid, key->offset,
@@ -4361,6 +4434,64 @@ static int changed_extent(struct send_ctx *sctx,
        return ret;
 }
 
+static int dir_changed(struct send_ctx *sctx, u64 dir)
+{
+       u64 orig_gen, new_gen;
+       int ret;
+
+       ret = get_inode_info(sctx->send_root, dir, NULL, &new_gen, NULL, NULL,
+                            NULL, NULL);
+       if (ret)
+               return ret;
+
+       ret = get_inode_info(sctx->parent_root, dir, NULL, &orig_gen, NULL,
+                            NULL, NULL, NULL);
+       if (ret)
+               return ret;
+
+       return (orig_gen != new_gen) ? 1 : 0;
+}
+
+static int compare_refs(struct send_ctx *sctx, struct btrfs_path *path,
+                       struct btrfs_key *key)
+{
+       struct btrfs_inode_extref *extref;
+       struct extent_buffer *leaf;
+       u64 dirid = 0, last_dirid = 0;
+       unsigned long ptr;
+       u32 item_size;
+       u32 cur_offset = 0;
+       int ref_name_len;
+       int ret = 0;
+
+       /* Easy case, just check this one dirid */
+       if (key->type == BTRFS_INODE_REF_KEY) {
+               dirid = key->offset;
+
+               ret = dir_changed(sctx, dirid);
+               goto out;
+       }
+
+       leaf = path->nodes[0];
+       item_size = btrfs_item_size_nr(leaf, path->slots[0]);
+       ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
+       while (cur_offset < item_size) {
+               extref = (struct btrfs_inode_extref *)(ptr +
+                                                      cur_offset);
+               dirid = btrfs_inode_extref_parent(leaf, extref);
+               ref_name_len = btrfs_inode_extref_name_len(leaf, extref);
+               cur_offset += ref_name_len + sizeof(*extref);
+               if (dirid == last_dirid)
+                       continue;
+               ret = dir_changed(sctx, dirid);
+               if (ret)
+                       break;
+               last_dirid = dirid;
+       }
+out:
+       return ret;
+}
+
 /*
  * Updates compare related fields in sctx and simply forwards to the actual
  * changed_xxx functions.
@@ -4376,6 +4507,19 @@ static int changed_cb(struct btrfs_root *left_root,
        int ret = 0;
        struct send_ctx *sctx = ctx;
 
+       if (result == BTRFS_COMPARE_TREE_SAME) {
+               if (key->type != BTRFS_INODE_REF_KEY &&
+                   key->type != BTRFS_INODE_EXTREF_KEY)
+                       return 0;
+               ret = compare_refs(sctx, left_path, key);
+               if (!ret)
+                       return 0;
+               if (ret < 0)
+                       return ret;
+               result = BTRFS_COMPARE_TREE_CHANGED;
+               ret = 0;
+       }
+
        sctx->left_path = left_path;
        sctx->right_path = right_path;
        sctx->cmp_key = key;
index 8eb6191..3aab10c 100644 (file)
@@ -56,6 +56,8 @@
 #include "rcu-string.h"
 #include "dev-replace.h"
 #include "free-space-cache.h"
+#include "backref.h"
+#include "tests/btrfs-tests.h"
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/btrfs.h>
@@ -320,14 +322,15 @@ enum {
        Opt_enospc_debug, Opt_subvolrootid, Opt_defrag, Opt_inode_cache,
        Opt_no_space_cache, Opt_recovery, Opt_skip_balance,
        Opt_check_integrity, Opt_check_integrity_including_extent_data,
-       Opt_check_integrity_print_mask, Opt_fatal_errors,
+       Opt_check_integrity_print_mask, Opt_fatal_errors, Opt_rescan_uuid_tree,
+       Opt_commit_interval,
        Opt_err,
 };
 
 static match_table_t tokens = {
        {Opt_degraded, "degraded"},
        {Opt_subvol, "subvol=%s"},
-       {Opt_subvolid, "subvolid=%d"},
+       {Opt_subvolid, "subvolid=%s"},
        {Opt_device, "device=%s"},
        {Opt_nodatasum, "nodatasum"},
        {Opt_nodatacow, "nodatacow"},
@@ -360,7 +363,9 @@ static match_table_t tokens = {
        {Opt_check_integrity, "check_int"},
        {Opt_check_integrity_including_extent_data, "check_int_data"},
        {Opt_check_integrity_print_mask, "check_int_print_mask=%d"},
+       {Opt_rescan_uuid_tree, "rescan_uuid_tree"},
        {Opt_fatal_errors, "fatal_errors=%s"},
+       {Opt_commit_interval, "commit=%d"},
        {Opt_err, NULL},
 };
 
@@ -496,10 +501,15 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
                        btrfs_set_opt(info->mount_opt, NOBARRIER);
                        break;
                case Opt_thread_pool:
-                       intarg = 0;
-                       match_int(&args[0], &intarg);
-                       if (intarg)
+                       ret = match_int(&args[0], &intarg);
+                       if (ret) {
+                               goto out;
+                       } else if (intarg > 0) {
                                info->thread_pool_size = intarg;
+                       } else {
+                               ret = -EINVAL;
+                               goto out;
+                       }
                        break;
                case Opt_max_inline:
                        num = match_strdup(&args[0]);
@@ -513,7 +523,10 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
                                                root->sectorsize);
                                }
                                printk(KERN_INFO "btrfs: max_inline at %llu\n",
-                                       (unsigned long long)info->max_inline);
+                                       info->max_inline);
+                       } else {
+                               ret = -ENOMEM;
+                               goto out;
                        }
                        break;
                case Opt_alloc_start:
@@ -525,7 +538,10 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
                                kfree(num);
                                printk(KERN_INFO
                                        "btrfs: allocations start at %llu\n",
-                                       (unsigned long long)info->alloc_start);
+                                       info->alloc_start);
+                       } else {
+                               ret = -ENOMEM;
+                               goto out;
                        }
                        break;
                case Opt_noacl:
@@ -540,12 +556,16 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
                        btrfs_set_opt(info->mount_opt, FLUSHONCOMMIT);
                        break;
                case Opt_ratio:
-                       intarg = 0;
-                       match_int(&args[0], &intarg);
-                       if (intarg) {
+                       ret = match_int(&args[0], &intarg);
+                       if (ret) {
+                               goto out;
+                       } else if (intarg >= 0) {
                                info->metadata_ratio = intarg;
                                printk(KERN_INFO "btrfs: metadata ratio %d\n",
                                       info->metadata_ratio);
+                       } else {
+                               ret = -EINVAL;
+                               goto out;
                        }
                        break;
                case Opt_discard:
@@ -554,6 +574,9 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
                case Opt_space_cache:
                        btrfs_set_opt(info->mount_opt, SPACE_CACHE);
                        break;
+               case Opt_rescan_uuid_tree:
+                       btrfs_set_opt(info->mount_opt, RESCAN_UUID_TREE);
+                       break;
                case Opt_no_space_cache:
                        printk(KERN_INFO "btrfs: disabling disk space caching\n");
                        btrfs_clear_opt(info->mount_opt, SPACE_CACHE);
@@ -596,13 +619,17 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
                        btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY);
                        break;
                case Opt_check_integrity_print_mask:
-                       intarg = 0;
-                       match_int(&args[0], &intarg);
-                       if (intarg) {
+                       ret = match_int(&args[0], &intarg);
+                       if (ret) {
+                               goto out;
+                       } else if (intarg >= 0) {
                                info->check_integrity_print_mask = intarg;
                                printk(KERN_INFO "btrfs:"
                                       " check_integrity_print_mask 0x%x\n",
                                       info->check_integrity_print_mask);
+                       } else {
+                               ret = -EINVAL;
+                               goto out;
                        }
                        break;
 #else
@@ -626,6 +653,29 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
                                goto out;
                        }
                        break;
+               case Opt_commit_interval:
+                       intarg = 0;
+                       ret = match_int(&args[0], &intarg);
+                       if (ret < 0) {
+                               printk(KERN_ERR
+                                       "btrfs: invalid commit interval\n");
+                               ret = -EINVAL;
+                               goto out;
+                       }
+                       if (intarg > 0) {
+                               if (intarg > 300) {
+                                       printk(KERN_WARNING
+                                           "btrfs: excessive commit interval %d\n",
+                                                       intarg);
+                               }
+                               info->commit_interval = intarg;
+                       } else {
+                               printk(KERN_INFO
+                                   "btrfs: using default commit interval %ds\n",
+                                   BTRFS_DEFAULT_COMMIT_INTERVAL);
+                               info->commit_interval = BTRFS_DEFAULT_COMMIT_INTERVAL;
+                       }
+                       break;
                case Opt_err:
                        printk(KERN_INFO "btrfs: unrecognized mount option "
                               "'%s'\n", p);
@@ -654,8 +704,8 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
 {
        substring_t args[MAX_OPT_ARGS];
        char *device_name, *opts, *orig, *p;
+       char *num = NULL;
        int error = 0;
-       int intarg;
 
        if (!options)
                return 0;
@@ -679,17 +729,23 @@ static int btrfs_parse_early_options(const char *options, fmode_t flags,
                case Opt_subvol:
                        kfree(*subvol_name);
                        *subvol_name = match_strdup(&args[0]);
+                       if (!*subvol_name) {
+                               error = -ENOMEM;
+                               goto out;
+                       }
                        break;
                case Opt_subvolid:
-                       intarg = 0;
-                       error = match_int(&args[0], &intarg);
-                       if (!error) {
+                       num = match_strdup(&args[0]);
+                       if (num) {
+                               *subvol_objectid = memparse(num, NULL);
+                               kfree(num);
                                /* we want the original fs_tree */
-                               if (!intarg)
+                               if (!*subvol_objectid)
                                        *subvol_objectid =
                                                BTRFS_FS_TREE_OBJECTID;
-                               else
-                                       *subvol_objectid = intarg;
+                       } else {
+                               error = -EINVAL;
+                               goto out;
                        }
                        break;
                case Opt_subvolrootid:
@@ -892,11 +948,9 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
        if (btrfs_test_opt(root, NOBARRIER))
                seq_puts(seq, ",nobarrier");
        if (info->max_inline != 8192 * 1024)
-               seq_printf(seq, ",max_inline=%llu",
-                          (unsigned long long)info->max_inline);
+               seq_printf(seq, ",max_inline=%llu", info->max_inline);
        if (info->alloc_start != 0)
-               seq_printf(seq, ",alloc_start=%llu",
-                          (unsigned long long)info->alloc_start);
+               seq_printf(seq, ",alloc_start=%llu", info->alloc_start);
        if (info->thread_pool_size !=  min_t(unsigned long,
                                             num_online_cpus() + 2, 8))
                seq_printf(seq, ",thread_pool=%d", info->thread_pool_size);
@@ -928,6 +982,8 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
                seq_puts(seq, ",space_cache");
        else
                seq_puts(seq, ",nospace_cache");
+       if (btrfs_test_opt(root, RESCAN_UUID_TREE))
+               seq_puts(seq, ",rescan_uuid_tree");
        if (btrfs_test_opt(root, CLEAR_CACHE))
                seq_puts(seq, ",clear_cache");
        if (btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED))
@@ -940,8 +996,24 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
                seq_puts(seq, ",inode_cache");
        if (btrfs_test_opt(root, SKIP_BALANCE))
                seq_puts(seq, ",skip_balance");
+       if (btrfs_test_opt(root, RECOVERY))
+               seq_puts(seq, ",recovery");
+#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
+       if (btrfs_test_opt(root, CHECK_INTEGRITY_INCLUDING_EXTENT_DATA))
+               seq_puts(seq, ",check_int_data");
+       else if (btrfs_test_opt(root, CHECK_INTEGRITY))
+               seq_puts(seq, ",check_int");
+       if (info->check_integrity_print_mask)
+               seq_printf(seq, ",check_int_print_mask=%d",
+                               info->check_integrity_print_mask);
+#endif
+       if (info->metadata_ratio)
+               seq_printf(seq, ",metadata_ratio=%d",
+                               info->metadata_ratio);
        if (btrfs_test_opt(root, PANIC_ON_FATAL_ERROR))
                seq_puts(seq, ",fatal_errors=panic");
+       if (info->commit_interval != BTRFS_DEFAULT_COMMIT_INTERVAL)
+               seq_printf(seq, ",commit=%d", info->commit_interval);
        return 0;
 }
 
@@ -1696,6 +1768,11 @@ static void btrfs_print_info(void)
                        "\n");
 }
 
+static int btrfs_run_sanity_tests(void)
+{
+       return btrfs_test_free_space_cache();
+}
+
 static int __init init_btrfs_fs(void)
 {
        int err;
@@ -1734,23 +1811,32 @@ static int __init init_btrfs_fs(void)
        if (err)
                goto free_auto_defrag;
 
-       err = btrfs_interface_init();
+       err = btrfs_prelim_ref_init();
        if (err)
-               goto free_delayed_ref;
+               goto free_prelim_ref;
 
-       err = register_filesystem(&btrfs_fs_type);
+       err = btrfs_interface_init();
        if (err)
-               goto unregister_ioctl;
+               goto free_delayed_ref;
 
        btrfs_init_lockdep();
 
        btrfs_print_info();
-       btrfs_test_free_space_cache();
+
+       err = btrfs_run_sanity_tests();
+       if (err)
+               goto unregister_ioctl;
+
+       err = register_filesystem(&btrfs_fs_type);
+       if (err)
+               goto unregister_ioctl;
 
        return 0;
 
 unregister_ioctl:
        btrfs_interface_exit();
+free_prelim_ref:
+       btrfs_prelim_ref_exit();
 free_delayed_ref:
        btrfs_delayed_ref_exit();
 free_auto_defrag:
@@ -1777,6 +1863,7 @@ static void __exit exit_btrfs_fs(void)
        btrfs_delayed_ref_exit();
        btrfs_auto_defrag_exit();
        btrfs_delayed_inode_exit();
+       btrfs_prelim_ref_exit();
        ordered_data_exit();
        extent_map_exit();
        extent_io_exit();
diff --git a/fs/btrfs/tests/btrfs-tests.h b/fs/btrfs/tests/btrfs-tests.h
new file mode 100644 (file)
index 0000000..5808776
--- /dev/null
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) 2013 Fusion IO.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#ifndef __BTRFS_TESTS
+#define __BTRFS_TESTS
+
+#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
+
+#define test_msg(fmt, ...) pr_info("btrfs: selftest: " fmt, ##__VA_ARGS__)
+
+int btrfs_test_free_space_cache(void);
+#else
+static inline int btrfs_test_free_space_cache(void)
+{
+       return 0;
+}
+#endif
+
+#endif
diff --git a/fs/btrfs/tests/free-space-tests.c b/fs/btrfs/tests/free-space-tests.c
new file mode 100644 (file)
index 0000000..6fc8201
--- /dev/null
@@ -0,0 +1,395 @@
+/*
+ * Copyright (C) 2013 Fusion IO.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include <linux/slab.h>
+#include "btrfs-tests.h"
+#include "../ctree.h"
+#include "../free-space-cache.h"
+
+#define BITS_PER_BITMAP                (PAGE_CACHE_SIZE * 8)
+static struct btrfs_block_group_cache *init_test_block_group(void)
+{
+       struct btrfs_block_group_cache *cache;
+
+       cache = kzalloc(sizeof(*cache), GFP_NOFS);
+       if (!cache)
+               return NULL;
+       cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
+                                       GFP_NOFS);
+       if (!cache->free_space_ctl) {
+               kfree(cache);
+               return NULL;
+       }
+
+       cache->key.objectid = 0;
+       cache->key.offset = 1024 * 1024 * 1024;
+       cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
+       cache->sectorsize = 4096;
+
+       spin_lock_init(&cache->lock);
+       INIT_LIST_HEAD(&cache->list);
+       INIT_LIST_HEAD(&cache->cluster_list);
+       INIT_LIST_HEAD(&cache->new_bg_list);
+
+       btrfs_init_free_space_ctl(cache);
+
+       return cache;
+}
+
+/*
+ * This test just does basic sanity checking, making sure we can add an extent
+ * entry and remove space from either end and the middle, and make sure we can
+ * remove space that covers adjacent extent entries.
+ */
+static int test_extents(struct btrfs_block_group_cache *cache)
+{
+       int ret = 0;
+
+       test_msg("Running extent only tests\n");
+
+       /* First just make sure we can remove an entire entry */
+       ret = btrfs_add_free_space(cache, 0, 4 * 1024 * 1024);
+       if (ret) {
+               test_msg("Error adding initial extents %d\n", ret);
+               return ret;
+       }
+
+       ret = btrfs_remove_free_space(cache, 0, 4 * 1024 * 1024);
+       if (ret) {
+               test_msg("Error removing extent %d\n", ret);
+               return ret;
+       }
+
+       if (test_check_exists(cache, 0, 4 * 1024 * 1024)) {
+               test_msg("Full remove left some lingering space\n");
+               return -1;
+       }
+
+       /* Ok edge and middle cases now */
+       ret = btrfs_add_free_space(cache, 0, 4 * 1024 * 1024);
+       if (ret) {
+               test_msg("Error adding half extent %d\n", ret);
+               return ret;
+       }
+
+       ret = btrfs_remove_free_space(cache, 3 * 1024 * 1024, 1 * 1024 * 1024);
+       if (ret) {
+               test_msg("Error removing tail end %d\n", ret);
+               return ret;
+       }
+
+       ret = btrfs_remove_free_space(cache, 0, 1 * 1024 * 1024);
+       if (ret) {
+               test_msg("Error removing front end %d\n", ret);
+               return ret;
+       }
+
+       ret = btrfs_remove_free_space(cache, 2 * 1024 * 1024, 4096);
+       if (ret) {
+               test_msg("Error removing middle peice %d\n", ret);
+               return ret;
+       }
+
+       if (test_check_exists(cache, 0, 1 * 1024 * 1024)) {
+               test_msg("Still have space at the front\n");
+               return -1;
+       }
+
+       if (test_check_exists(cache, 2 * 1024 * 1024, 4096)) {
+               test_msg("Still have space in the middle\n");
+               return -1;
+       }
+
+       if (test_check_exists(cache, 3 * 1024 * 1024, 1 * 1024 * 1024)) {
+               test_msg("Still have space at the end\n");
+               return -1;
+       }
+
+       /* Cleanup */
+       __btrfs_remove_free_space_cache(cache->free_space_ctl);
+
+       return 0;
+}
+
+static int test_bitmaps(struct btrfs_block_group_cache *cache)
+{
+       u64 next_bitmap_offset;
+       int ret;
+
+       test_msg("Running bitmap only tests\n");
+
+       ret = test_add_free_space_entry(cache, 0, 4 * 1024 * 1024, 1);
+       if (ret) {
+               test_msg("Couldn't create a bitmap entry %d\n", ret);
+               return ret;
+       }
+
+       ret = btrfs_remove_free_space(cache, 0, 4 * 1024 * 1024);
+       if (ret) {
+               test_msg("Error removing bitmap full range %d\n", ret);
+               return ret;
+       }
+
+       if (test_check_exists(cache, 0, 4 * 1024 * 1024)) {
+               test_msg("Left some space in bitmap\n");
+               return -1;
+       }
+
+       ret = test_add_free_space_entry(cache, 0, 4 * 1024 * 1024, 1);
+       if (ret) {
+               test_msg("Couldn't add to our bitmap entry %d\n", ret);
+               return ret;
+       }
+
+       ret = btrfs_remove_free_space(cache, 1 * 1024 * 1024, 2 * 1024 * 1024);
+       if (ret) {
+               test_msg("Couldn't remove middle chunk %d\n", ret);
+               return ret;
+       }
+
+       /*
+        * The first bitmap we have starts at offset 0 so the next one is just
+        * at the end of the first bitmap.
+        */
+       next_bitmap_offset = (u64)(BITS_PER_BITMAP * 4096);
+
+       /* Test a bit straddling two bitmaps */
+       ret = test_add_free_space_entry(cache, next_bitmap_offset -
+                                  (2 * 1024 * 1024), 4 * 1024 * 1024, 1);
+       if (ret) {
+               test_msg("Couldn't add space that straddles two bitmaps %d\n",
+                               ret);
+               return ret;
+       }
+
+       ret = btrfs_remove_free_space(cache, next_bitmap_offset -
+                                     (1 * 1024 * 1024), 2 * 1024 * 1024);
+       if (ret) {
+               test_msg("Couldn't remove overlapping space %d\n", ret);
+               return ret;
+       }
+
+       if (test_check_exists(cache, next_bitmap_offset - (1 * 1024 * 1024),
+                        2 * 1024 * 1024)) {
+               test_msg("Left some space when removing overlapping\n");
+               return -1;
+       }
+
+       __btrfs_remove_free_space_cache(cache->free_space_ctl);
+
+       return 0;
+}
+
+/* This is the high grade jackassery */
+static int test_bitmaps_and_extents(struct btrfs_block_group_cache *cache)
+{
+       u64 bitmap_offset = (u64)(BITS_PER_BITMAP * 4096);
+       int ret;
+
+       test_msg("Running bitmap and extent tests\n");
+
+       /*
+        * First let's do something simple, an extent at the same offset as the
+        * bitmap, but the free space completely in the extent and then
+        * completely in the bitmap.
+        */
+       ret = test_add_free_space_entry(cache, 4 * 1024 * 1024, 1 * 1024 * 1024, 1);
+       if (ret) {
+               test_msg("Couldn't create bitmap entry %d\n", ret);
+               return ret;
+       }
+
+       ret = test_add_free_space_entry(cache, 0, 1 * 1024 * 1024, 0);
+       if (ret) {
+               test_msg("Couldn't add extent entry %d\n", ret);
+               return ret;
+       }
+
+       ret = btrfs_remove_free_space(cache, 0, 1 * 1024 * 1024);
+       if (ret) {
+               test_msg("Couldn't remove extent entry %d\n", ret);
+               return ret;
+       }
+
+       if (test_check_exists(cache, 0, 1 * 1024 * 1024)) {
+               test_msg("Left remnants after our remove\n");
+               return -1;
+       }
+
+       /* Now to add back the extent entry and remove from the bitmap */
+       ret = test_add_free_space_entry(cache, 0, 1 * 1024 * 1024, 0);
+       if (ret) {
+               test_msg("Couldn't re-add extent entry %d\n", ret);
+               return ret;
+       }
+
+       ret = btrfs_remove_free_space(cache, 4 * 1024 * 1024, 1 * 1024 * 1024);
+       if (ret) {
+               test_msg("Couldn't remove from bitmap %d\n", ret);
+               return ret;
+       }
+
+       if (test_check_exists(cache, 4 * 1024 * 1024, 1 * 1024 * 1024)) {
+               test_msg("Left remnants in the bitmap\n");
+               return -1;
+       }
+
+       /*
+        * Ok so a little more evil, extent entry and bitmap at the same offset,
+        * removing an overlapping chunk.
+        */
+       ret = test_add_free_space_entry(cache, 1 * 1024 * 1024, 4 * 1024 * 1024, 1);
+       if (ret) {
+               test_msg("Couldn't add to a bitmap %d\n", ret);
+               return ret;
+       }
+
+       ret = btrfs_remove_free_space(cache, 512 * 1024, 3 * 1024 * 1024);
+       if (ret) {
+               test_msg("Couldn't remove overlapping space %d\n", ret);
+               return ret;
+       }
+
+       if (test_check_exists(cache, 512 * 1024, 3 * 1024 * 1024)) {
+               test_msg("Left over peices after removing overlapping\n");
+               return -1;
+       }
+
+       __btrfs_remove_free_space_cache(cache->free_space_ctl);
+
+       /* Now with the extent entry offset into the bitmap */
+       ret = test_add_free_space_entry(cache, 4 * 1024 * 1024, 4 * 1024 * 1024, 1);
+       if (ret) {
+               test_msg("Couldn't add space to the bitmap %d\n", ret);
+               return ret;
+       }
+
+       ret = test_add_free_space_entry(cache, 2 * 1024 * 1024, 2 * 1024 * 1024, 0);
+       if (ret) {
+               test_msg("Couldn't add extent to the cache %d\n", ret);
+               return ret;
+       }
+
+       ret = btrfs_remove_free_space(cache, 3 * 1024 * 1024, 4 * 1024 * 1024);
+       if (ret) {
+               test_msg("Problem removing overlapping space %d\n", ret);
+               return ret;
+       }
+
+       if (test_check_exists(cache, 3 * 1024 * 1024, 4 * 1024 * 1024)) {
+               test_msg("Left something behind when removing space");
+               return -1;
+       }
+
+       /*
+        * This has blown up in the past, the extent entry starts before the
+        * bitmap entry, but we're trying to remove an offset that falls
+        * completely within the bitmap range and is in both the extent entry
+        * and the bitmap entry, looks like this
+        *
+        *   [ extent ]
+        *      [ bitmap ]
+        *        [ del ]
+        */
+       __btrfs_remove_free_space_cache(cache->free_space_ctl);
+       ret = test_add_free_space_entry(cache, bitmap_offset + 4 * 1024 * 1024,
+                                  4 * 1024 * 1024, 1);
+       if (ret) {
+               test_msg("Couldn't add bitmap %d\n", ret);
+               return ret;
+       }
+
+       ret = test_add_free_space_entry(cache, bitmap_offset - 1 * 1024 * 1024,
+                                  5 * 1024 * 1024, 0);
+       if (ret) {
+               test_msg("Couldn't add extent entry %d\n", ret);
+               return ret;
+       }
+
+       ret = btrfs_remove_free_space(cache, bitmap_offset + 1 * 1024 * 1024,
+                                     5 * 1024 * 1024);
+       if (ret) {
+               test_msg("Failed to free our space %d\n", ret);
+               return ret;
+       }
+
+       if (test_check_exists(cache, bitmap_offset + 1 * 1024 * 1024,
+                        5 * 1024 * 1024)) {
+               test_msg("Left stuff over\n");
+               return -1;
+       }
+
+       __btrfs_remove_free_space_cache(cache->free_space_ctl);
+
+       /*
+        * This blew up before, we have part of the free space in a bitmap and
+        * then the entirety of the rest of the space in an extent.  This used
+        * to return -EAGAIN back from btrfs_remove_extent, make sure this
+        * doesn't happen.
+        */
+       ret = test_add_free_space_entry(cache, 1 * 1024 * 1024, 2 * 1024 * 1024, 1);
+       if (ret) {
+               test_msg("Couldn't add bitmap entry %d\n", ret);
+               return ret;
+       }
+
+       ret = test_add_free_space_entry(cache, 3 * 1024 * 1024, 1 * 1024 * 1024, 0);
+       if (ret) {
+               test_msg("Couldn't add extent entry %d\n", ret);
+               return ret;
+       }
+
+       ret = btrfs_remove_free_space(cache, 1 * 1024 * 1024, 3 * 1024 * 1024);
+       if (ret) {
+               test_msg("Error removing bitmap and extent overlapping %d\n", ret);
+               return ret;
+       }
+
+       __btrfs_remove_free_space_cache(cache->free_space_ctl);
+       return 0;
+}
+
+/*
+ * Entry point for the btrfs free space cache sanity tests.
+ *
+ * Builds a dummy block group, runs the extent, bitmap, and mixed
+ * extent/bitmap test batteries against it in order, then tears the
+ * cache down again.  Returns the first failing test's return value,
+ * or 0 on full success.  Note that a setup failure (init_test_block_group()
+ * returning NULL) is only logged and still reported as 0.
+ */
+int btrfs_test_free_space_cache(void)
+{
+       struct btrfs_block_group_cache *cache;
+       int ret;
+
+       test_msg("Running btrfs free space cache tests\n");
+
+       cache = init_test_block_group();
+       if (!cache) {
+               test_msg("Couldn't run the tests\n");
+               /* setup failure, not a test failure: report success */
+               return 0;
+       }
+
+       ret = test_extents(cache);
+       if (ret)
+               goto out;
+       ret = test_bitmaps(cache);
+       if (ret)
+               goto out;
+       ret = test_bitmaps_and_extents(cache);
+       if (ret)
+               goto out;
+out:
+       /* free all cached entries and the cache itself in every case */
+       __btrfs_remove_free_space_cache(cache->free_space_ctl);
+       kfree(cache->free_space_ctl);
+       kfree(cache);
+       test_msg("Free space cache tests finished\n");
+       return ret;
+}
index af1931a..cac4a3f 100644 (file)
@@ -837,7 +837,7 @@ int btrfs_wait_marked_extents(struct btrfs_root *root,
  * them in one of two extent_io trees.  This is used to make sure all of
  * those extents are on disk for transaction or log commit
  */
-int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
+static int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
                                struct extent_io_tree *dirty_pages, int mark)
 {
        int ret;
@@ -1225,8 +1225,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
                btrfs_set_root_stransid(new_root_item, 0);
                btrfs_set_root_rtransid(new_root_item, 0);
        }
-       new_root_item->otime.sec = cpu_to_le64(cur_time.tv_sec);
-       new_root_item->otime.nsec = cpu_to_le32(cur_time.tv_nsec);
+       btrfs_set_stack_timespec_sec(&new_root_item->otime, cur_time.tv_sec);
+       btrfs_set_stack_timespec_nsec(&new_root_item->otime, cur_time.tv_nsec);
        btrfs_set_root_otransid(new_root_item, trans->transid);
 
        old = btrfs_lock_root_node(root);
@@ -1311,8 +1311,26 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
                                         dentry->d_name.len * 2);
        parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME;
        ret = btrfs_update_inode_fallback(trans, parent_root, parent_inode);
-       if (ret)
+       if (ret) {
+               btrfs_abort_transaction(trans, root, ret);
+               goto fail;
+       }
+       ret = btrfs_uuid_tree_add(trans, fs_info->uuid_root, new_uuid.b,
+                                 BTRFS_UUID_KEY_SUBVOL, objectid);
+       if (ret) {
                btrfs_abort_transaction(trans, root, ret);
+               goto fail;
+       }
+       if (!btrfs_is_empty_uuid(new_root_item->received_uuid)) {
+               ret = btrfs_uuid_tree_add(trans, fs_info->uuid_root,
+                                         new_root_item->received_uuid,
+                                         BTRFS_UUID_KEY_RECEIVED_SUBVOL,
+                                         objectid);
+               if (ret && ret != -EEXIST) {
+                       btrfs_abort_transaction(trans, root, ret);
+                       goto fail;
+               }
+       }
 fail:
        pending->error = ret;
 dir_item_existed:
@@ -1362,6 +1380,8 @@ static void update_super_roots(struct btrfs_root *root)
        super->root_level = root_item->level;
        if (btrfs_test_opt(root, SPACE_CACHE))
                super->cache_generation = root_item->generation;
+       if (root->fs_info->update_uuid_tree_gen)
+               super->uuid_tree_generation = root_item->generation;
 }
 
 int btrfs_transaction_in_commit(struct btrfs_fs_info *info)
@@ -1928,8 +1948,7 @@ int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root)
        list_del_init(&root->root_list);
        spin_unlock(&fs_info->trans_lock);
 
-       pr_debug("btrfs: cleaner removing %llu\n",
-                       (unsigned long long)root->objectid);
+       pr_debug("btrfs: cleaner removing %llu\n", root->objectid);
 
        btrfs_kill_all_delayed_nodes(root);
 
@@ -1942,6 +1961,5 @@ int btrfs_clean_one_deleted_snapshot(struct btrfs_root *root)
         * If we encounter a transaction abort during snapshot cleaning, we
         * don't want to crash here
         */
-       BUG_ON(ret < 0 && ret != -EAGAIN && ret != -EROFS);
-       return 1;
+       return (ret < 0) ? 0 : 1;
 }
index defbc42..5c2af84 100644 (file)
@@ -160,8 +160,6 @@ int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
 void btrfs_throttle(struct btrfs_root *root);
 int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
                                struct btrfs_root *root);
-int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
-                               struct extent_io_tree *dirty_pages, int mark);
 int btrfs_write_marked_extents(struct btrfs_root *root,
                                struct extent_io_tree *dirty_pages, int mark);
 int btrfs_wait_marked_extents(struct btrfs_root *root,
index ff60d89..0d9613c 100644 (file)
@@ -747,7 +747,8 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans,
        ret = btrfs_unlink_inode(trans, root, dir, inode, name, name_len);
        if (ret)
                goto out;
-       btrfs_run_delayed_items(trans, root);
+       else
+               ret = btrfs_run_delayed_items(trans, root);
 out:
        kfree(name);
        iput(inode);
@@ -923,7 +924,9 @@ again:
                                kfree(victim_name);
                                if (ret)
                                        return ret;
-                               btrfs_run_delayed_items(trans, root);
+                               ret = btrfs_run_delayed_items(trans, root);
+                               if (ret)
+                                       return ret;
                                *search_done = 1;
                                goto again;
                        }
@@ -990,7 +993,9 @@ again:
                                                                 inode,
                                                                 victim_name,
                                                                 victim_name_len);
-                                       btrfs_run_delayed_items(trans, root);
+                                       if (!ret)
+                                               ret = btrfs_run_delayed_items(
+                                                                 trans, root);
                                }
                                iput(victim_parent);
                                kfree(victim_name);
@@ -1536,8 +1541,10 @@ static noinline int replay_one_name(struct btrfs_trans_handle *trans,
 
        name_len = btrfs_dir_name_len(eb, di);
        name = kmalloc(name_len, GFP_NOFS);
-       if (!name)
-               return -ENOMEM;
+       if (!name) {
+               ret = -ENOMEM;
+               goto out;
+       }
 
        log_type = btrfs_dir_type(eb, di);
        read_extent_buffer(eb, name, (unsigned long)(di + 1),
@@ -1810,7 +1817,7 @@ again:
                        ret = btrfs_unlink_inode(trans, root, dir, inode,
                                                 name, name_len);
                        if (!ret)
-                               btrfs_run_delayed_items(trans, root);
+                               ret = btrfs_run_delayed_items(trans, root);
                        kfree(name);
                        iput(inode);
                        if (ret)
diff --git a/fs/btrfs/uuid-tree.c b/fs/btrfs/uuid-tree.c
new file mode 100644 (file)
index 0000000..dd0dea3
--- /dev/null
@@ -0,0 +1,358 @@
+/*
+ * Copyright (C) STRATO AG 2013.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+#include <linux/uuid.h>
+#include <asm/unaligned.h>
+#include "ctree.h"
+#include "transaction.h"
+#include "disk-io.h"
+#include "print-tree.h"
+
+
+/*
+ * Map a 16-byte UUID plus an item type onto a btrfs key: the first
+ * 8 bytes of the UUID (read little-endian, possibly unaligned) become
+ * the objectid and the last 8 bytes become the offset.
+ */
+static void btrfs_uuid_to_key(u8 *uuid, u8 type, struct btrfs_key *key)
+{
+       key->type = type;
+       key->objectid = get_unaligned_le64(uuid);
+       key->offset = get_unaligned_le64(uuid + sizeof(u64));
+}
+
+/*
+ * Look up (uuid, type, subid) in the uuid tree.  A uuid item is an
+ * array of __le64 subids stored under the key derived from the UUID;
+ * this walks that array looking for @subid.
+ *
+ * Return -ENOENT for !found, < 0 for errors, or 0 if an item was found.
+ */
+static int btrfs_uuid_tree_lookup(struct btrfs_root *uuid_root, u8 *uuid,
+                                 u8 type, u64 subid)
+{
+       int ret;
+       struct btrfs_path *path = NULL;
+       struct extent_buffer *eb;
+       int slot;
+       u32 item_size;
+       unsigned long offset;
+       struct btrfs_key key;
+
+       if (WARN_ON_ONCE(!uuid_root)) {
+               /* no uuid tree at all: treat as "not found" */
+               ret = -ENOENT;
+               goto out;
+       }
+
+       path = btrfs_alloc_path();
+       if (!path) {
+               ret = -ENOMEM;
+               goto out;
+       }
+
+       btrfs_uuid_to_key(uuid, type, &key);
+       ret = btrfs_search_slot(NULL, uuid_root, &key, path, 0, 0);
+       if (ret < 0) {
+               goto out;
+       } else if (ret > 0) {
+               /* key itself does not exist */
+               ret = -ENOENT;
+               goto out;
+       }
+
+       eb = path->nodes[0];
+       slot = path->slots[0];
+       item_size = btrfs_item_size_nr(eb, slot);
+       offset = btrfs_item_ptr_offset(eb, slot);
+       ret = -ENOENT;
+
+       /* a valid uuid item is a whole number of __le64 subids */
+       if (!IS_ALIGNED(item_size, sizeof(u64))) {
+               pr_warn("btrfs: uuid item with illegal size %lu!\n",
+                       (unsigned long)item_size);
+               goto out;
+       }
+       /* scan the subid array for a match */
+       while (item_size) {
+               __le64 data;
+
+               read_extent_buffer(eb, &data, offset, sizeof(data));
+               if (le64_to_cpu(data) == subid) {
+                       ret = 0;
+                       break;
+               }
+               offset += sizeof(data);
+               item_size -= sizeof(data);
+       }
+
+out:
+       btrfs_free_path(path);
+       return ret;
+}
+
+/*
+ * Add (uuid, type, subid_cpu) to the uuid tree.
+ *
+ * If the subid is already recorded for this UUID/type the lookup
+ * returns 0 and we return immediately, so the operation is idempotent.
+ * Otherwise the subid is appended: either a new item is inserted, or
+ * an existing item for the same key is extended by one __le64 slot.
+ * Returns 0 on success or a negative errno.
+ */
+int btrfs_uuid_tree_add(struct btrfs_trans_handle *trans,
+                       struct btrfs_root *uuid_root, u8 *uuid, u8 type,
+                       u64 subid_cpu)
+{
+       int ret;
+       struct btrfs_path *path = NULL;
+       struct btrfs_key key;
+       struct extent_buffer *eb;
+       int slot;
+       unsigned long offset;
+       __le64 subid_le;
+
+       /* anything other than "not found" (found, or an error) ends here */
+       ret = btrfs_uuid_tree_lookup(uuid_root, uuid, type, subid_cpu);
+       if (ret != -ENOENT)
+               return ret;
+
+       if (WARN_ON_ONCE(!uuid_root)) {
+               ret = -EINVAL;
+               goto out;
+       }
+
+       btrfs_uuid_to_key(uuid, type, &key);
+
+       path = btrfs_alloc_path();
+       if (!path) {
+               ret = -ENOMEM;
+               goto out;
+       }
+
+       ret = btrfs_insert_empty_item(trans, uuid_root, path, &key,
+                                     sizeof(subid_le));
+       if (ret >= 0) {
+               /* Add an item for the type for the first time */
+               eb = path->nodes[0];
+               slot = path->slots[0];
+               offset = btrfs_item_ptr_offset(eb, slot);
+       } else if (ret == -EEXIST) {
+               /*
+                * An item with that type already exists.
+                * Extend the item and store the new subid at the end.
+                */
+               btrfs_extend_item(uuid_root, path, sizeof(subid_le));
+               eb = path->nodes[0];
+               slot = path->slots[0];
+               offset = btrfs_item_ptr_offset(eb, slot);
+               offset += btrfs_item_size_nr(eb, slot) - sizeof(subid_le);
+       } else if (ret < 0) {
+               pr_warn("btrfs: insert uuid item failed %d (0x%016llx, 0x%016llx) type %u!\n",
+                       ret, (unsigned long long)key.objectid,
+                       (unsigned long long)key.offset, type);
+               goto out;
+       }
+
+       /* write the new subid into the slot chosen above */
+       ret = 0;
+       subid_le = cpu_to_le64(subid_cpu);
+       write_extent_buffer(eb, &subid_le, offset, sizeof(subid_le));
+       btrfs_mark_buffer_dirty(eb);
+
+out:
+       btrfs_free_path(path);
+       return ret;
+}
+
+/*
+ * Remove (uuid, type, subid) from the uuid tree.
+ *
+ * Locates the __le64 subid inside the item for this UUID/type.  If it
+ * is the only subid in the item the whole item is deleted; otherwise
+ * the remaining subids are shifted down and the item is truncated by
+ * one slot.  Returns 0 on success, -ENOENT when the key or the subid
+ * is not present, or another negative errno.
+ */
+int btrfs_uuid_tree_rem(struct btrfs_trans_handle *trans,
+                       struct btrfs_root *uuid_root, u8 *uuid, u8 type,
+                       u64 subid)
+{
+       int ret;
+       struct btrfs_path *path = NULL;
+       struct btrfs_key key;
+       struct extent_buffer *eb;
+       int slot;
+       unsigned long offset;
+       u32 item_size;
+       unsigned long move_dst;
+       unsigned long move_src;
+       unsigned long move_len;
+
+       if (WARN_ON_ONCE(!uuid_root)) {
+               ret = -EINVAL;
+               goto out;
+       }
+
+       btrfs_uuid_to_key(uuid, type, &key);
+
+       path = btrfs_alloc_path();
+       if (!path) {
+               ret = -ENOMEM;
+               goto out;
+       }
+
+       /* search with intent to delete (-1) and keep the leaf locked */
+       ret = btrfs_search_slot(trans, uuid_root, &key, path, -1, 1);
+       if (ret < 0) {
+               pr_warn("btrfs: error %d while searching for uuid item!\n",
+                       ret);
+               goto out;
+       }
+       if (ret > 0) {
+               ret = -ENOENT;
+               goto out;
+       }
+
+       eb = path->nodes[0];
+       slot = path->slots[0];
+       offset = btrfs_item_ptr_offset(eb, slot);
+       item_size = btrfs_item_size_nr(eb, slot);
+       /* a valid uuid item is a whole number of __le64 subids */
+       if (!IS_ALIGNED(item_size, sizeof(u64))) {
+               pr_warn("btrfs: uuid item with illegal size %lu!\n",
+                       (unsigned long)item_size);
+               ret = -ENOENT;
+               goto out;
+       }
+       /* walk the subid array; on exit, offset points at the match */
+       while (item_size) {
+               __le64 read_subid;
+
+               read_extent_buffer(eb, &read_subid, offset, sizeof(read_subid));
+               if (le64_to_cpu(read_subid) == subid)
+                       break;
+               offset += sizeof(read_subid);
+               item_size -= sizeof(read_subid);
+       }
+
+       if (!item_size) {
+               /* reached the end without finding the subid */
+               ret = -ENOENT;
+               goto out;
+       }
+
+       item_size = btrfs_item_size_nr(eb, slot);
+       if (item_size == sizeof(subid)) {
+               /* last subid in the item: delete the whole item */
+               ret = btrfs_del_item(trans, uuid_root, path);
+               goto out;
+       }
+
+       /* close the gap: move the tail of the array over the match */
+       move_dst = offset;
+       move_src = offset + sizeof(subid);
+       move_len = item_size - (move_src - btrfs_item_ptr_offset(eb, slot));
+       memmove_extent_buffer(eb, move_dst, move_src, move_len);
+       btrfs_truncate_item(uuid_root, path, item_size - sizeof(subid), 1);
+
+out:
+       btrfs_free_path(path);
+       return ret;
+}
+
+/*
+ * Remove one uuid tree entry inside its own transaction.  Helper for
+ * btrfs_uuid_tree_iterate(), which runs without a transaction handle.
+ */
+static int btrfs_uuid_iter_rem(struct btrfs_root *uuid_root, u8 *uuid, u8 type,
+                              u64 subid)
+{
+       struct btrfs_trans_handle *trans;
+       int ret;
+
+       /* 1 - for the uuid item */
+       trans = btrfs_start_transaction(uuid_root, 1);
+       if (IS_ERR(trans)) {
+               ret = PTR_ERR(trans);
+               goto out;
+       }
+
+       ret = btrfs_uuid_tree_rem(trans, uuid_root, uuid, type, subid);
+       btrfs_end_transaction(trans, uuid_root);
+
+out:
+       return ret;
+}
+
+/*
+ * Walk every entry of the uuid tree and validate it with @check_func.
+ *
+ * check_func(fs_info, uuid, type, subid) is called for each recorded
+ * subid; a return of 0 keeps the entry, > 0 means the entry is stale
+ * and is removed here, < 0 aborts the walk.  After a removal the search
+ * is restarted from the current key, since the tree was modified while
+ * the path was released.
+ *
+ * Always returns 0; a failure is only reported via pr_warn() at the end.
+ */
+int btrfs_uuid_tree_iterate(struct btrfs_fs_info *fs_info,
+                           int (*check_func)(struct btrfs_fs_info *, u8 *, u8,
+                                             u64))
+{
+       struct btrfs_root *root = fs_info->uuid_root;
+       struct btrfs_key key;
+       struct btrfs_key max_key;
+       struct btrfs_path *path;
+       int ret = 0;
+       struct extent_buffer *leaf;
+       int slot;
+       u32 item_size;
+       unsigned long offset;
+
+       path = btrfs_alloc_path();
+       if (!path) {
+               ret = -ENOMEM;
+               goto out;
+       }
+
+       /* iterate the full key space of the uuid tree */
+       key.objectid = 0;
+       key.type = 0;
+       key.offset = 0;
+       max_key.objectid = (u64)-1;
+       max_key.type = (u8)-1;
+       max_key.offset = (u64)-1;
+
+again_search_slot:
+       path->keep_locks = 1;
+       ret = btrfs_search_forward(root, &key, &max_key, path, 0);
+       if (ret) {
+               if (ret > 0)
+                       ret = 0;        /* > 0 means nothing left: done */
+               goto out;
+       }
+
+       while (1) {
+               cond_resched();
+               leaf = path->nodes[0];
+               slot = path->slots[0];
+               btrfs_item_key_to_cpu(leaf, &key, slot);
+
+               /* only subvol UUID entries are checked */
+               if (key.type != BTRFS_UUID_KEY_SUBVOL &&
+                   key.type != BTRFS_UUID_KEY_RECEIVED_SUBVOL)
+                       goto skip;
+
+               offset = btrfs_item_ptr_offset(leaf, slot);
+               item_size = btrfs_item_size_nr(leaf, slot);
+               /* a valid uuid item is a whole number of __le64 subids */
+               if (!IS_ALIGNED(item_size, sizeof(u64))) {
+                       pr_warn("btrfs: uuid item with illegal size %lu!\n",
+                               (unsigned long)item_size);
+                       goto skip;
+               }
+               while (item_size) {
+                       u8 uuid[BTRFS_UUID_SIZE];
+                       __le64 subid_le;
+                       u64 subid_cpu;
+
+                       /* rebuild the raw UUID from the key (see btrfs_uuid_to_key) */
+                       put_unaligned_le64(key.objectid, uuid);
+                       put_unaligned_le64(key.offset, uuid + sizeof(u64));
+                       read_extent_buffer(leaf, &subid_le, offset,
+                                          sizeof(subid_le));
+                       subid_cpu = le64_to_cpu(subid_le);
+                       ret = check_func(fs_info, uuid, key.type, subid_cpu);
+                       if (ret < 0)
+                               goto out;
+                       if (ret > 0) {
+                               /* stale entry: drop it and restart the search */
+                               btrfs_release_path(path);
+                               ret = btrfs_uuid_iter_rem(root, uuid, key.type,
+                                                         subid_cpu);
+                               if (ret == 0) {
+                                       /*
+                                        * this might look inefficient, but the
+                                        * justification is that it is an
+                                        * exception that check_func returns 1,
+                                        * and that in the regular case only one
+                                        * entry per UUID exists.
+                                        */
+                                       goto again_search_slot;
+                               }
+                               /* -ENOENT: someone else removed it; keep going */
+                               if (ret < 0 && ret != -ENOENT)
+                                       goto out;
+                       }
+                       item_size -= sizeof(subid_le);
+                       offset += sizeof(subid_le);
+               }
+
+skip:
+               ret = btrfs_next_item(root, path);
+               if (ret == 0)
+                       continue;
+               else if (ret > 0)
+                       ret = 0;        /* walked off the end of the tree */
+               break;
+       }
+
+out:
+       btrfs_free_path(path);
+       if (ret)
+               pr_warn("btrfs: btrfs_uuid_tree_iterate failed %d\n", ret);
+       /* errors are logged above but deliberately not propagated */
+       return 0;
+}
index 67a0853..0052ca8 100644 (file)
@@ -26,6 +26,7 @@
 #include <linux/ratelimit.h>
 #include <linux/kthread.h>
 #include <linux/raid/pq.h>
+#include <linux/semaphore.h>
 #include <asm/div64.h>
 #include "compat.h"
 #include "ctree.h"
@@ -62,6 +63,48 @@ static void unlock_chunks(struct btrfs_root *root)
        mutex_unlock(&root->fs_info->chunk_mutex);
 }
 
+/*
+ * Allocate and zero a struct btrfs_fs_devices and initialize its mutex
+ * and list heads.  The fsid is left zeroed; alloc_fs_devices() fills it
+ * in.  Returns the new struct or ERR_PTR(-ENOMEM).
+ */
+static struct btrfs_fs_devices *__alloc_fs_devices(void)
+{
+       struct btrfs_fs_devices *fs_devs;
+
+       fs_devs = kzalloc(sizeof(*fs_devs), GFP_NOFS);
+       if (!fs_devs)
+               return ERR_PTR(-ENOMEM);
+
+       mutex_init(&fs_devs->device_list_mutex);
+
+       INIT_LIST_HEAD(&fs_devs->devices);
+       INIT_LIST_HEAD(&fs_devs->alloc_list);
+       INIT_LIST_HEAD(&fs_devs->list);
+
+       return fs_devs;
+}
+
+/**
+ * alloc_fs_devices - allocate struct btrfs_fs_devices
+ * @fsid:      a pointer to UUID for this FS.  If NULL a new UUID is
+ *             generated.
+ *
+ * Return: a pointer to a new &struct btrfs_fs_devices on success;
+ * ERR_PTR() on error.  Returned struct is not linked onto any lists and
+ * can be destroyed with kfree() right away.
+ */
+static struct btrfs_fs_devices *alloc_fs_devices(const u8 *fsid)
+{
+       struct btrfs_fs_devices *fs_devs;
+
+       fs_devs = __alloc_fs_devices();
+       if (IS_ERR(fs_devs))
+               return fs_devs;
+
+       /* copy the caller's fsid or make up a fresh random one */
+       if (fsid)
+               memcpy(fs_devs->fsid, fsid, BTRFS_FSID_SIZE);
+       else
+               generate_random_uuid(fs_devs->fsid);
+
+       return fs_devs;
+}
+
 static void free_fs_devices(struct btrfs_fs_devices *fs_devices)
 {
        struct btrfs_device *device;
@@ -101,6 +144,27 @@ void btrfs_cleanup_fs_uuids(void)
        }
 }
 
+/*
+ * Allocate and zero a struct btrfs_device and initialize its list
+ * heads, locks, and readahead state.  All remaining fields are left at
+ * zero from kzalloc().  Returns the new struct or ERR_PTR(-ENOMEM).
+ */
+static struct btrfs_device *__alloc_device(void)
+{
+       struct btrfs_device *dev;
+
+       dev = kzalloc(sizeof(*dev), GFP_NOFS);
+       if (!dev)
+               return ERR_PTR(-ENOMEM);
+
+       INIT_LIST_HEAD(&dev->dev_list);
+       INIT_LIST_HEAD(&dev->dev_alloc_list);
+
+       spin_lock_init(&dev->io_lock);
+
+       /* readahead state; radix tree inserts must not block (no __GFP_WAIT) */
+       spin_lock_init(&dev->reada_lock);
+       atomic_set(&dev->reada_in_flight, 0);
+       INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_WAIT);
+       INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_WAIT);
+
+       return dev;
+}
+
 static noinline struct btrfs_device *__find_device(struct list_head *head,
                                                   u64 devid, u8 *uuid)
 {
@@ -395,16 +459,14 @@ static noinline int device_list_add(const char *path,
 
        fs_devices = find_fsid(disk_super->fsid);
        if (!fs_devices) {
-               fs_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS);
-               if (!fs_devices)
-                       return -ENOMEM;
-               INIT_LIST_HEAD(&fs_devices->devices);
-               INIT_LIST_HEAD(&fs_devices->alloc_list);
+               fs_devices = alloc_fs_devices(disk_super->fsid);
+               if (IS_ERR(fs_devices))
+                       return PTR_ERR(fs_devices);
+
                list_add(&fs_devices->list, &fs_uuids);
-               memcpy(fs_devices->fsid, disk_super->fsid, BTRFS_FSID_SIZE);
                fs_devices->latest_devid = devid;
                fs_devices->latest_trans = found_transid;
-               mutex_init(&fs_devices->device_list_mutex);
+
                device = NULL;
        } else {
                device = __find_device(&fs_devices->devices, devid,
@@ -414,17 +476,12 @@ static noinline int device_list_add(const char *path,
                if (fs_devices->opened)
                        return -EBUSY;
 
-               device = kzalloc(sizeof(*device), GFP_NOFS);
-               if (!device) {
+               device = btrfs_alloc_device(NULL, &devid,
+                                           disk_super->dev_item.uuid);
+               if (IS_ERR(device)) {
                        /* we can safely leave the fs_devices entry around */
-                       return -ENOMEM;
+                       return PTR_ERR(device);
                }
-               device->devid = devid;
-               device->dev_stats_valid = 0;
-               device->work.func = pending_bios_fn;
-               memcpy(device->uuid, disk_super->dev_item.uuid,
-                      BTRFS_UUID_SIZE);
-               spin_lock_init(&device->io_lock);
 
                name = rcu_string_strdup(path, GFP_NOFS);
                if (!name) {
@@ -432,22 +489,13 @@ static noinline int device_list_add(const char *path,
                        return -ENOMEM;
                }
                rcu_assign_pointer(device->name, name);
-               INIT_LIST_HEAD(&device->dev_alloc_list);
-
-               /* init readahead state */
-               spin_lock_init(&device->reada_lock);
-               device->reada_curr_zone = NULL;
-               atomic_set(&device->reada_in_flight, 0);
-               device->reada_next = 0;
-               INIT_RADIX_TREE(&device->reada_zones, GFP_NOFS & ~__GFP_WAIT);
-               INIT_RADIX_TREE(&device->reada_extents, GFP_NOFS & ~__GFP_WAIT);
 
                mutex_lock(&fs_devices->device_list_mutex);
                list_add_rcu(&device->dev_list, &fs_devices->devices);
+               fs_devices->num_devices++;
                mutex_unlock(&fs_devices->device_list_mutex);
 
                device->fs_devices = fs_devices;
-               fs_devices->num_devices++;
        } else if (!device->name || strcmp(device->name->str, path)) {
                name = rcu_string_strdup(path, GFP_NOFS);
                if (!name)
@@ -474,25 +522,21 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
        struct btrfs_device *device;
        struct btrfs_device *orig_dev;
 
-       fs_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS);
-       if (!fs_devices)
-               return ERR_PTR(-ENOMEM);
+       fs_devices = alloc_fs_devices(orig->fsid);
+       if (IS_ERR(fs_devices))
+               return fs_devices;
 
-       INIT_LIST_HEAD(&fs_devices->devices);
-       INIT_LIST_HEAD(&fs_devices->alloc_list);
-       INIT_LIST_HEAD(&fs_devices->list);
-       mutex_init(&fs_devices->device_list_mutex);
        fs_devices->latest_devid = orig->latest_devid;
        fs_devices->latest_trans = orig->latest_trans;
        fs_devices->total_devices = orig->total_devices;
-       memcpy(fs_devices->fsid, orig->fsid, sizeof(fs_devices->fsid));
 
        /* We have held the volume lock, it is safe to get the devices. */
        list_for_each_entry(orig_dev, &orig->devices, dev_list) {
                struct rcu_string *name;
 
-               device = kzalloc(sizeof(*device), GFP_NOFS);
-               if (!device)
+               device = btrfs_alloc_device(NULL, &orig_dev->devid,
+                                           orig_dev->uuid);
+               if (IS_ERR(device))
                        goto error;
 
                /*
@@ -506,13 +550,6 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
                }
                rcu_assign_pointer(device->name, name);
 
-               device->devid = orig_dev->devid;
-               device->work.func = pending_bios_fn;
-               memcpy(device->uuid, orig_dev->uuid, sizeof(device->uuid));
-               spin_lock_init(&device->io_lock);
-               INIT_LIST_HEAD(&device->dev_list);
-               INIT_LIST_HEAD(&device->dev_alloc_list);
-
                list_add(&device->dev_list, &fs_devices->devices);
                device->fs_devices = fs_devices;
                fs_devices->num_devices++;
@@ -636,23 +673,22 @@ static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
 
                if (device->can_discard)
                        fs_devices->num_can_discard--;
+               if (device->missing)
+                       fs_devices->missing_devices--;
 
-               new_device = kmalloc(sizeof(*new_device), GFP_NOFS);
-               BUG_ON(!new_device); /* -ENOMEM */
-               memcpy(new_device, device, sizeof(*new_device));
+               new_device = btrfs_alloc_device(NULL, &device->devid,
+                                               device->uuid);
+               BUG_ON(IS_ERR(new_device)); /* -ENOMEM */
 
                /* Safe because we are under uuid_mutex */
                if (device->name) {
                        name = rcu_string_strdup(device->name->str, GFP_NOFS);
-                       BUG_ON(device->name && !name); /* -ENOMEM */
+                       BUG_ON(!name); /* -ENOMEM */
                        rcu_assign_pointer(new_device->name, name);
                }
-               new_device->bdev = NULL;
-               new_device->writeable = 0;
-               new_device->in_fs_metadata = 0;
-               new_device->can_discard = 0;
-               spin_lock_init(&new_device->io_lock);
+
                list_replace_rcu(&device->dev_list, &new_device->dev_list);
+               new_device->fs_devices = device->fs_devices;
 
                call_rcu(&device->rcu, free_device);
        }
@@ -865,7 +901,7 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
        disk_super = p + (bytenr & ~PAGE_CACHE_MASK);
 
        if (btrfs_super_bytenr(disk_super) != bytenr ||
-           disk_super->magic != cpu_to_le64(BTRFS_MAGIC))
+           btrfs_super_magic(disk_super) != BTRFS_MAGIC)
                goto error_unmap;
 
        devid = btrfs_stack_device_id(&disk_super->dev_item);
@@ -880,8 +916,7 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
                printk(KERN_INFO "device fsid %pU ", disk_super->fsid);
        }
 
-       printk(KERN_CONT "devid %llu transid %llu %s\n",
-              (unsigned long long)devid, (unsigned long long)transid, path);
+       printk(KERN_CONT "devid %llu transid %llu %s\n", devid, transid, path);
 
        ret = device_list_add(path, disk_super, devid, fs_devices_ret);
        if (!ret && fs_devices_ret)
@@ -1278,8 +1313,7 @@ static int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
        btrfs_set_dev_extent_chunk_offset(leaf, extent, chunk_offset);
 
        write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid,
-                   (unsigned long)btrfs_dev_extent_chunk_tree_uuid(extent),
-                   BTRFS_UUID_SIZE);
+                   btrfs_dev_extent_chunk_tree_uuid(extent), BTRFS_UUID_SIZE);
 
        btrfs_set_dev_extent_length(leaf, extent, num_bytes);
        btrfs_mark_buffer_dirty(leaf);
@@ -1307,15 +1341,14 @@ static u64 find_next_chunk(struct btrfs_fs_info *fs_info)
        return ret;
 }
 
-static noinline int find_next_devid(struct btrfs_root *root, u64 *objectid)
+static noinline int find_next_devid(struct btrfs_fs_info *fs_info,
+                                   u64 *devid_ret)
 {
        int ret;
        struct btrfs_key key;
        struct btrfs_key found_key;
        struct btrfs_path *path;
 
-       root = root->fs_info->chunk_root;
-
        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;
@@ -1324,20 +1357,21 @@ static noinline int find_next_devid(struct btrfs_root *root, u64 *objectid)
        key.type = BTRFS_DEV_ITEM_KEY;
        key.offset = (u64)-1;
 
-       ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+       ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
        if (ret < 0)
                goto error;
 
        BUG_ON(ret == 0); /* Corruption */
 
-       ret = btrfs_previous_item(root, path, BTRFS_DEV_ITEMS_OBJECTID,
+       ret = btrfs_previous_item(fs_info->chunk_root, path,
+                                 BTRFS_DEV_ITEMS_OBJECTID,
                                  BTRFS_DEV_ITEM_KEY);
        if (ret) {
-               *objectid = 1;
+               *devid_ret = 1;
        } else {
                btrfs_item_key_to_cpu(path->nodes[0], &found_key,
                                      path->slots[0]);
-               *objectid = found_key.offset + 1;
+               *devid_ret = found_key.offset + 1;
        }
        ret = 0;
 error:
@@ -1391,9 +1425,9 @@ static int btrfs_add_device(struct btrfs_trans_handle *trans,
        btrfs_set_device_bandwidth(leaf, dev_item, 0);
        btrfs_set_device_start_offset(leaf, dev_item, 0);
 
-       ptr = (unsigned long)btrfs_device_uuid(dev_item);
+       ptr = btrfs_device_uuid(dev_item);
        write_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
-       ptr = (unsigned long)btrfs_device_fsid(dev_item);
+       ptr = btrfs_device_fsid(dev_item);
        write_extent_buffer(leaf, root->fs_info->fsid, ptr, BTRFS_UUID_SIZE);
        btrfs_mark_buffer_dirty(leaf);
 
@@ -1562,7 +1596,9 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
                clear_super = true;
        }
 
+       mutex_unlock(&uuid_mutex);
        ret = btrfs_shrink_device(device, 0);
+       mutex_lock(&uuid_mutex);
        if (ret)
                goto error_undo;
 
@@ -1586,7 +1622,11 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
        /*
         * the device list mutex makes sure that we don't change
         * the device list while someone else is writing out all
-        * the device supers.
+        * the device supers. Whoever is writing all supers, should
+        * lock the device list mutex before getting the number of
+        * devices in the super block (super_copy). Conversely,
+        * whoever updates the number of devices in the super block
+        * (super_copy) should hold the device list mutex.
         */
 
        cur_devices = device->fs_devices;
@@ -1610,10 +1650,10 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
                device->fs_devices->open_devices--;
 
        call_rcu(&device->rcu, free_device);
-       mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
 
        num_devices = btrfs_super_num_devices(root->fs_info->super_copy) - 1;
        btrfs_set_super_num_devices(root->fs_info->super_copy, num_devices);
+       mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
 
        if (cur_devices->open_devices == 0) {
                struct btrfs_fs_devices *fs_devices;
@@ -1793,9 +1833,9 @@ static int btrfs_prepare_sprout(struct btrfs_root *root)
        if (!fs_devices->seeding)
                return -EINVAL;
 
-       seed_devices = kzalloc(sizeof(*fs_devices), GFP_NOFS);
-       if (!seed_devices)
-               return -ENOMEM;
+       seed_devices = __alloc_fs_devices();
+       if (IS_ERR(seed_devices))
+               return PTR_ERR(seed_devices);
 
        old_devices = clone_fs_devices(fs_devices);
        if (IS_ERR(old_devices)) {
@@ -1814,7 +1854,6 @@ static int btrfs_prepare_sprout(struct btrfs_root *root)
        mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
        list_splice_init_rcu(&fs_devices->devices, &seed_devices->devices,
                              synchronize_rcu);
-       mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
 
        list_splice_init(&fs_devices->alloc_list, &seed_devices->alloc_list);
        list_for_each_entry(device, &seed_devices->devices, dev_list) {
@@ -1830,6 +1869,8 @@ static int btrfs_prepare_sprout(struct btrfs_root *root)
        generate_random_uuid(fs_devices->fsid);
        memcpy(root->fs_info->fsid, fs_devices->fsid, BTRFS_FSID_SIZE);
        memcpy(disk_super->fsid, fs_devices->fsid, BTRFS_FSID_SIZE);
+       mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
+
        super_flags = btrfs_super_flags(disk_super) &
                      ~BTRFS_SUPER_FLAG_SEEDING;
        btrfs_set_super_flags(disk_super, super_flags);
@@ -1889,11 +1930,9 @@ next_slot:
                dev_item = btrfs_item_ptr(leaf, path->slots[0],
                                          struct btrfs_dev_item);
                devid = btrfs_device_id(leaf, dev_item);
-               read_extent_buffer(leaf, dev_uuid,
-                                  (unsigned long)btrfs_device_uuid(dev_item),
+               read_extent_buffer(leaf, dev_uuid, btrfs_device_uuid(dev_item),
                                   BTRFS_UUID_SIZE);
-               read_extent_buffer(leaf, fs_uuid,
-                                  (unsigned long)btrfs_device_fsid(dev_item),
+               read_extent_buffer(leaf, fs_uuid, btrfs_device_fsid(dev_item),
                                   BTRFS_UUID_SIZE);
                device = btrfs_find_device(root->fs_info, devid, dev_uuid,
                                           fs_uuid);
@@ -1956,10 +1995,10 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
        }
        mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
 
-       device = kzalloc(sizeof(*device), GFP_NOFS);
-       if (!device) {
+       device = btrfs_alloc_device(root->fs_info, NULL, NULL);
+       if (IS_ERR(device)) {
                /* we can safely leave the fs_devices entry around */
-               ret = -ENOMEM;
+               ret = PTR_ERR(device);
                goto error;
        }
 
@@ -1971,13 +2010,6 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
        }
        rcu_assign_pointer(device->name, name);
 
-       ret = find_next_devid(root, &device->devid);
-       if (ret) {
-               rcu_string_free(device->name);
-               kfree(device);
-               goto error;
-       }
-
        trans = btrfs_start_transaction(root, 0);
        if (IS_ERR(trans)) {
                rcu_string_free(device->name);
@@ -1992,9 +2024,6 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
        if (blk_queue_discard(q))
                device->can_discard = 1;
        device->writeable = 1;
-       device->work.func = pending_bios_fn;
-       generate_random_uuid(device->uuid);
-       spin_lock_init(&device->io_lock);
        device->generation = trans->transid;
        device->io_width = root->sectorsize;
        device->io_align = root->sectorsize;
@@ -2121,6 +2150,7 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path,
        struct btrfs_fs_info *fs_info = root->fs_info;
        struct list_head *devices;
        struct rcu_string *name;
+       u64 devid = BTRFS_DEV_REPLACE_DEVID;
        int ret = 0;
 
        *device_out = NULL;
@@ -2142,9 +2172,9 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path,
                }
        }
 
-       device = kzalloc(sizeof(*device), GFP_NOFS);
-       if (!device) {
-               ret = -ENOMEM;
+       device = btrfs_alloc_device(NULL, &devid, NULL);
+       if (IS_ERR(device)) {
+               ret = PTR_ERR(device);
                goto error;
        }
 
@@ -2161,10 +2191,6 @@ int btrfs_init_dev_replace_tgtdev(struct btrfs_root *root, char *device_path,
                device->can_discard = 1;
        mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
        device->writeable = 1;
-       device->work.func = pending_bios_fn;
-       generate_random_uuid(device->uuid);
-       device->devid = BTRFS_DEV_REPLACE_DEVID;
-       spin_lock_init(&device->io_lock);
        device->generation = 0;
        device->io_width = root->sectorsize;
        device->io_align = root->sectorsize;
@@ -2971,10 +2997,6 @@ again:
                if (found_key.objectid != key.objectid)
                        break;
 
-               /* chunk zero is special */
-               if (found_key.offset == 0)
-                       break;
-
                chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
 
                if (!counting) {
@@ -3010,6 +3032,8 @@ again:
                        spin_unlock(&fs_info->balance_lock);
                }
 loop:
+               if (found_key.offset == 0)
+                       break;
                key.offset = found_key.offset - 1;
        }
 
@@ -3074,9 +3098,6 @@ static void __cancel_balance(struct btrfs_fs_info *fs_info)
        atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
 }
 
-void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock,
-                              struct btrfs_ioctl_balance_args *bargs);
-
 /*
  * Should be called with both balance and volume mutexes held
  */
@@ -3139,7 +3160,7 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
             (bctl->data.target & ~allowed))) {
                printk(KERN_ERR "btrfs: unable to start balance with target "
                       "data profile %llu\n",
-                      (unsigned long long)bctl->data.target);
+                      bctl->data.target);
                ret = -EINVAL;
                goto out;
        }
@@ -3148,7 +3169,7 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
             (bctl->meta.target & ~allowed))) {
                printk(KERN_ERR "btrfs: unable to start balance with target "
                       "metadata profile %llu\n",
-                      (unsigned long long)bctl->meta.target);
+                      bctl->meta.target);
                ret = -EINVAL;
                goto out;
        }
@@ -3157,7 +3178,7 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
             (bctl->sys.target & ~allowed))) {
                printk(KERN_ERR "btrfs: unable to start balance with target "
                       "system profile %llu\n",
-                      (unsigned long long)bctl->sys.target);
+                      bctl->sys.target);
                ret = -EINVAL;
                goto out;
        }
@@ -3430,6 +3451,264 @@ int btrfs_cancel_balance(struct btrfs_fs_info *fs_info)
        return 0;
 }
 
+static int btrfs_uuid_scan_kthread(void *data)
+{
+       struct btrfs_fs_info *fs_info = data;
+       struct btrfs_root *root = fs_info->tree_root;
+       struct btrfs_key key;
+       struct btrfs_key max_key;
+       struct btrfs_path *path = NULL;
+       int ret = 0;
+       struct extent_buffer *eb;
+       int slot;
+       struct btrfs_root_item root_item;
+       u32 item_size;
+       struct btrfs_trans_handle *trans = NULL;
+
+       path = btrfs_alloc_path();
+       if (!path) {
+               ret = -ENOMEM;
+               goto out;
+       }
+
+       key.objectid = 0;
+       key.type = BTRFS_ROOT_ITEM_KEY;
+       key.offset = 0;
+
+       max_key.objectid = (u64)-1;
+       max_key.type = BTRFS_ROOT_ITEM_KEY;
+       max_key.offset = (u64)-1;
+
+       path->keep_locks = 1;
+
+       while (1) {
+               ret = btrfs_search_forward(root, &key, &max_key, path, 0);
+               if (ret) {
+                       if (ret > 0)
+                               ret = 0;
+                       break;
+               }
+
+               if (key.type != BTRFS_ROOT_ITEM_KEY ||
+                   (key.objectid < BTRFS_FIRST_FREE_OBJECTID &&
+                    key.objectid != BTRFS_FS_TREE_OBJECTID) ||
+                   key.objectid > BTRFS_LAST_FREE_OBJECTID)
+                       goto skip;
+
+               eb = path->nodes[0];
+               slot = path->slots[0];
+               item_size = btrfs_item_size_nr(eb, slot);
+               if (item_size < sizeof(root_item))
+                       goto skip;
+
+               read_extent_buffer(eb, &root_item,
+                                  btrfs_item_ptr_offset(eb, slot),
+                                  (int)sizeof(root_item));
+               if (btrfs_root_refs(&root_item) == 0)
+                       goto skip;
+
+               if (!btrfs_is_empty_uuid(root_item.uuid) ||
+                   !btrfs_is_empty_uuid(root_item.received_uuid)) {
+                       if (trans)
+                               goto update_tree;
+
+                       btrfs_release_path(path);
+                       /*
+                        * 1 - subvol uuid item
+                        * 1 - received_subvol uuid item
+                        */
+                       trans = btrfs_start_transaction(fs_info->uuid_root, 2);
+                       if (IS_ERR(trans)) {
+                               ret = PTR_ERR(trans);
+                               break;
+                       }
+                       continue;
+               } else {
+                       goto skip;
+               }
+update_tree:
+               if (!btrfs_is_empty_uuid(root_item.uuid)) {
+                       ret = btrfs_uuid_tree_add(trans, fs_info->uuid_root,
+                                                 root_item.uuid,
+                                                 BTRFS_UUID_KEY_SUBVOL,
+                                                 key.objectid);
+                       if (ret < 0) {
+                               pr_warn("btrfs: uuid_tree_add failed %d\n",
+                                       ret);
+                               break;
+                       }
+               }
+
+               if (!btrfs_is_empty_uuid(root_item.received_uuid)) {
+                       ret = btrfs_uuid_tree_add(trans, fs_info->uuid_root,
+                                                 root_item.received_uuid,
+                                                BTRFS_UUID_KEY_RECEIVED_SUBVOL,
+                                                 key.objectid);
+                       if (ret < 0) {
+                               pr_warn("btrfs: uuid_tree_add failed %d\n",
+                                       ret);
+                               break;
+                       }
+               }
+
+skip:
+               if (trans) {
+                       ret = btrfs_end_transaction(trans, fs_info->uuid_root);
+                       trans = NULL;
+                       if (ret)
+                               break;
+               }
+
+               btrfs_release_path(path);
+               if (key.offset < (u64)-1) {
+                       key.offset++;
+               } else if (key.type < BTRFS_ROOT_ITEM_KEY) {
+                       key.offset = 0;
+                       key.type = BTRFS_ROOT_ITEM_KEY;
+               } else if (key.objectid < (u64)-1) {
+                       key.offset = 0;
+                       key.type = BTRFS_ROOT_ITEM_KEY;
+                       key.objectid++;
+               } else {
+                       break;
+               }
+               cond_resched();
+       }
+
+out:
+       btrfs_free_path(path);
+       if (trans && !IS_ERR(trans))
+               btrfs_end_transaction(trans, fs_info->uuid_root);
+       if (ret)
+               pr_warn("btrfs: btrfs_uuid_scan_kthread failed %d\n", ret);
+       else
+               fs_info->update_uuid_tree_gen = 1;
+       up(&fs_info->uuid_tree_rescan_sem);
+       return 0;
+}
+
+/*
+ * Callback for btrfs_uuid_tree_iterate().
+ * returns:
+ * 0   check succeeded, the entry is not outdated.
+ * < 0 if an error occurred.
+ * > 0 if the check failed, which means the caller shall remove the entry.
+ */
+static int btrfs_check_uuid_tree_entry(struct btrfs_fs_info *fs_info,
+                                      u8 *uuid, u8 type, u64 subid)
+{
+       struct btrfs_key key;
+       int ret = 0;
+       struct btrfs_root *subvol_root;
+
+       if (type != BTRFS_UUID_KEY_SUBVOL &&
+           type != BTRFS_UUID_KEY_RECEIVED_SUBVOL)
+               goto out;
+
+       key.objectid = subid;
+       key.type = BTRFS_ROOT_ITEM_KEY;
+       key.offset = (u64)-1;
+       subvol_root = btrfs_read_fs_root_no_name(fs_info, &key);
+       if (IS_ERR(subvol_root)) {
+               ret = PTR_ERR(subvol_root);
+               if (ret == -ENOENT)
+                       ret = 1;
+               goto out;
+       }
+
+       switch (type) {
+       case BTRFS_UUID_KEY_SUBVOL:
+               if (memcmp(uuid, subvol_root->root_item.uuid, BTRFS_UUID_SIZE))
+                       ret = 1;
+               break;
+       case BTRFS_UUID_KEY_RECEIVED_SUBVOL:
+               if (memcmp(uuid, subvol_root->root_item.received_uuid,
+                          BTRFS_UUID_SIZE))
+                       ret = 1;
+               break;
+       }
+
+out:
+       return ret;
+}
+
+static int btrfs_uuid_rescan_kthread(void *data)
+{
+       struct btrfs_fs_info *fs_info = (struct btrfs_fs_info *)data;
+       int ret;
+
+       /*
+        * 1st step is to iterate through the existing UUID tree and
+        * to delete all entries that contain outdated data.
+        * 2nd step is to add all missing entries to the UUID tree.
+        */
+       ret = btrfs_uuid_tree_iterate(fs_info, btrfs_check_uuid_tree_entry);
+       if (ret < 0) {
+               pr_warn("btrfs: iterating uuid_tree failed %d\n", ret);
+               up(&fs_info->uuid_tree_rescan_sem);
+               return ret;
+       }
+       return btrfs_uuid_scan_kthread(data);
+}
+
+int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info)
+{
+       struct btrfs_trans_handle *trans;
+       struct btrfs_root *tree_root = fs_info->tree_root;
+       struct btrfs_root *uuid_root;
+       struct task_struct *task;
+       int ret;
+
+       /*
+        * 1 - root node
+        * 1 - root item
+        */
+       trans = btrfs_start_transaction(tree_root, 2);
+       if (IS_ERR(trans))
+               return PTR_ERR(trans);
+
+       uuid_root = btrfs_create_tree(trans, fs_info,
+                                     BTRFS_UUID_TREE_OBJECTID);
+       if (IS_ERR(uuid_root)) {
+               btrfs_abort_transaction(trans, tree_root,
+                                       PTR_ERR(uuid_root));
+               return PTR_ERR(uuid_root);
+       }
+
+       fs_info->uuid_root = uuid_root;
+
+       ret = btrfs_commit_transaction(trans, tree_root);
+       if (ret)
+               return ret;
+
+       down(&fs_info->uuid_tree_rescan_sem);
+       task = kthread_run(btrfs_uuid_scan_kthread, fs_info, "btrfs-uuid");
+       if (IS_ERR(task)) {
+               /* fs_info->update_uuid_tree_gen remains 0 in all error cases */
+               pr_warn("btrfs: failed to start uuid_scan task\n");
+               up(&fs_info->uuid_tree_rescan_sem);
+               return PTR_ERR(task);
+       }
+
+       return 0;
+}
+
+int btrfs_check_uuid_tree(struct btrfs_fs_info *fs_info)
+{
+       struct task_struct *task;
+
+       down(&fs_info->uuid_tree_rescan_sem);
+       task = kthread_run(btrfs_uuid_rescan_kthread, fs_info, "btrfs-uuid");
+       if (IS_ERR(task)) {
+               /* fs_info->update_uuid_tree_gen remains 0 in all error cases */
+               pr_warn("btrfs: failed to start uuid_rescan task\n");
+               up(&fs_info->uuid_tree_rescan_sem);
+               return PTR_ERR(task);
+       }
+
+       return 0;
+}
+
 /*
  * shrinking a device means finding all of the device extents past
  * the new size, and then following the back refs to the chunks.
@@ -4194,13 +4473,13 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
         * and exit, so return 1 so the callers don't try to use other copies.
         */
        if (!em) {
-               btrfs_emerg(fs_info, "No mapping for %Lu-%Lu\n", logical,
+               btrfs_crit(fs_info, "No mapping for %Lu-%Lu\n", logical,
                            logical+len);
                return 1;
        }
 
        if (em->start > logical || em->start + em->len < logical) {
-               btrfs_emerg(fs_info, "Invalid mapping for %Lu-%Lu, got "
+               btrfs_crit(fs_info, "Invalid mapping for %Lu-%Lu, got "
                            "%Lu-%Lu\n", logical, logical+len, em->start,
                            em->start + em->len);
                return 1;
@@ -4375,8 +4654,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
 
        if (!em) {
                btrfs_crit(fs_info, "unable to find logical %llu len %llu",
-                       (unsigned long long)logical,
-                       (unsigned long long)*length);
+                       logical, *length);
                return -EINVAL;
        }
 
@@ -4671,6 +4949,7 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
        }
        bbio = kzalloc(btrfs_bio_size(num_alloc_stripes), GFP_NOFS);
        if (!bbio) {
+               kfree(raid_map);
                ret = -ENOMEM;
                goto out;
        }
@@ -5246,9 +5525,7 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio,
 
        if (map_length < length) {
                btrfs_crit(root->fs_info, "mapping failed logical %llu bio len %llu len %llu",
-                       (unsigned long long)logical,
-                       (unsigned long long)length,
-                       (unsigned long long)map_length);
+                       logical, length, map_length);
                BUG();
        }
 
@@ -5314,23 +5591,72 @@ static struct btrfs_device *add_missing_dev(struct btrfs_root *root,
        struct btrfs_device *device;
        struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
 
-       device = kzalloc(sizeof(*device), GFP_NOFS);
-       if (!device)
+       device = btrfs_alloc_device(NULL, &devid, dev_uuid);
+       if (IS_ERR(device))
                return NULL;
-       list_add(&device->dev_list,
-                &fs_devices->devices);
-       device->devid = devid;
-       device->work.func = pending_bios_fn;
+
+       list_add(&device->dev_list, &fs_devices->devices);
        device->fs_devices = fs_devices;
-       device->missing = 1;
        fs_devices->num_devices++;
+
+       device->missing = 1;
        fs_devices->missing_devices++;
-       spin_lock_init(&device->io_lock);
-       INIT_LIST_HEAD(&device->dev_alloc_list);
-       memcpy(device->uuid, dev_uuid, BTRFS_UUID_SIZE);
+
        return device;
 }
 
+/**
+ * btrfs_alloc_device - allocate struct btrfs_device
+ * @fs_info:   used only for generating a new devid, can be NULL if
+ *             devid is provided (i.e. @devid != NULL).
+ * @devid:     a pointer to devid for this device.  If NULL a new devid
+ *             is generated.
+ * @uuid:      a pointer to UUID for this device.  If NULL a new UUID
+ *             is generated.
+ *
+ * Return: a pointer to a new &struct btrfs_device on success; ERR_PTR()
+ * on error.  Returned struct is not linked onto any lists and can be
+ * destroyed with kfree() right away.
+ */
+struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
+                                       const u64 *devid,
+                                       const u8 *uuid)
+{
+       struct btrfs_device *dev;
+       u64 tmp;
+
+       if (!devid && !fs_info) {
+               WARN_ON(1);
+               return ERR_PTR(-EINVAL);
+       }
+
+       dev = __alloc_device();
+       if (IS_ERR(dev))
+               return dev;
+
+       if (devid)
+               tmp = *devid;
+       else {
+               int ret;
+
+               ret = find_next_devid(fs_info, &tmp);
+               if (ret) {
+                       kfree(dev);
+                       return ERR_PTR(ret);
+               }
+       }
+       dev->devid = tmp;
+
+       if (uuid)
+               memcpy(dev->uuid, uuid, BTRFS_UUID_SIZE);
+       else
+               generate_random_uuid(dev->uuid);
+
+       dev->work.func = pending_bios_fn;
+
+       return dev;
+}
+
 static int read_one_chunk(struct btrfs_root *root, struct btrfs_key *key,
                          struct extent_buffer *leaf,
                          struct btrfs_chunk *chunk)
@@ -5437,7 +5763,7 @@ static void fill_device_from_item(struct extent_buffer *leaf,
        WARN_ON(device->devid == BTRFS_DEV_REPLACE_DEVID);
        device->is_tgtdev_for_dev_replace = 0;
 
-       ptr = (unsigned long)btrfs_device_uuid(dev_item);
+       ptr = btrfs_device_uuid(dev_item);
        read_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
 }
 
@@ -5500,11 +5826,9 @@ static int read_one_dev(struct btrfs_root *root,
        u8 dev_uuid[BTRFS_UUID_SIZE];
 
        devid = btrfs_device_id(leaf, dev_item);
-       read_extent_buffer(leaf, dev_uuid,
-                          (unsigned long)btrfs_device_uuid(dev_item),
+       read_extent_buffer(leaf, dev_uuid, btrfs_device_uuid(dev_item),
                           BTRFS_UUID_SIZE);
-       read_extent_buffer(leaf, fs_uuid,
-                          (unsigned long)btrfs_device_fsid(dev_item),
+       read_extent_buffer(leaf, fs_uuid, btrfs_device_fsid(dev_item),
                           BTRFS_UUID_SIZE);
 
        if (memcmp(fs_uuid, root->fs_info->fsid, BTRFS_UUID_SIZE)) {
@@ -5519,8 +5843,7 @@ static int read_one_dev(struct btrfs_root *root,
                        return -EIO;
 
                if (!device) {
-                       btrfs_warn(root->fs_info, "devid %llu missing",
-                               (unsigned long long)devid);
+                       btrfs_warn(root->fs_info, "devid %llu missing", devid);
                        device = add_missing_dev(root, devid, dev_uuid);
                        if (!device)
                                return -ENOMEM;
@@ -5644,14 +5967,15 @@ int btrfs_read_chunk_tree(struct btrfs_root *root)
        mutex_lock(&uuid_mutex);
        lock_chunks(root);
 
-       /* first we search for all of the device items, and then we
-        * read in all of the chunk items.  This way we can create chunk
-        * mappings that reference all of the devices that are afound
+       /*
+        * Read all device items, and then all the chunk items. All
+        * device items are found before any chunk item (their object id
+        * is smaller than the lowest possible object id for a chunk
+        * item - BTRFS_FIRST_CHUNK_TREE_OBJECTID).
         */
        key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
        key.offset = 0;
        key.type = 0;
-again:
        ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
        if (ret < 0)
                goto error;
@@ -5667,17 +5991,13 @@ again:
                        break;
                }
                btrfs_item_key_to_cpu(leaf, &found_key, slot);
-               if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) {
-                       if (found_key.objectid != BTRFS_DEV_ITEMS_OBJECTID)
-                               break;
-                       if (found_key.type == BTRFS_DEV_ITEM_KEY) {
-                               struct btrfs_dev_item *dev_item;
-                               dev_item = btrfs_item_ptr(leaf, slot,
+               if (found_key.type == BTRFS_DEV_ITEM_KEY) {
+                       struct btrfs_dev_item *dev_item;
+                       dev_item = btrfs_item_ptr(leaf, slot,
                                                  struct btrfs_dev_item);
-                               ret = read_one_dev(root, leaf, dev_item);
-                               if (ret)
-                                       goto error;
-                       }
+                       ret = read_one_dev(root, leaf, dev_item);
+                       if (ret)
+                               goto error;
                } else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) {
                        struct btrfs_chunk *chunk;
                        chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
@@ -5687,11 +6007,6 @@ again:
                }
                path->slots[0]++;
        }
-       if (key.objectid == BTRFS_DEV_ITEMS_OBJECTID) {
-               key.objectid = 0;
-               btrfs_release_path(path);
-               goto again;
-       }
        ret = 0;
 error:
        unlock_chunks(root);
index 8670558..b72f540 100644 (file)
@@ -152,6 +152,8 @@ struct btrfs_fs_devices {
        int rotating;
 };
 
+#define BTRFS_BIO_INLINE_CSUM_SIZE     64
+
 /*
  * we need the mirror number and stripe index to be passed around
  * the call chain while we are processing end_io (especially errors).
@@ -161,9 +163,14 @@ struct btrfs_fs_devices {
  * we allocate are actually btrfs_io_bios.  We'll cram as much of
  * struct btrfs_bio as we can into this over time.
  */
+typedef void (btrfs_io_bio_end_io_t) (struct btrfs_io_bio *bio, int err);
 struct btrfs_io_bio {
        unsigned long mirror_num;
        unsigned long stripe_index;
+       u8 *csum;
+       u8 csum_inline[BTRFS_BIO_INLINE_CSUM_SIZE];
+       u8 *csum_allocated;
+       btrfs_io_bio_end_io_t *end_io;
        struct bio bio;
 };
 
@@ -298,6 +305,9 @@ void btrfs_close_extra_devices(struct btrfs_fs_info *fs_info,
 int btrfs_find_device_missing_or_by_path(struct btrfs_root *root,
                                         char *device_path,
                                         struct btrfs_device **device);
+struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
+                                       const u64 *devid,
+                                       const u8 *uuid);
 int btrfs_rm_device(struct btrfs_root *root, char *device_path);
 void btrfs_cleanup_fs_uuids(void);
 int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len);
@@ -315,6 +325,8 @@ int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info);
 int btrfs_recover_balance(struct btrfs_fs_info *fs_info);
 int btrfs_pause_balance(struct btrfs_fs_info *fs_info);
 int btrfs_cancel_balance(struct btrfs_fs_info *fs_info);
+int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info);
+int btrfs_check_uuid_tree(struct btrfs_fs_info *fs_info);
 int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset);
 int find_free_dev_extent(struct btrfs_trans_handle *trans,
                         struct btrfs_device *device, u64 num_bytes,
diff --git a/fs/cifs/AUTHORS b/fs/cifs/AUTHORS
deleted file mode 100644 (file)
index ea940b1..0000000
+++ /dev/null
@@ -1,55 +0,0 @@
-Original Author
-===============
-Steve French (sfrench@samba.org)
-
-The author wishes to express his appreciation and thanks to:
-Andrew Tridgell (Samba team) for his early suggestions about smb/cifs VFS
-improvements. Thanks to IBM for allowing me time and test resources to pursue
-this project, to Jim McDonough from IBM (and the Samba Team) for his help, to
-the IBM Linux JFS team for explaining many esoteric Linux filesystem features.
-Jeremy Allison of the Samba team has done invaluable work in adding the server
-side of the original CIFS Unix extensions and reviewing and implementing
-portions of the newer CIFS POSIX extensions into the Samba 3 file server. Thank
-Dave Boutcher of IBM Rochester (author of the OS/400 smb/cifs filesystem client)
-for proving years ago that very good smb/cifs clients could be done on Unix-like
-operating systems.  Volker Lendecke, Andrew Tridgell, Urban Widmark, John 
-Newbigin and others for their work on the Linux smbfs module.  Thanks to
-the other members of the Storage Network Industry Association CIFS Technical
-Workgroup for their work specifying this highly complex protocol and finally
-thanks to the Samba team for their technical advice and encouragement.
-
-Patch Contributors
-------------------
-Zwane Mwaikambo
-Andi Kleen
-Amrut Joshi
-Shobhit Dayal
-Sergey Vlasov
-Richard Hughes
-Yury Umanets
-Mark Hamzy (for some of the early cifs IPv6 work)
-Domen Puncer
-Jesper Juhl (in particular for lots of whitespace/formatting cleanup)
-Vince Negri and Dave Stahl (for finding an important caching bug)
-Adrian Bunk (kcalloc cleanups)
-Miklos Szeredi 
-Kazeon team for various fixes especially for 2.4 version.
-Asser Ferno (Change Notify support)
-Shaggy (Dave Kleikamp) for innumerable small fs suggestions and some good cleanup
-Gunter Kukkukk (testing and suggestions for support of old servers)
-Igor Mammedov (DFS support)
-Jeff Layton (many, many fixes, as well as great work on the cifs Kerberos code)
-
-Test case and Bug Report contributors
--------------------------------------
-Thanks to those in the community who have submitted detailed bug reports
-and debug of problems they have found:  Jochen Dolze, David Blaine,
-Rene Scharfe, Martin Josefsson, Alexander Wild, Anthony Liguori,
-Lars Muller, Urban Widmark, Massimiliano Ferrero, Howard Owen,
-Olaf Kirch, Kieron Briggs, Nick Millington and others. Also special
-mention to the Stanford Checker (SWAT) which pointed out many minor
-bugs in error paths.  Valuable suggestions also have come from Al Viro
-and Dave Miller.
-
-And thanks to the IBM LTC and Power test teams and SuSE testers for
-finding multiple bugs during excellent stress test runs.
diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
deleted file mode 100644 (file)
index bc0025c..0000000
+++ /dev/null
@@ -1,1065 +0,0 @@
-Version 1.62
-------------
-Add sockopt=TCP_NODELAY mount option. EA (xattr) routines hardened
-to more strictly handle corrupt frames.
-
-Version 1.61
-------------
-Fix append problem to Samba servers (files opened with O_APPEND could
-have duplicated data). Fix oops in cifs_lookup. Workaround problem
-mounting to OS/400 Netserve. Fix oops in cifs_get_tcp_session.
-Disable use of server inode numbers when server only
-partially supports them (e.g. for one server querying inode numbers on
-FindFirst fails but QPathInfo queries works). Fix oops with dfs in 
-cifs_put_smb_ses. Fix mmap to work on directio mounts (needed
-for OpenOffice when on forcedirectio mount e.g.)
-
-Version 1.60
--------------
-Fix memory leak in reconnect.  Fix oops in DFS mount error path.
-Set s_maxbytes to smaller (the max that vfs can handle) so that
-sendfile will now work over cifs mounts again.  Add noforcegid
-and noforceuid mount parameters. Fix small mem leak when using
-ntlmv2. Fix 2nd mount to same server but with different port to
-be allowed (rather than reusing the 1st port) - only when the
-user explicitly overrides the port on the 2nd mount.
-
-Version 1.59
-------------
-Client uses server inode numbers (which are persistent) rather than
-client generated ones by default (mount option "serverino" turned
-on by default if server supports it).  Add forceuid and forcegid
-mount options (so that when negotiating unix extensions specifying
-which uid mounted does not immediately force the server's reported
-uids to be overridden).  Add support for scope mount parm. Improve
-hard link detection to use same inode for both.  Do not set
-read-only dos attribute on directories (for chmod) since Windows
-explorer special cases this attribute bit for directories for
-a different purpose.
-
-Version 1.58
-------------
-Guard against buffer overruns in various UCS-2 to UTF-8 string conversions
-when the UTF-8 string is composed of unusually long (more than 4 byte) converted
-characters. Add support for mounting root of a share which redirects immediately
-to DFS target. Convert string conversion functions from Unicode to more
-accurately mark string length before allocating memory (which may help the
-rare cases where a UTF-8 string is much larger than the UCS2 string that
-we converted from).  Fix endianness of the vcnum field used during
-session setup to distinguish multiple mounts to same server from different
-userids. Raw NTLMSSP fixed (it requires /proc/fs/cifs/experimental
-flag to be set to 2, and mount must enable krb5 to turn on extended security).
-Performance of file create to Samba improved (posix create on lookup
-removes 1 of 2 network requests sent on file create)
-Version 1.57
-------------
-Improve support for multiple security contexts to the same server. We
-used to use the same "vcnumber" for all connections which could cause
-the server to treat subsequent connections, especially those that
-are authenticated as guest, as reconnections, invalidating the earlier
-user's smb session.  This fix allows cifs to mount multiple times to the
-same server with different userids without risking invalidating earlier
-established security contexts.  fsync now sends SMB Flush operation
-to better ensure that we wait for server to write all of the data to
-server disk (not just write it over the network).  Add new mount
-parameter to allow user to disable sending the (slow) SMB flush on
-fsync if desired (fsync still flushes all cached write data to the server).
-Posix file open support added (turned off after one attempt if server
-fails to support it properly, as with Samba server versions prior to 3.3.2)
-Fix "redzone overwritten" bug in cifs_put_tcon (CIFSTcon may allocate too
-little memory for the "nativeFileSystem" field returned by the server
-during mount).  Endian convert inode numbers if necessary (makes it easier
-to compare inode numbers on network files from big endian systems). 
-
-Version 1.56
-------------
-Add "forcemandatorylock" mount option to allow user to use mandatory
-rather than posix (advisory) byte range locks, even though server would
-support posix byte range locks.  Fix query of root inode when prefixpath
-specified and user does not have access to query information about the
-top of the share.  Fix problem in 2.6.28 resolving DFS paths to
-Samba servers (worked to Windows).  Fix rmdir so that pending search
-(readdir) requests do not get invalid results which include the now
-removed directory.  Fix oops in cifs_dfs_ref.c when prefixpath is not reachable
-when using DFS.  Add better file create support to servers which support
-the CIFS POSIX protocol extensions (this adds support for new flags
-on create, and improves semantics for write of locked ranges).
-
-Version 1.55
-------------
-Various fixes to make delete of open files behavior more predictable
-(when delete of an open file fails we mark the file as "delete-on-close"
-in a way that more servers accept, but only if we can first rename the
-file to a temporary name).  Add experimental support for more safely
-handling fcntl(F_SETLEASE).  Convert cifs to using blocking tcp
-sends, and also let tcp autotune the socket send and receive buffers.
-This reduces the number of EAGAIN errors returned by TCP/IP in
-high stress workloads (and the number of retries on socket writes
-when sending large SMBWriteX requests).  Fix case in which a portion of
-data can in some cases not get written to the file on the server before the
-file is closed.  Fix DFS parsing to properly handle path consumed field,
-and to handle certain codepage conversions better.  Fix mount and
-umount race that can cause oops in mount or umount or reconnect.
-
-Version 1.54
-------------
-Fix premature write failure on congested networks (we would give up
-on EAGAIN from the socket too quickly on large writes).
-Cifs_mkdir and cifs_create now respect the setgid bit on parent dir.
-Fix endian problems in acl (mode from/to cifs acl) on bigendian
-architectures.  Fix problems with preserving timestamps on copying open
-files (e.g. "cp -a") to Windows servers.  For mkdir and create honor setgid bit
-on parent directory when server supports Unix Extensions but not POSIX
-create. Update cifs.upcall version to handle new Kerberos sec flags
-(this requires update of cifs.upcall program from Samba).  Fix memory leak
-on dns_upcall (resolving DFS referralls).  Fix plain text password
-authentication (requires setting SecurityFlags to 0x30030 to enable
-lanman and plain text though).  Fix writes to be at correct offset when
-file is open with O_APPEND and file is on a directio (forcediretio) mount.
-Fix bug in rewinding readdir directory searches.  Add nodfs mount option.
-
-Version 1.53
-------------
-DFS support added (Microsoft Distributed File System client support needed
-for referrals which enable a hierarchical name space among servers).
-Disable temporary caching of mode bits to servers which do not support
-storing of mode (e.g. Windows servers, when client mounts without cifsacl
-mount option) and add new "dynperm" mount option to enable temporary caching
-of mode (enable old behavior).  Fix hang on mount caused when server crashes
-tcp session during negotiate protocol.
-
-Version 1.52
-------------
-Fix oops on second mount to server when null auth is used.
-Enable experimental Kerberos support.  Return writebehind errors on flush
-and sync so that events like out of disk space get reported properly on
-cached files. Fix setxattr failure to certain Samba versions. Fix mount
-of second share to disconnected server session (autoreconnect on this).
-Add ability to modify cifs acls for handling chmod (when mounted with
-cifsacl flag). Fix prefixpath path separator so we can handle mounts
-with prefixpaths longer than one directory (one path component) when
-mounted to Windows servers.  Fix slow file open when cifsacl
-enabled. Fix memory leak in FindNext when the SMB call returns -EBADF.
-
-
-Version 1.51
-------------
-Fix memory leak in statfs when mounted to very old servers (e.g.
-Windows 9x).  Add new feature "POSIX open" which allows servers
-which support the current POSIX Extensions to provide better semantics
-(e.g. delete for open files opened with posix open).  Take into
-account umask on posix mkdir not just older style mkdir.  Add
-ability to mount to IPC$ share (which allows CIFS named pipes to be
-opened, read and written as if they were files).  When 1st tree
-connect fails (e.g. due to signing negotiation failure) fix
-leak that causes cifsd not to stop and rmmod to fail to cleanup
-cifs_request_buffers pool. Fix problem with POSIX Open/Mkdir on
-bigendian architectures. Fix possible memory corruption when
-EAGAIN returned on kern_recvmsg. Return better error if server
-requires packet signing but client has disabled it. When mounted
-with cifsacl mount option - mode bits are approximated based
-on the contents of the ACL of the file or directory. When cifs
-mount helper is missing convert make sure that UNC name 
-has backslash (not forward slash) between ip address of server
-and the share name.
-
-Version 1.50
-------------
-Fix NTLMv2 signing. NFS server mounted over cifs works (if cifs mount is
-done with "serverino" mount option).  Add support for POSIX Unlink
-(helps with certain sharing violation cases when server such as
-Samba supports newer POSIX CIFS Protocol Extensions). Add "nounix"
-mount option to allow disabling the CIFS Unix Extensions for just
-that mount. Fix hang on spinlock in find_writable_file (race when
-reopening file after session crash).  Byte range unlock request to
-windows server could unlock more bytes (on server copy of file)
-than intended if start of unlock request is well before start of
-a previous byte range lock that we issued.
-
-Version 1.49
-------------
-IPv6 support.  Enable ipv6 addresses to be passed on mount (put the ipv6
-address after the "ip=" mount option, at least until mount.cifs is fixed to
-handle DNS host to ipv6 name translation).  Accept override of uid or gid
-on mount even when Unix Extensions are negotiated (it used to be ignored
-when Unix Extensions were ignored).  This allows users to override the
-default uid and gid for files when they are certain that the uids or
-gids on the server do not match those of the client.  Make "sec=none"
-mount override username (so that null user connection is attempted)
-to match what documentation said. Support for very large reads, over 127K,
-available to some newer servers (such as Samba 3.0.26 and later but
-note that it also requires setting CIFSMaxBufSize at module install
-time to a larger value which may hurt performance in some cases).
-Make sign option force signing (or fail if server does not support it).
-
-Version 1.48
-------------
-Fix mtime bouncing around from local idea of last write times to remote time.
-Fix hang (in i_size_read) when simultaneous size update of same remote file
-on smp system corrupts sequence number. Do not reread unnecessarily partial page
-(which we are about to overwrite anyway) when writing out file opened rw.
-When DOS attribute of file on non-Unix server's file changes on the server side
-from read-only back to read-write, reflect this change in default file mode
-(we had been leaving a file's mode read-only until the inode were reloaded).
-Allow setting of attribute back to ATTR_NORMAL (removing readonly dos attribute
-when archive dos attribute not set and we are changing mode back to writeable
-on server which does not support the Unix Extensions).  Remove read only dos
-attribute on chmod when adding any write permission (ie on any of
-user/group/other (not all of user/group/other ie  0222) when
-mounted to windows.  Add support for POSIX MkDir (slight performance
-enhancement and eliminates the network race between the mkdir and set 
-path info of the mode).
-
-
-Version 1.47
-------------
-Fix oops in list_del during mount caused by unaligned string.
-Fix file corruption which could occur on some large file
-copies caused by writepages page i/o completion bug.
-Seek to SEEK_END forces check for update of file size for non-cached
-files. Allow file size to be updated on remote extend of locally open,
-non-cached file.  Fix reconnect to newer Samba servers (or other servers
-which support the CIFS Unix/POSIX extensions) so that we again tell the
-server the Unix/POSIX cifs capabilities which we support (SetFSInfo).
-Add experimental support for new POSIX Open/Mkdir (which returns
-stat information on the open, and allows setting the mode).
-
-Version 1.46
-------------
-Support deep tree mounts.  Better support OS/2, Win9x (DOS) time stamps.
-Allow null user to be specified on mount ("username="). Do not return
-EINVAL on readdir when filldir fails due to overwritten blocksize
-(fixes FC problem).  Return error in rename 2nd attempt retry (ie report
-if rename by handle also fails, after rename by path fails, we were
-not reporting whether the retry worked or not). Fix NTLMv2 to
-work to Windows servers (mount with option "sec=ntlmv2").
-
-Version 1.45
-------------
-Do not time out lockw calls when using posix extensions. Do not
-time out requests if server still responding reasonably fast
-on requests on other threads.  Improve POSIX locking emulation,
-(lock cancel now works, and unlock of merged range works even
-to Windows servers now).  Fix oops on mount to lanman servers
-(win9x, os/2 etc.) when null password.  Do not send listxattr
-(SMB to query all EAs) if nouser_xattr specified.  Fix SE Linux
-problem (instantiate inodes/dentries in right order for readdir).
-
-Version 1.44
-------------
-Rewritten sessionsetup support, including support for legacy SMB
-session setup needed for OS/2 and older servers such as Windows 95 and 98.
-Fix oops on ls to OS/2 servers.  Add support for level 1 FindFirst
-so we can do search (ls etc.) to OS/2.  Do not send NTCreateX
-or recent levels of FindFirst unless server says it supports NT SMBs
-(instead use legacy equivalents from LANMAN dialect). Fix to allow
-NTLMv2 authentication support (now can use stronger password hashing
-on mount if corresponding /proc/fs/cifs/SecurityFlags is set (0x4004).
-Allow override of global cifs security flags on mount via "sec=" option(s).
-
-Version 1.43
-------------
-POSIX locking to servers which support CIFS POSIX Extensions
-(disabled by default controlled by proc/fs/cifs/Experimental).
-Handle conversion of long share names (especially Asian languages)
-to Unicode during mount. Fix memory leak in sess struct on reconnect.
-Fix rare oops after acpi suspend.  Fix O_TRUNC opens to overwrite on
-cifs open which helps rare case when setpathinfo fails or server does
-not support it. 
-
-Version 1.42
-------------
-Fix slow oplock break when mounted to different servers at the same time and
-the tids match and we try to find matching fid on wrong server. Fix read
-looping when signing required by server (2.6.16 kernel only). Fix readdir
-vs. rename race which could cause each to hang. Return . and .. even
-if server does not.  Allow searches to skip first three entries and
-begin at any location. Fix oops in find_writeable_file.
-
-Version 1.41
-------------
-Fix NTLMv2 security (can be enabled in /proc/fs/cifs) so customers can
-configure stronger authentication.  Fix sfu symlinks so they can
-be followed (not just recognized).  Fix wraparound of bcc on
-read responses when buffer size over 64K and also fix wrap of
-max smb buffer size when CIFSMaxBufSize over 64K.  Fix oops in
-cifs_user_read and cifs_readpages (when EAGAIN on send of smb
-on socket is returned over and over).  Add POSIX (advisory) byte range
-locking support (requires server with newest CIFS UNIX Extensions
-to the protocol implemented). Slow down negprot slightly in port 139
-RFC1001 case to give session_init time on buggy servers.
-
-Version 1.40
-------------
-Use fsuid (fsgid) more consistently instead of uid (gid). Improve performance
-of readpages by eliminating one extra memcpy. Allow update of file size
-from remote server even if file is open for write as long as mount is
-directio.  Recognize share mode security and send NTLM encrypted password
-on tree connect if share mode negotiated.
-
-Version 1.39
-------------
-Defer close of a file handle slightly if pending writes depend on that handle
-(this reduces the EBADF bad file handle errors that can be logged under heavy
-stress on writes). Modify cifs Kconfig options to expose CONFIG_CIFS_STATS2 
-Fix SFU style symlinks and mknod needed for servers which do not support the
-CIFS Unix Extensions.  Fix setfacl/getfacl on bigendian. Timeout negative
-dentries so files that the client sees as deleted but that later get created
-on the server will be recognized.  Add client side permission check on setattr.
-Timeout stuck requests better (where server has never responded or sent corrupt
-responses)
-
-Version 1.38
-------------
-Fix tcp socket retransmission timeouts (e.g. on ENOSPACE from the socket)
-to be smaller at first (but increasing) so large write performance performance
-over GigE is better.  Do not hang thread on illegal byte range lock response
-from Windows (Windows can send an RFC1001 size which does not match smb size) by
-allowing an SMBs TCP length to be up to a few bytes longer than it should be.
-wsize and rsize can now be larger than negotiated buffer size if server
-supports large readx/writex, even when directio mount flag not specified.
-Write size will in many cases now be 16K instead of 4K which greatly helps
-file copy performance on lightly loaded networks.  Fix oops in dnotify
-when experimental config flag enabled. Make cifsFYI more granular.
-
-Version 1.37
-------------
-Fix readdir caching when unlink removes file in current search buffer,
-and this is followed by a rewind search to just before the deleted entry.
-Do not attempt to set ctime unless atime and/or mtime change requested
-(most servers throw it away anyway). Fix length check of received smbs
-to be more accurate. Fix big endian problem with mapchars mount option,
-and with a field returned by statfs.
-
-Version 1.36
-------------
-Add support for mounting to older pre-CIFS servers such as Windows9x and ME.
-For these older servers, add option for passing netbios name of server in
-on mount (servernetbiosname).  Add suspend support for power management, to
-avoid cifsd thread preventing software suspend from working.
-Add mount option for disabling the default behavior of sending byte range lock
-requests to the server (necessary for certain applications which break with
-mandatory lock behavior such as Evolution), and also mount option for
-requesting case insensitive matching for path based requests (requesting
-case sensitive is the default).
-
-Version 1.35
-------------
-Add writepage performance improvements.  Fix path name conversions
-for long filenames on mounts which were done with "mapchars" mount option
-specified.  Ensure multiplex ids do not collide.  Fix case in which 
-rmmod can oops if done soon after last unmount.  Fix truncated
-search (readdir) output when resume filename was a long filename.
-Fix filename conversion when mapchars mount option was specified and
-filename was a long filename.
-
-Version 1.34
-------------
-Fix error mapping of the TOO_MANY_LINKS (hardlinks) case.
-Do not oops if root user kills cifs oplock kernel thread or
-kills the cifsd thread (NB: killing the cifs kernel threads is not
-recommended, unmount and rmmod cifs will kill them when they are
-no longer needed).  Fix readdir to ASCII servers (ie older servers
-which do not support Unicode) and also require asterisk.
-Fix out of memory case in which data could be written one page
-off in the page cache.
-
-Version 1.33
-------------
-Fix caching problem, in which readdir of directory containing a file
-which was cached could cause the file's time stamp to be updated
-without invalidating the readahead data (so we could get stale
-file data on the client for that file even as the server copy changed).
-Cleanup response processing so cifsd can not loop when abnormally
-terminated.
-
-
-Version 1.32
-------------
-Fix oops in ls when Transact2 FindFirst (or FindNext) returns more than one
-transact response for an SMB request and search entry split across two frames.
-Add support for lsattr (getting ext2/ext3/reiserfs attr flags from the server)
-as new protocol extensions. Do not send Get/Set calls for POSIX ACLs
-unless server explicitly claims to support them in CIFS Unix extensions
-POSIX ACL capability bit. Fix packet signing when multiuser mounting with
-different users from the same client to the same server. Fix oops in
-cifs_close. Add mount option for remapping reserved characters in
-filenames (also allow recognizing files with created by SFU which have any
-of these seven reserved characters, except backslash, to be recognized).
-Fix invalid transact2 message (we were sometimes trying to interpret
-oplock breaks as SMB responses). Add ioctl for checking that the
-current uid matches the uid of the mounter (needed by umount.cifs).
-Reduce the number of large buffer allocations in cifs response processing
-(significantly reduces memory pressure under heavy stress with multiple
-processes accessing the same server at the same time).
-
-Version 1.31
-------------
-Fix updates of DOS attributes and time fields so that files on NT4 servers
-do not get marked delete on close. Display sizes of cifs buffer pools in
-cifs stats. Fix oops in unmount when cifsd thread being killed by 
-shutdown. Add generic readv/writev and aio support. Report inode numbers 
-consistently in readdir and lookup (when serverino mount option is
-specified use the inode number that the server reports - for both lookup
-and readdir, otherwise by default the locally generated inode number is used
-for inodes created in either path since servers are not always able to 
-provide unique inode numbers when exporting multiple volumes from under one
-sharename).
-
-Version 1.30
-------------
-Allow new nouser_xattr mount parm to disable xattr support for user namespace.
-Do not flag user_xattr mount parm in dmesg.  Retry failures setting file time  
-(mostly affects NT4 servers) by retry with handle based network operation. 
-Add new POSIX Query FS Info for returning statfs info more accurately.
-Handle passwords with multiple commas in them.
-
-Version 1.29
-------------
-Fix default mode in sysfs of cifs module parms.  Remove old readdir routine.
-Fix capabilities flags for large readx so as to allow reads larger than 64K.
-
-Version 1.28
-------------
-Add module init parm for large SMB buffer size (to allow it to be changed
-from its default of 16K) which is especially useful for large file copy
-when mounting with the directio mount option. Fix oops after 
-returning from mount when experimental ExtendedSecurity enabled and
-SpnegoNegotiated returning invalid error. Fix case to retry better when 
-peek returns from 1 to 3 bytes on socket which should have more data.
-Fixed path based calls (such as cifs lookup) to handle path names
-longer than 530 (now can handle PATH_MAX). Fix pass through authentication
-from Samba server to DC (Samba required dummy LM password).
-
-Version 1.27
-------------
-Turn off DNOTIFY (directory change notification support) by default
-(unless built with the experimental flag) to fix hang with KDE
-file browser. Fix DNOTIFY flag mappings.  Fix hang (in wait_event
-waiting on an SMB response) in SendReceive when session dies but
-reconnects quickly from another task.  Add module init  parms for
-minimum number of large and small network buffers in the buffer pools,
-and for the maximum number of simultaneous requests.
-
-Version 1.26
-------------
-Add setfacl support to allow setting of ACLs remotely to Samba 3.10 and later
-and other POSIX CIFS compliant servers.  Fix error mapping for getfacl 
-to EOPNOTSUPP when server does not support posix acls on the wire. Fix 
-improperly zeroed buffer in CIFS Unix extensions set times call. 
-
-Version 1.25
-------------
-Fix internationalization problem in cifs readdir with filenames that map to 
-longer UTF-8 strings than the string on the wire was in Unicode.  Add workaround
-for readdir to netapp servers. Fix search rewind (seek into readdir to return 
-non-consecutive entries).  Do not do readdir when server negotiates 
-buffer size to small to fit filename. Add support for reading POSIX ACLs from
-the server (add also acl and noacl mount options).
-
-Version 1.24
-------------
-Optionally allow using server side inode numbers, rather than client generated
-ones by specifying mount option "serverino" - this is required for some apps
-to work which double check hardlinked files and have persistent inode numbers.
-
-Version 1.23
-------------
-Multiple bigendian fixes. On little endian systems (for reconnect after
-network failure) fix tcp session reconnect code so we do not try first
-to reconnect on reverse of port 445. Treat reparse points (NTFS junctions)
-as directories rather than symlinks because we can do follow link on them.
-
-Version 1.22
-------------
-Add config option to enable XATTR (extended attribute) support, mapping
-xattr names in the "user." namespace space to SMB/CIFS EAs. Lots of
-minor fixes pointed out by the Stanford SWAT checker (mostly missing
-or out of order NULL pointer checks in little used error paths).
-
-Version 1.21
-------------
-Add new mount parm to control whether mode check (generic_permission) is done
-on the client.  If Unix extensions are enabled and the uids on the client
-and server do not match, client permission checks are meaningless on
-server uids that do not exist on the client (this does not affect the
-normal ACL check which occurs on the server).  Fix default uid
-on mknod to match create and mkdir. Add optional mount parm to allow
-override of the default uid behavior (in which the server sets the uid
-and gid of newly created files). Normally for network filesystem mounts
-user want the server to set the uid/gid on newly created files (rather than 
-using uid of the client processes you would in a local filesystem).
-
-Version 1.20
-------------
-Make transaction counts more consistent. Merge /proc/fs/cifs/SimultaneousOps
-info into /proc/fs/cifs/DebugData.  Fix oops in rare oops in readdir 
-(in build_wildcard_path_from_dentry).  Fix mknod to pass type field
-(block/char/fifo) properly.  Remove spurious mount warning log entry when
-credentials passed as mount argument. Set major/minor device number in
-inode for block and char devices when unix extensions enabled.
-
-Version 1.19
-------------
-Fix /proc/fs/cifs/Stats and DebugData display to handle larger
-amounts of return data. Properly limit requests to MAX_REQ (50
-is the usual maximum active multiplex SMB/CIFS requests per server).
-Do not kill cifsd (and thus hurt the other SMB session) when more than one
-session to the same server (but with different userids) exists and one
-of the two user's smb sessions is being removed while leaving the other.
-Do not loop reconnecting in cifsd demultiplex thread when admin
-kills the thread without going through unmount.
-
-Version 1.18
-------------
-Do not rename hardlinked files (since that should be a noop). Flush
-cached write behind data when reopening a file after session abend,
-except when already in write. Grab per socket sem during reconnect 
-to avoid oops in sendmsg if overlapping with reconnect. Do not
-reset cached inode file size on readdir for files open for write on 
-client.
-
-
-Version 1.17
-------------
-Update number of blocks in file so du command is happier (in Linux a fake
-blocksize of 512 is required for calculating number of blocks in inode).
-Fix prepare write of partial pages to read in data from server if possible.
-Fix race on tcpStatus field between unmount and reconnection code, causing
-cifsd process sometimes to hang around forever. Improve out of memory
-checks in cifs_filldir
-
-Version 1.16
-------------
-Fix incorrect file size in file handle based setattr on big endian hardware.
-Fix oops in build_path_from_dentry when out of memory.  Add checks for invalid
-and closing file structs in writepage/partialpagewrite.  Add statistics
-for each mounted share (new menuconfig option). Fix endianness problem in
-volume information displayed in /proc/fs/cifs/DebugData (only affects
-affects big endian architectures). Prevent renames while constructing
-path names for open, mkdir and rmdir.
-
-Version 1.15
-------------
-Change to mempools for alloc smb request buffers and multiplex structs
-to better handle low memory problems (and potential deadlocks).
-
-Version 1.14
-------------
-Fix incomplete listings of large directories on Samba servers when Unix
-extensions enabled.  Fix oops when smb_buffer can not be allocated. Fix
-rename deadlock when writing out dirty pages at same time.
-
-Version 1.13
-------------
-Fix open of files in which O_CREATE can cause the mode to change in
-some cases. Fix case in which retry of write overlaps file close.
-Fix PPC64 build error.  Reduce excessive stack usage in smb password
-hashing. Fix overwrite of Linux user's view of file mode to Windows servers.
-
-Version 1.12
-------------
-Fixes for large file copy, signal handling, socket retry, buffer
-allocation and low memory situations.
-
-Version 1.11
-------------
-Better port 139 support to Windows servers (RFC1001/RFC1002 Session_Initialize)
-also now allowing support for specifying client netbiosname.  NT4 support added.
-
-Version 1.10
-------------
-Fix reconnection (and certain failed mounts) to properly wake up the
-blocked users thread so it does not seem hung (in some cases was blocked
-until the cifs receive timeout expired). Fix spurious error logging
-to kernel log when application with open network files killed. 
-
-Version 1.09
-------------
-Fix /proc/fs module unload warning message (that could be logged
-to the kernel log). Fix intermittent failure in connectathon
-test7 (hardlink count not immediately refreshed in case in which
-inode metadata can be incorrectly kept cached when time near zero)
-
-Version 1.08
-------------
-Allow file_mode and dir_mode (specified at mount time) to be enforced
-locally (the server already enforced its own ACLs too) for servers
-that do not report the correct mode (do not support the 
-CIFS Unix Extensions).
-
-Version 1.07
-------------
-Fix some small memory leaks in some unmount error paths. Fix major leak
-of cache pages in readpages causing multiple read oriented stress
-testcases (including fsx, and even large file copy) to fail over time. 
-
-Version 1.06
-------------
-Send NTCreateX with ATTR_POSIX if Linux/Unix extensions negotiated with server.
-This allows files that differ only in case and improves performance of file
-creation and file open to such servers.  Fix semaphore conflict which causes 
-slow delete of open file to Samba (which unfortunately can cause an oplock
-break to self while vfs_unlink held i_sem) which can hang for 20 seconds.
-
-Version 1.05
-------------
-fixes to cifs_readpages for fsx test case
-
-Version 1.04
-------------
-Fix caching data integrity bug when extending file size especially when no
-oplock on file.  Fix spurious logging of valid already parsed mount options
-that are parsed outside of the cifs vfs such as nosuid.
-
-
-Version 1.03
-------------
-Connect to server when port number override not specified, and tcp port
-uninitialized.  Reset search to restart at correct file when kernel routine
-filldir returns error during large directory searches (readdir). 
-
-Version 1.02
-------------
-Fix caching problem when files opened by multiple clients in which 
-page cache could contain stale data, and write through did
-not occur often enough while file was still open when read ahead
-(read oplock) not allowed.  Treat "sep=" when first mount option
-as an override of comma as the default separator between mount
-options. 
-
-Version 1.01
-------------
-Allow passwords longer than 16 bytes. Allow null password string.
-
-Version 1.00
-------------
-Gracefully clean up failed mounts when attempting to mount to servers such as
-Windows 98 that terminate tcp sessions during protocol negotiation.  Handle
-embedded commas in mount parsing of passwords.
-
-Version 0.99
-------------
-Invalidate local inode cached pages on oplock break and when last file
-instance is closed so that the client does not continue using stale local
-copy rather than later modified server copy of file.  Do not reconnect
-when server drops the tcp session prematurely before negotiate
-protocol response.  Fix oops in reopen_file when dentry freed.  Allow
-the support for CIFS Unix Extensions to be disabled via proc interface.
-
-Version 0.98
-------------
-Fix hang in commit_write during reconnection of open files under heavy load.
-Fix unload_nls oops in a mount failure path. Serialize writes to same socket
-which also fixes any possible races when cifs signatures are enabled in SMBs
-being sent out of signature sequence number order.    
-
-Version 0.97
-------------
-Fix byte range locking bug (endian problem) causing bad offset and
-length.
-
-Version 0.96
-------------
-Fix oops (in send_sig) caused by CIFS unmount code trying to
-wake up the demultiplex thread after it had exited. Do not log
-error on harmless oplock release of closed handle.
-
-Version 0.95
-------------
-Fix unsafe global variable usage and password hash failure on gcc 3.3.1
-Fix problem reconnecting secondary mounts to same server after session 
-failure.  Fix invalid dentry - race in mkdir when directory gets created
-by another client between the lookup and mkdir.
-Version 0.94
-------------
-Fix to list processing in reopen_files. Fix reconnection when server hung
-but tcpip session still alive.  Set proper timeout on socket read.
-
-Version 0.93
-------------
-Add missing mount options including iocharset.  SMP fixes in write and open. 
-Fix errors in reconnecting after TCP session failure.  Fix module unloading
-of default nls codepage
-
-Version 0.92
-------------
-Active smb transactions should never go negative (fix double FreeXid). Fix
-list processing in file routines. Check return code on kmalloc in open.
-Fix spinlock usage for SMP.
-
-Version 0.91
-------------
-Fix oops in reopen_files when invalid dentry. drop dentry on server rename 
-and on revalidate errors. Fix cases where pid is now tgid.  Fix return code
-on create hard link when server does not support them. 
-
-Version 0.90
-------------
-Fix scheduling while atomic error in getting inode info on newly created file. 
-Fix truncate of existing files opened with O_CREAT but not O_TRUNC set.
-
-Version 0.89
-------------
-Fix oops on write to dead tcp session. Remove error log write for case when file open
-O_CREAT but not O_EXCL
-
-Version 0.88
-------------
-Fix non-POSIX behavior on rename of open file and delete of open file by taking 
-advantage of trans2 SetFileInfo rename facility if available on target server.
-Retry on ENOSPC and EAGAIN socket errors.
-
-Version 0.87
-------------
-Fix oops on big endian readdir.  Set blksize to be even power of two (2**blkbits) to fix
-allocation size miscalculation. After oplock token lost do not read through
-cache. 
-
-Version 0.86
-------------
-Fix oops on empty file readahead.  Fix for file size handling for locally cached files.
-
-Version 0.85
-------------
-Fix oops in mkdir when server fails to return inode info. Fix oops in reopen_files
-during auto reconnection to server after server recovered from failure.
-
-Version 0.84
-------------
-Finish support for Linux 2.5 open/create changes, which removes the
-redundant NTCreate/QPathInfo/close that was sent during file create.
-Enable oplock by default. Enable packet signing by default (needed to 
-access many recent Windows servers)
-
-Version 0.83
-------------
-Fix oops when mounting to long server names caused by inverted parms to kmalloc.
-Fix MultiuserMount (/proc/fs/cifs configuration setting) so that when enabled
-we will choose a cifs user session (smb uid) that better matches the local
-uid if a) the mount uid does not match the current uid and b) we have another
-session to the same server (ip address) for a different mount which
-matches the current local uid.
-
-Version 0.82
-------------
-Add support for mknod of block or character devices.  Fix oplock
-code (distributed caching) to properly send response to oplock
-break from server.
-
-Version 0.81
-------------
-Finish up CIFS packet digital signing for the default
-NTLM security case. This should help Windows 2003
-network interoperability since it is common for
-packet signing to be required now. Fix statfs (stat -f)
-which recently started returning errors due to 
-invalid value (-1 instead of 0) being set in the
-struct kstatfs f_ffiles field.
-
-Version 0.80
------------
-Fix oops on stopping oplock thread when removing cifs when
-built as module.
-
-Version 0.79
-------------
-Fix mount options for ro (readonly), uid, gid and file and directory mode. 
-
-Version 0.78
-------------
-Fix errors displayed on failed mounts to be more understandable.
-Fixed various incorrect or misleading smb to posix error code mappings.
-
-Version 0.77
-------------
-Fix display of NTFS DFS junctions to display as symlinks.
-They are the network equivalent.  Fix oops in 
-cifs_partialpagewrite caused by missing spinlock protection
-of openfile linked list.  Allow writebehind caching errors to 
-be returned to the application at file close.
-
-Version 0.76
-------------
-Clean up options displayed in /proc/mounts by show_options to
-be more consistent with other filesystems.
-
-Version 0.75
-------------
-Fix delete of readonly file to Windows servers.  Reflect
-presence or absence of read only dos attribute in mode
-bits for servers that do not support CIFS Unix extensions.
-Fix shortened results on readdir of large directories to
-servers supporting CIFS Unix extensions (caused by
-incorrect resume key).
-
-Version 0.74
-------------
-Fix truncate bug (set file size) that could cause hangs e.g. running fsx
-
-Version 0.73
-------------
-unload nls if mount fails.
-
-Version 0.72
-------------
-Add resume key support to search (readdir) code to workaround
-Windows bug.  Add /proc/fs/cifs/LookupCacheEnable which
-allows disabling caching of attribute information for
-lookups.
-
-Version 0.71
-------------
-Add more oplock handling (distributed caching code).  Remove
-dead code.  Remove excessive stack space utilization from
-symlink routines.
-
-Version 0.70
-------------
-Fix oops in get dfs referral (triggered when null path sent in to
-mount).  Add support for overriding rsize at mount time.
-
-Version 0.69
-------------
-Fix buffer overrun in readdir which caused intermittent kernel oopses.
-Fix writepage code to release kmap on write data.  Allow "-ip=" new 
-mount option to be passed in on parameter distinct from the first part
-(server name portion of) the UNC name.  Allow override of the
-tcp port of the target server via new mount option "-port="  
-
-Version 0.68
-------------
-Fix search handle leak on rewind.  Fix setuid and gid so that they are 
-reflected in the local inode immediately.  Cleanup of whitespace
-to make 2.4 and 2.5 versions more consistent.
-
-
-Version 0.67
-------------
-Fix signal sending so that captive thread (cifsd) exits on umount 
-(which was causing the warning in kmem_cache_free of the request buffers
-at rmmod time).  This had broken as a sideeffect of the recent global
-kernel change to daemonize.  Fix memory leak in readdir code which
-showed up in "ls -R" (and applications that did search rewinding).
-
-Version 0.66
-------------
-Reconnect tids and fids after session reconnection (still do not
-reconnect byte range locks though).  Fix problem caching
-lookup information for directory inodes, improving performance,
-especially in deep directory trees.  Fix various build warnings.
-
-Version 0.65
-------------
-Finish fixes to commit write for caching/readahead consistency.  fsx 
-now works to Samba servers.  Fix oops caused when readahead
-was interrupted by a signal.
-
-Version 0.64
-------------
-Fix data corruption (in partial page after truncate) that caused fsx to
-fail to Windows servers.  Cleaned up some extraneous error logging in
-common error paths.  Add generic sendfile support.
-
-Version 0.63
-------------
-Fix memory leak in AllocMidQEntry.
-Finish reconnection logic, so connection with server can be dropped
-(or server rebooted) and the cifs client will reconnect.  
-
-Version 0.62
-------------
-Fix temporary socket leak when bad userid or password specified 
-(or other SMBSessSetup failure).  Increase maximum buffer size to slightly
-over 16K to allow negotiation of up to Samba and Windows server default read 
-sizes.  Add support for readpages
-
-Version 0.61
-------------
-Fix oops when username not passed in on mount.  Extensive fixes and improvements
-to error logging (strip redundant newlines, change debug macros to ensure newline
-passed in and to be more consistent).  Fix writepage wrong file handle problem,
-a readonly file handle could be incorrectly used to attempt to write out
-file updates through the page cache to multiply open files.  This could cause
-the iozone benchmark to fail on the fwrite test. Fix bug mounting two different
-shares to the same Windows server when using different usernames
-(doing this to Samba servers worked but Windows was rejecting it) - now it is
-possible to use different userids when connecting to the same server from a
-Linux client. Fix oops when treeDisconnect called during unmount on
-previously freed socket.
-
-Version 0.60
-------------
-Fix oops in readpages caused by not setting address space operations in inode in 
-rare code path. 
-
-Version 0.59
-------------
-Includes support for deleting of open files and renaming over existing files (per POSIX
-requirement).  Add readlink support for Windows junction points (directory symlinks).
-
-Version 0.58
-------------
-Changed read and write to go through pagecache. Added additional address space operations.
-Memory mapped operations now working.
-
-Version 0.57
-------------
-Added writepage code for additional memory mapping support.  Fixed leak in xids causing
-the simultaneous operations counter (/proc/fs/cifs/SimultaneousOps) to increase on 
-every stat call.  Additional formatting cleanup. 
-
-Version 0.56
-------------
-Fix bigendian bug in order of time conversion. Merge 2.5 to 2.4 version.  Formatting cleanup.   
-
-Version 0.55
-------------
-Fixes from Zwane Mwaikambo for adding missing return code checking in a few places.
-Also included a modified version of his fix to protect global list manipulation of
-the smb session and tree connection and mid related global variables.
-
-Version 0.54
-------------
-Fix problem with captive thread hanging around at unmount time.  Adjust to 2.5.42-pre
-changes to superblock layout.   Remove wasteful allocation of smb buffers (now the send 
-buffer is reused for responses).  Add more oplock handling. Additional minor cleanup.
-
-Version 0.53
-------------
-More stylistic updates to better match kernel style.  Add additional statistics
-for filesystem which can be viewed via /proc/fs/cifs.  Add more pieces of NTLMv2
-and CIFS Packet Signing enablement.
-
-Version 0.52
-------------
-Replace call to sleep_on with safer wait_on_event.
-Make stylistic changes to better match kernel style recommendations.
-Remove most typedef usage (except for the PDUs themselves).
-
-Version 0.51
-------------
-Update mount so the -unc mount option is no longer required (the ip address can be specified
-in a UNC style device name).   Implementation of readpage/writepage started.
-
-Version 0.50
-------------
-Fix intermittent problem with incorrect smb header checking on badly 
-fragmented tcp responses
-
-Version 0.49
-------------
-Fixes to setting of allocation size and file size.
-
-Version 0.48
-------------
-Various 2.5.38 fixes.  Now works on 2.5.38
-
-Version 0.47
-------------
-Prepare for 2.5 kernel merge.  Remove ifdefs.
-
-Version 0.46
-------------
-Socket buffer management fixes.  Fix dual free.
-
-Version 0.45
-------------
-Various big endian fixes for hardlinks and symlinks and also for dfs.
-
-Version 0.44
-------------
-Various big endian fixes for servers with Unix extensions such as Samba
-
-Version 0.43
-------------
-Various FindNext fixes for incorrect filenames on large directory searches on big endian
-clients.  basic posix file i/o tests now work on big endian machines, not just le
-
-Version 0.42
-------------
-SessionSetup and NegotiateProtocol now work from Big Endian machines.
-Various Big Endian fixes found during testing on the Linux on 390.  Various fixes for compatibility with older
-versions of 2.4 kernel (now builds and works again on kernels at least as early as 2.4.7).
-
-Version 0.41
-------------
-Various minor fixes for Connectathon Posix "basic" file i/o test suite.  Directory caching fixed so hardlinked
-files now return the correct number of links on fstat as they are repeatedly linked and unlinked.
-
-Version 0.40
-------------
-Implemented "Raw" (i.e. not encapsulated in SPNEGO) NTLMSSP (i.e. the Security Provider Interface used to negotiate
-advanced session authentication).  Raw NTLMSSP is preferred by Windows 2000 Professional and Windows XP.
-Began implementing support for SPNEGO encapsulation of NTLMSSP based session authentication blobs
-(which is the mechanism preferred by Windows 2000 server in the absence of Kerberos).
-
-Version 0.38
-------------
-Introduced optional mount helper utility mount.cifs and made coreq changes to cifs vfs to enable
-it. Fixed a few bugs in the DFS code (e.g. bcc two bytes too short and incorrect uid in PDU).
-
-Version 0.37
-------------
-Rewrote much of connection and mount/unmount logic to handle bugs with
-multiple uses to same share, multiple users to same server etc.
-
-Version 0.36
-------------
-Fixed major problem with dentry corruption (missing call to dput)
-
-Version 0.35
-------------
-Rewrite of readdir code to fix bug. Various fixes for bigendian machines.
-Begin adding oplock support.  Multiusermount and oplockEnabled flags added to /proc/fs/cifs
-although corresponding function not fully implemented in the vfs yet
-
-Version 0.34
-------------
-Fixed dentry caching bug, misc. cleanup 
-
-Version 0.33
-------------
-Fixed 2.5 support to handle build and configure changes as well as misc. 2.5 changes.  Now can build
-on current 2.5 beta version (2.5.24) of the Linux kernel as well as on 2.4 Linux kernels.
-Support for STATUS codes (newer 32 bit NT error codes) added.  DFS support begun to be added.
-
-Version 0.32
-------------
-Unix extensions (symlink, readlink, hardlink, chmod and some chgrp and chown) implemented
-and tested against Samba 2.2.5
-
-
-Version 0.31
-------------
-1) Fixed lockrange to be correct (it was one byte too short)
-
-2) Fixed GETLK (i.e. the fcntl call to test a range of bytes in a file to see if locked) to correctly 
-show range as locked when there is a conflict with an existing lock.
-
-3) default file perms are now 2767 (indicating support for mandatory locks) instead of 777 for directories
-in most cases.  Eventually will offer optional ability to query server for the correct perms.
-
-3) Fixed eventual trap when mounting twice to different shares on the same server when the first succeeded 
-but the second one was invalid and failed (the second one was incorrectly disconnecting the tcp and smb
-session) 
-
-4) Fixed error logging of valid mount options
-
-5) Removed logging of password field.
-
-6) Moved negotiate, treeDisconnect and uloggoffX (only tConx and SessSetup remain in connect.c) to cifssmb.c
-and cleaned them up and made them more consistent with other cifs functions. 
-
-7) Server support for Unix extensions is now fully detected and FindFirst is implemented both ways 
-(with or without Unix extensions) but FindNext and QueryPathInfo with the Unix extensions are not completed,
-nor is the symlink support using the Unix extensions
-
-8) Started adding the readlink and follow_link code 
-
-Version 0.3 
------------
-Initial drop
-
index aa0d68b..1964d21 100644 (file)
@@ -6,7 +6,7 @@ obj-$(CONFIG_CIFS) += cifs.o
 cifs-y := cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o inode.o \
          link.o misc.o netmisc.o smbencrypt.o transport.o asn1.o \
          cifs_unicode.o nterr.o xattr.o cifsencrypt.o \
-         readdir.o ioctl.o sess.o export.o smb1ops.o
+         readdir.o ioctl.o sess.o export.o smb1ops.o winucase.o
 
 cifs-$(CONFIG_CIFS_ACL) += cifsacl.o
 
diff --git a/fs/cifs/README b/fs/cifs/README
deleted file mode 100644 (file)
index 2d5622f..0000000
+++ /dev/null
@@ -1,753 +0,0 @@
-The CIFS VFS support for Linux supports many advanced network filesystem 
-features such as hierarchical dfs like namespace, hardlinks, locking and more.  
-It was designed to comply with the SNIA CIFS Technical Reference (which 
-supersedes the 1992 X/Open SMB Standard) as well as to perform best practice 
-practical interoperability with Windows 2000, Windows XP, Samba and equivalent 
-servers.  This code was developed in participation with the Protocol Freedom
-Information Foundation.
-
-Please see
-  http://protocolfreedom.org/ and
-  http://samba.org/samba/PFIF/
-for more details.
-
-
-For questions or bug reports please contact:
-    sfrench@samba.org (sfrench@us.ibm.com) 
-
-Build instructions:
-==================
-For Linux 2.4:
-1) Get the kernel source (e.g. from http://www.kernel.org)
-and download the cifs vfs source (see the project page
-at http://us1.samba.org/samba/Linux_CIFS_client.html)
-and change directory into the top of the kernel directory
-then patch the kernel (e.g. "patch -p1 < cifs_24.patch") 
-to add the cifs vfs to your kernel configure options if
-it has not already been added (e.g. current SuSE and UL
-users do not need to apply the cifs_24.patch since the cifs vfs is
-already in the kernel configure menu) and then
-mkdir linux/fs/cifs and then copy the current cifs vfs files from
-the cifs download to your kernel build directory e.g.
-
-       cp <cifs_download_dir>/fs/cifs/* to <kernel_download_dir>/fs/cifs
-       
-2) make menuconfig (or make xconfig)
-3) select cifs from within the network filesystem choices
-4) save and exit
-5) make dep
-6) make modules (or "make" if CIFS VFS not to be built as a module)
-
-For Linux 2.6:
-1) Download the kernel (e.g. from http://www.kernel.org)
-and change directory into the top of the kernel directory tree
-(e.g. /usr/src/linux-2.5.73)
-2) make menuconfig (or make xconfig)
-3) select cifs from within the network filesystem choices
-4) save and exit
-5) make
-
-
-Installation instructions:
-=========================
-If you have built the CIFS vfs as module (successfully) simply
-type "make modules_install" (or if you prefer, manually copy the file to
-the modules directory e.g. /lib/modules/2.4.10-4GB/kernel/fs/cifs/cifs.o).
-
-If you have built the CIFS vfs into the kernel itself, follow the instructions
-for your distribution on how to install a new kernel (usually you
-would simply type "make install").
-
-If you do not have the utility mount.cifs (in the Samba 3.0 source tree and on 
-the CIFS VFS web site) copy it to the same directory in which mount.smbfs and 
-similar files reside (usually /sbin).  Although the helper software is not  
-required, mount.cifs is recommended.  Eventually the Samba 3.0 utility program 
-"net" may also be helpful since it may someday provide easier mount syntax for
-users who are used to Windows e.g.
-       net use <mount point> <UNC name or cifs URL>
-Note that running the Winbind pam/nss module (logon service) on all of your
-Linux clients is useful in mapping Uids and Gids consistently across the
-domain to the proper network user.  The mount.cifs mount helper can be
-trivially built from Samba 3.0 or later source e.g. by executing:
-
-       gcc samba/source/client/mount.cifs.c -o mount.cifs
-
-If cifs is built as a module, then the size and number of network buffers
-and maximum number of simultaneous requests to one server can be configured.
-Changing these from their defaults is not recommended. By executing modinfo
-       modinfo kernel/fs/cifs/cifs.ko
-on kernel/fs/cifs/cifs.ko the list of configuration changes that can be made
-at module initialization time (by running insmod cifs.ko) can be seen.
-
-Allowing User Mounts
-====================
-To permit users to mount and unmount over directories they own is possible
-with the cifs vfs.  A way to enable such mounting is to mark the mount.cifs
-utility as suid (e.g. "chmod +s /sbin/mount.cifs"). To enable users to 
-umount shares they mount requires
-1) mount.cifs version 1.4 or later
-2) an entry for the share in /etc/fstab indicating that a user may
-unmount it e.g.
-//server/usersharename  /mnt/username cifs user 0 0
-
-Note that when the mount.cifs utility is run suid (allowing user mounts), 
-in order to reduce risks, the "nosuid" mount flag is passed in on mount to
-disallow execution of an suid program mounted on the remote target.
-When mount is executed as root, nosuid is not passed in by default,
-and execution of suid programs on the remote target would be enabled
-by default. This can be changed, as with nfs and other filesystems, 
-by simply specifying "nosuid" among the mount options. For user mounts 
-though to be able to pass the suid flag to mount requires rebuilding 
-mount.cifs with the following flag: 
-        gcc samba/source/client/mount.cifs.c -DCIFS_ALLOW_USR_SUID -o mount.cifs
-
-There is a corresponding manual page for cifs mounting in the Samba 3.0 and
-later source tree in docs/manpages/mount.cifs.8 
-
-Allowing User Unmounts
-======================
-To permit users to unmount directories that they have user mounted (see above),
-the utility umount.cifs may be used.  It may be invoked directly, or if 
-umount.cifs is placed in /sbin, umount can invoke the cifs umount helper
-(at least for most versions of the umount utility) for umount of cifs
-mounts, unless umount is invoked with -i (which will avoid invoking a umount
-helper). As with mount.cifs, to enable user unmounts umount.cifs must be marked
-as suid (e.g. "chmod +s /sbin/umount.cifs") or equivalent (some distributions
-allow adding entries to a file to the /etc/permissions file to achieve the
-equivalent suid effect).  For this utility to succeed the target path
-must be a cifs mount, and the uid of the current user must match the uid
-of the user who mounted the resource.
-
-Also note that the customary way of allowing user mounts and unmounts is 
-(instead of using mount.cifs and umount.cifs as suid) to add a line
-to the file /etc/fstab for each //server/share you wish to mount, but
-this can become unwieldy when potential mount targets include many
-or  unpredictable UNC names.
-
-Samba Considerations 
-==================== 
-To get the maximum benefit from the CIFS VFS, we recommend using a server that 
-supports the SNIA CIFS Unix Extensions standard (e.g.  Samba 2.2.5 or later or 
-Samba 3.0) but the CIFS vfs works fine with a wide variety of CIFS servers.  
-Note that uid, gid and file permissions will display default values if you do 
-not have a server that supports the Unix extensions for CIFS (such as Samba 
-2.2.5 or later).  To enable the Unix CIFS Extensions in the Samba server, add 
-the line: 
-
-       unix extensions = yes
-       
-to your smb.conf file on the server.  Note that the following smb.conf settings 
-are also useful (on the Samba server) when the majority of clients are Unix or 
-Linux: 
-
-       case sensitive = yes
-       delete readonly = yes 
-       ea support = yes
-
-Note that server ea support is required for supporting xattrs from the Linux
-cifs client, and that EA support is present in later versions of Samba (e.g. 
-3.0.6 and later (also EA support works in all versions of Windows, at least to
-shares on NTFS filesystems)).  Extended Attribute (xattr) support is an optional
-feature of most Linux filesystems which may require enabling via
-make menuconfig. Client support for extended attributes (user xattr) can be
-disabled on a per-mount basis by specifying "nouser_xattr" on mount.
-
-The CIFS client can get and set POSIX ACLs (getfacl, setfacl) to Samba servers
-version 3.10 and later.  Setting POSIX ACLs requires enabling both XATTR and 
-then POSIX support in the CIFS configuration options when building the cifs
-module.  POSIX ACL support can be disabled on a per mount basic by specifying
-"noacl" on mount.
-Some administrators may want to change Samba's smb.conf "map archive" and 
-"create mask" parameters from the default.  Unless the create mask is changed
-newly created files can end up with an unnecessarily restrictive default mode,
-which may not be what you want, although if the CIFS Unix extensions are
-enabled on the server and client, subsequent setattr calls (e.g. chmod) can
-fix the mode.  Note that creating special devices (mknod) remotely 
-may require specifying a mkdev function to Samba if you are not using 
-Samba 3.0.6 or later.  For more information on these see the manual pages
-("man smb.conf") on the Samba server system.  Note that the cifs vfs,
-unlike the smbfs vfs, does not read the smb.conf on the client system 
-(the few optional settings are passed in on mount via -o parameters instead).  
-Note that Samba 2.2.7 or later includes a fix that allows the CIFS VFS to delete
-open files (required for strict POSIX compliance).  Windows Servers already 
-supported this feature. Samba server does not allow symlinks that refer to files
-outside of the share, so in Samba versions prior to 3.0.6, most symlinks to
-files with absolute paths (ie beginning with slash) such as:
-        ln -s /mnt/foo bar
-would be forbidden. Samba 3.0.6 server or later includes the ability to create 
-such symlinks safely by converting unsafe symlinks (ie symlinks to server 
-files that are outside of the share) to a samba specific format on the server
-that is ignored by local server applications and non-cifs clients and that will
-not be traversed by the Samba server).  This is opaque to the Linux client
-application using the cifs vfs. Absolute symlinks will work to Samba 3.0.5 or
-later, but only for remote clients using the CIFS Unix extensions, and will
-be invisible to Windows clients and typically will not affect local
-applications running on the same server as Samba.  
-
-Use instructions:
-================
-Once the CIFS VFS support is built into the kernel or installed as a module 
-(cifs.o), you can use mount syntax like the following to access Samba or Windows 
-servers: 
-
-  mount -t cifs //9.53.216.11/e$ /mnt -o user=myname,pass=mypassword
-
-Before -o the option -v may be specified to make the mount.cifs
-mount helper display the mount steps more verbosely.  
-After -o the following commonly used cifs vfs specific options
-are supported:
-
-  user=<username>
-  pass=<password>
-  domain=<domain name>
-  
-Other cifs mount options are described below.  Use of TCP names (in addition to
-ip addresses) is available if the mount helper (mount.cifs) is installed. If
-you do not trust the server to which are mounted, or if you do not have
-cifs signing enabled (and the physical network is insecure), consider use
-of the standard mount options "noexec" and "nosuid" to reduce the risk of 
-running an altered binary on your local system (downloaded from a hostile server
-or altered by a hostile router).
-
-Although mounting using format corresponding to the CIFS URL specification is
-not possible in mount.cifs yet, it is possible to use an alternate format
-for the server and sharename (which is somewhat similar to NFS style mount
-syntax) instead of the more widely used UNC format (i.e. \\server\share):
-  mount -t cifs tcp_name_of_server:share_name /mnt -o user=myname,pass=mypasswd
-
-When using the mount helper mount.cifs, passwords may be specified via alternate
-mechanisms, instead of specifying it after -o using the normal "pass=" syntax
-on the command line:
-1) By including it in a credential file. Specify credentials=filename as one
-of the mount options. Credential files contain two lines
-        username=someuser
-        password=your_password
-2) By specifying the password in the PASSWD environment variable (similarly
-the user name can be taken from the USER environment variable).
-3) By specifying the password in a file by name via PASSWD_FILE
-4) By specifying the password in a file by file descriptor via PASSWD_FD
-
-If no password is provided, mount.cifs will prompt for password entry
-
-Restrictions
-============
-Servers must support either "pure-TCP" (port 445 TCP/IP CIFS connections) or RFC 
-1001/1002 support for "Netbios-Over-TCP/IP." This is not likely to be a 
-problem as most servers support this.
-
-Valid filenames differ between Windows and Linux.  Windows typically restricts
-filenames which contain certain reserved characters (e.g. the character :
-which is used to delimit the beginning of a stream name by Windows), while
-Linux allows a slightly wider set of valid characters in filenames. Windows
-servers can remap such characters when an explicit mapping is specified in
-the Server's registry.  Samba starting with version 3.10 will allow such 
-filenames (ie those which contain valid Linux characters, which normally
-would be forbidden for Windows/CIFS semantics) as long as the server is
-configured for Unix Extensions (and the client has not disabled
-/proc/fs/cifs/LinuxExtensionsEnabled).
-  
-
-CIFS VFS Mount Options
-======================
-A partial list of the supported mount options follows:
-  user         The user name to use when trying to establish
-               the CIFS session.
-  password     The user password.  If the mount helper is
-               installed, the user will be prompted for password
-               if not supplied.
-  ip           The ip address of the target server
-  unc          The target server Universal Network Name (export) to 
-               mount.  
-  domain       Set the SMB/CIFS workgroup name prepended to the
-               username during CIFS session establishment
-  forceuid     Set the default uid for inodes to the uid
-               passed in on mount. For mounts to servers
-               which do support the CIFS Unix extensions, such as a
-               properly configured Samba server, the server provides
-               the uid, gid and mode so this parameter should not be
-               specified unless the server and clients uid and gid
-               numbering differ.  If the server and client are in the
-               same domain (e.g. running winbind or nss_ldap) and
-               the server supports the Unix Extensions then the uid
-               and gid can be retrieved from the server (and uid
-               and gid would not have to be specified on the mount).
-               For servers which do not support the CIFS Unix
-               extensions, the default uid (and gid) returned on lookup
-               of existing files will be the uid (gid) of the person
-               who executed the mount (root, except when mount.cifs
-               is configured setuid for user mounts) unless the "uid=" 
-               (gid) mount option is specified. Also note that permission
-               checks (authorization checks) on accesses to a file occur
-               at the server, but there are cases in which an administrator
-               may want to restrict at the client as well.  For those
-               servers which do not report a uid/gid owner
-               (such as Windows), permissions can also be checked at the
-               client, and a crude form of client side permission checking 
-               can be enabled by specifying file_mode and dir_mode on 
-               the client.  (default)
-  forcegid     (similar to above but for the groupid instead of uid) (default)
-  noforceuid   Fill in file owner information (uid) by requesting it from
-               the server if possible. With this option, the value given in
-               the uid= option (on mount) will only be used if the server
-               can not support returning uids on inodes.
-  noforcegid   (similar to above but for the group owner, gid, instead of uid)
-  uid          Set the default uid for inodes, and indicate to the
-               cifs kernel driver which local user mounted. If the server
-               supports the unix extensions the default uid is
-               not used to fill in the owner fields of inodes (files)
-               unless the "forceuid" parameter is specified.
-  gid          Set the default gid for inodes (similar to above).
-  file_mode     If CIFS Unix extensions are not supported by the server
-               this overrides the default mode for file inodes.
-  fsc          Enable local disk caching using FS-Cache (off by default). This
-               option could be useful to improve performance on a slow link,
-               heavily loaded server and/or network where reading from the
-               disk is faster than reading from the server (over the network).
-               This could also impact scalability positively as the
-               number of calls to the server are reduced. However, local
-               caching is not suitable for all workloads for e.g. read-once
-               type workloads. So, you need to consider carefully your
-               workload/scenario before using this option. Currently, local
-               disk caching is functional for CIFS files opened as read-only.
-  dir_mode      If CIFS Unix extensions are not supported by the server 
-               this overrides the default mode for directory inodes.
-  port         attempt to contact the server on this tcp port, before
-               trying the usual ports (port 445, then 139).
-  iocharset     Codepage used to convert local path names to and from
-               Unicode. Unicode is used by default for network path
-               names if the server supports it.  If iocharset is
-               not specified then the nls_default specified
-               during the local client kernel build will be used.
-               If server does not support Unicode, this parameter is
-               unused.
-  rsize                default read size (usually 16K). The client currently
-               can not use rsize larger than CIFSMaxBufSize. CIFSMaxBufSize
-               defaults to 16K and may be changed (from 8K to the maximum
-               kmalloc size allowed by your kernel) at module install time
-               for cifs.ko. Setting CIFSMaxBufSize to a very large value
-               will cause cifs to use more memory and may reduce performance
-               in some cases.  To use rsize greater than 127K (the original
-               cifs protocol maximum) also requires that the server support
-               a new Unix Capability flag (for very large read) which some
-               newer servers (e.g. Samba 3.0.26 or later) do. rsize can be
-               set from a minimum of 2048 to a maximum of 130048 (127K or
-               CIFSMaxBufSize, whichever is smaller)
-  wsize                default write size (default 57344)
-               maximum wsize currently allowed by CIFS is 57344 (fourteen
-               4096 byte pages)
-  actimeo=n    attribute cache timeout in seconds (default 1 second).
-               After this timeout, the cifs client requests fresh attribute
-               information from the server. This option allows to tune the
-               attribute cache timeout to suit the workload needs. Shorter
-               timeouts mean better the cache coherency, but increased number
-               of calls to the server. Longer timeouts mean reduced number
-               of calls to the server at the expense of less strict cache
-               coherency checks (i.e. incorrect attribute cache for a short
-               period of time).
-  rw           mount the network share read-write (note that the
-               server may still consider the share read-only)
-  ro           mount network share read-only
-  version      used to distinguish different versions of the
-               mount helper utility (not typically needed)
-  sep          if first mount option (after the -o), overrides
-               the comma as the separator between the mount
-               parms. e.g.
-                       -o user=myname,password=mypassword,domain=mydom
-               could be passed instead with period as the separator by
-                       -o sep=.user=myname.password=mypassword.domain=mydom
-               this might be useful when comma is contained within username
-               or password or domain. This option is less important
-               when the cifs mount helper cifs.mount (version 1.1 or later)
-               is used.
-  nosuid        Do not allow remote executables with the suid bit 
-               program to be executed.  This is only meaningful for mounts
-               to servers such as Samba which support the CIFS Unix Extensions.
-               If you do not trust the servers in your network (your mount
-               targets) it is recommended that you specify this option for
-               greater security.
-  exec         Permit execution of binaries on the mount.
-  noexec       Do not permit execution of binaries on the mount.
-  dev          Recognize block devices on the remote mount.
-  nodev                Do not recognize devices on the remote mount.
-  suid          Allow remote files on this mountpoint with suid enabled to 
-               be executed (default for mounts when executed as root,
-               nosuid is default for user mounts).
-  credentials   Although ignored by the cifs kernel component, it is used by 
-               the mount helper, mount.cifs. When mount.cifs is installed it
-               opens and reads the credential file specified in order  
-               to obtain the userid and password arguments which are passed to
-               the cifs vfs.
-  guest         Although ignored by the kernel component, the mount.cifs
-               mount helper will not prompt the user for a password
-               if guest is specified on the mount options.  If no
-               password is specified a null password will be used.
-  perm          Client does permission checks (vfs_permission check of uid
-               and gid of the file against the mode and desired operation),
-               Note that this is in addition to the normal ACL check on the
-               target machine done by the server software. 
-               Client permission checking is enabled by default.
-  noperm        Client does not do permission checks.  This can expose
-               files on this mount to access by other users on the local
-               client system. It is typically only needed when the server
-               supports the CIFS Unix Extensions but the UIDs/GIDs on the
-               client and server system do not match closely enough to allow
-               access by the user doing the mount, but it may be useful with
-               non CIFS Unix Extension mounts for cases in which the default
-               mode is specified on the mount but is not to be enforced on the
-               client (e.g. perhaps when MultiUserMount is enabled)
-               Note that this does not affect the normal ACL check on the
-               target machine done by the server software (of the server
-               ACL against the user name provided at mount time).
-  serverino    Use server's inode numbers instead of generating automatically
-               incrementing inode numbers on the client.  Although this will
-               make it easier to spot hardlinked files (as they will have
-               the same inode numbers) and inode numbers may be persistent,
-               note that the server does not guarantee that the inode numbers
-               are unique if multiple server side mounts are exported under a
-               single share (since inode numbers on the servers might not
-               be unique if multiple filesystems are mounted under the same
-               shared higher level directory).  Note that some older
-               (e.g. pre-Windows 2000) servers do not support returning UniqueIDs
-               or the CIFS Unix Extensions equivalent and for those
-               this mount option will have no effect.  Exporting cifs mounts
-               under nfsd requires this mount option on the cifs mount.
-               This is now the default if server supports the 
-               required network operation.
-  noserverino   Client generates inode numbers (rather than using the actual one
-               from the server). These inode numbers will vary after
-               unmount or reboot which can confuse some applications,
-               but not all server filesystems support unique inode
-               numbers.
-  setuids       If the CIFS Unix extensions are negotiated with the server
-               the client will attempt to set the effective uid and gid of
-               the local process on newly created files, directories, and
-               devices (create, mkdir, mknod).  If the CIFS Unix Extensions
-               are not negotiated, for newly created files and directories
-               instead of using the default uid and gid specified on
-               the mount, cache the new file's uid and gid locally which means
-               that the uid for the file can change when the inode is
-               reloaded (or the user remounts the share).
-  nosetuids     The client will not attempt to set the uid and gid on
-               on newly created files, directories, and devices (create, 
-               mkdir, mknod) which will result in the server setting the
-               uid and gid to the default (usually the server uid of the
-               user who mounted the share).  Letting the server (rather than
-               the client) set the uid and gid is the default. If the CIFS
-               Unix Extensions are not negotiated then the uid and gid for
-               new files will appear to be the uid (gid) of the mounter or the
-               uid (gid) parameter specified on the mount.
-  netbiosname   When mounting to servers via port 139, specifies the RFC1001
-               source name to use to represent the client netbios machine 
-               name when doing the RFC1001 netbios session initialize.
-  direct        Do not do inode data caching on files opened on this mount.
-               This precludes mmapping files on this mount. In some cases
-               with fast networks and little or no caching benefits on the
-               client (e.g. when the application is doing large sequential
-               reads bigger than page size without rereading the same data) 
-               this can provide better performance than the default
-               behavior which caches reads (readahead) and writes 
-               (writebehind) through the local Linux client pagecache 
-               if oplock (caching token) is granted and held. Note that
-               direct allows write operations larger than page size
-               to be sent to the server.
-  strictcache   Use for switching on strict cache mode. In this mode the
-               client read from the cache all the time it has Oplock Level II,
-               otherwise - read from the server. All written data are stored
-               in the cache, but if the client doesn't have Exclusive Oplock,
-               it writes the data to the server.
-  rwpidforward  Forward pid of a process who opened a file to any read or write
-               operation on that file. This prevent applications like WINE
-               from failing on read and write if we use mandatory brlock style.
-  acl          Allow setfacl and getfacl to manage posix ACLs if server
-               supports them.  (default)
-  noacl        Do not allow setfacl and getfacl calls on this mount
-  user_xattr    Allow getting and setting user xattrs (those attributes whose
-               name begins with "user." or "os2.") as OS/2 EAs (extended
-               attributes) to the server.  This allows support of the
-               setfattr and getfattr utilities. (default)
-  nouser_xattr  Do not allow getfattr/setfattr to get/set/list xattrs 
-  mapchars      Translate six of the seven reserved characters (not backslash)
-                       *?<>|:
-               to the remap range (above 0xF000), which also
-               allows the CIFS client to recognize files created with
-               such characters by Windows's POSIX emulation. This can
-               also be useful when mounting to most versions of Samba
-               (which also forbids creating and opening files
-               whose names contain any of these seven characters).
-               This has no effect if the server does not support
-               Unicode on the wire.
- nomapchars     Do not translate any of these seven characters (default).
- nocase         Request case insensitive path name matching (case
-               sensitive is the default if the server supports it).
-               (mount option "ignorecase" is identical to "nocase")
- posixpaths     If CIFS Unix extensions are supported, attempt to
-               negotiate posix path name support which allows certain
-               characters forbidden in typical CIFS filenames, without
-               requiring remapping. (default)
- noposixpaths   If CIFS Unix extensions are supported, do not request
-               posix path name support (this may cause servers to
-               reject creating files with certain reserved characters).
- nounix         Disable the CIFS Unix Extensions for this mount (tree
-               connection). This is rarely needed, but it may be useful
-               in order to turn off multiple settings all at once (ie
-               posix acls, posix locks, posix paths, symlink support
-               and retrieving uids/gids/mode from the server) or to
-               work around a bug in server which implement the Unix
-               Extensions.
- nobrl          Do not send byte range lock requests to the server.
-               This is necessary for certain applications that break
-               with cifs style mandatory byte range locks (and most
-               cifs servers do not yet support requesting advisory
-               byte range locks).
- forcemandatorylock Even if the server supports posix (advisory) byte range
-               locking, send only mandatory lock requests.  For some
-               (presumably rare) applications, originally coded for
-               DOS/Windows, which require Windows style mandatory byte range
-               locking, they may be able to take advantage of this option,
-               forcing the cifs client to only send mandatory locks
-               even if the cifs server would support posix advisory locks.
-               "forcemand" is accepted as a shorter form of this mount
-               option.
- nostrictsync   If this mount option is set, when an application does an
-               fsync call then the cifs client does not send an SMB Flush
-               to the server (to force the server to write all dirty data
-               for this file immediately to disk), although cifs still sends
-               all dirty (cached) file data to the server and waits for the
-               server to respond to the write.  Since SMB Flush can be
-               very slow, and some servers may be reliable enough (to risk
-               delaying slightly flushing the data to disk on the server),
-               turning on this option may be useful to improve performance for
-               applications that fsync too much, at a small risk of server
-               crash.  If this mount option is not set, by default cifs will
-               send an SMB flush request (and wait for a response) on every
-               fsync call.
- nodfs          Disable DFS (global name space support) even if the
-               server claims to support it.  This can help work around
-               a problem with parsing of DFS paths with Samba server
-               versions 3.0.24 and 3.0.25.
- remount        remount the share (often used to change from ro to rw mounts
-               or vice versa)
- cifsacl        Report mode bits (e.g. on stat) based on the Windows ACL for
-               the file. (EXPERIMENTAL)
- servern        Specify the server's netbios name (RFC1001 name) to use
-               when attempting to setup a session to the server. 
-               This is needed for mounting to some older servers (such
-               as OS/2 or Windows 98 and Windows ME) since they do not
-               support a default server name.  A server name can be up
-               to 15 characters long and is usually uppercased.
- sfu            When the CIFS Unix Extensions are not negotiated, attempt to
-               create device files and fifos in a format compatible with
-               Services for Unix (SFU).  In addition retrieve bits 10-12
-               of the mode via the SETFILEBITS extended attribute (as
-               SFU does).  In the future the bottom 9 bits of the
-               mode also will be emulated using queries of the security
-               descriptor (ACL).
- mfsymlinks     Enable support for Minshall+French symlinks
-               (see http://wiki.samba.org/index.php/UNIX_Extensions#Minshall.2BFrench_symlinks)
-               This option is ignored when specified together with the
-               'sfu' option. Minshall+French symlinks are used even if
-               the server supports the CIFS Unix Extensions.
- sign           Must use packet signing (helps avoid unwanted data modification
-               by intermediate systems in the route).  Note that signing
-               does not work with lanman or plaintext authentication.
- seal           Must seal (encrypt) all data on this mounted share before
-               sending on the network.  Requires support for Unix Extensions.
-               Note that this differs from the sign mount option in that it
-               causes encryption of data sent over this mounted share but other
-               shares mounted to the same server are unaffected.
- locallease     This option is rarely needed. Fcntl F_SETLEASE is
-               used by some applications such as Samba and NFSv4 server to
-               check to see whether a file is cacheable.  CIFS has no way
-               to explicitly request a lease, but can check whether a file
-               is cacheable (oplocked).  Unfortunately, even if a file
-               is not oplocked, it could still be cacheable (ie cifs client
-               could grant fcntl leases if no other local processes are using
-               the file) for cases for example such as when the server does not
-               support oplocks and the user is sure that the only updates to
-               the file will be from this client. Specifying this mount option
-               will allow the cifs client to check for leases (only) locally
-               for files which are not oplocked instead of denying leases
-               in that case. (EXPERIMENTAL)
- sec            Security mode.  Allowed values are:
-                       none    attempt to connection as a null user (no name)
-                       krb5    Use Kerberos version 5 authentication
-                       krb5i   Use Kerberos authentication and packet signing
-                       ntlm    Use NTLM password hashing (default)
-                       ntlmi   Use NTLM password hashing with signing (if
-                               /proc/fs/cifs/PacketSigningEnabled on or if
-                               server requires signing also can be the default) 
-                       ntlmv2  Use NTLMv2 password hashing      
-                       ntlmv2i Use NTLMv2 password hashing with packet signing
-                       lanman  (if configured in kernel config) use older
-                               lanman hash
-hard           Retry file operations if server is not responding
-soft           Limit retries to unresponsive servers (usually only
-               one retry) before returning an error.  (default)
-
-The mount.cifs mount helper also accepts a few mount options before -o
-including:
-
-       -S      take password from stdin (equivalent to setting the environment
-               variable "PASSWD_FD=0")
-       -V      print mount.cifs version
-       -?      display simple usage information
-
-With most 2.6 kernel versions of modutils, the version of the cifs kernel
-module can be displayed via modinfo.
-
-Misc /proc/fs/cifs Flags and Debug Info
-=======================================
-Informational pseudo-files:
-DebugData              Displays information about active CIFS sessions and
-                       shares, features enabled as well as the cifs.ko
-                       version.
-Stats                  Lists summary resource usage information as well as per
-                       share statistics, if CONFIG_CIFS_STATS is enabled
-                       in the kernel configuration.
-
-Configuration pseudo-files:
-PacketSigningEnabled   If set to one, cifs packet signing is enabled
-                       and will be used if the server requires 
-                       it.  If set to two, cifs packet signing is
-                       required even if the server considers packet
-                       signing optional. (default 1)
-SecurityFlags          Flags which control security negotiation and
-                       also packet signing. Authentication (may/must)
-                       flags (e.g. for NTLM and/or NTLMv2) may be combined with
-                       the signing flags.  Specifying two different password
-                       hashing mechanisms (as "must use") on the other hand 
-                       does not make much sense. Default flags are 
-                               0x07007 
-                       (NTLM, NTLMv2 and packet signing allowed).  The maximum 
-                       allowable flags if you want to allow mounts to servers
-                       using weaker password hashes is 0x37037 (lanman,
-                       plaintext, ntlm, ntlmv2, signing allowed).  Some
-                       SecurityFlags require the corresponding menuconfig
-                       options to be enabled (lanman and plaintext require
-                       CONFIG_CIFS_WEAK_PW_HASH for example).  Enabling
-                       plaintext authentication currently requires also
-                       enabling lanman authentication in the security flags
-                       because the cifs module only supports sending
-                       plaintext passwords using the older lanman dialect
-                       form of the session setup SMB.  (e.g. for authentication
-                       using plain text passwords, set the SecurityFlags
-                       to 0x30030):
-                       may use packet signing                          0x00001
-                       must use packet signing                         0x01001
-                       may use NTLM (most common password hash)        0x00002
-                       must use NTLM                                   0x02002
-                       may use NTLMv2                                  0x00004
-                       must use NTLMv2                                 0x04004
-                       may use Kerberos security                       0x00008
-                       must use Kerberos                               0x08008
-                       may use lanman (weak) password hash             0x00010
-                       must use lanman password hash                   0x10010
-                       may use plaintext passwords                     0x00020
-                       must use plaintext passwords                    0x20020
-                       (reserved for future packet encryption)         0x00040
-
-cifsFYI                        If set to non-zero value, additional debug information
-                       will be logged to the system error log.  This field
-                       contains three flags controlling different classes of
-                       debugging entries.  The maximum value it can be set
-                       to is 7 which enables all debugging points (default 0).
-                       Some debugging statements are not compiled into the
-                       cifs kernel unless CONFIG_CIFS_DEBUG2 is enabled in the
-                       kernel configuration. cifsFYI may be set to one or
-                       more of the following flags (7 sets them all):
-
-                       log cifs informational messages                 0x01
-                       log return codes from cifs entry points         0x02
-                       log slow responses (ie which take longer than 1 second)
-                         CONFIG_CIFS_STATS2 must be enabled in .config 0x04
-                               
-                               
-traceSMB               If set to one, debug information is logged to the
-                       system error log with the start of smb requests
-                       and responses (default 0)
-LookupCacheEnable      If set to one, inode information is kept cached
-                       for one second improving performance of lookups
-                       (default 1)
-OplockEnabled          If set to one, safe distributed caching enabled.
-                       (default 1)
-LinuxExtensionsEnabled If set to one then the client will attempt to
-                       use the CIFS "UNIX" extensions which are optional
-                       protocol enhancements that allow CIFS servers
-                       to return accurate UID/GID information as well
-                       as support symbolic links. If you use servers
-                       such as Samba that support the CIFS Unix
-                       extensions but do not want to use symbolic link
-                       support and want to map the uid and gid fields
-                       to values supplied at mount (rather than the
-                       actual values), then set this to zero. (default 1)
-
-These experimental features and tracing can be enabled by changing flags in 
-/proc/fs/cifs (after the cifs module has been installed or built into the 
-kernel, e.g.  insmod cifs).  To enable a feature set it to 1 e.g.  to enable 
-tracing to the kernel message log type: 
-
-       echo 7 > /proc/fs/cifs/cifsFYI
-       
-cifsFYI functions as a bit mask. Setting it to 1 enables additional kernel
-logging of various informational messages.  2 enables logging of non-zero
-SMB return codes while 4 enables logging of requests that take longer
-than one second to complete (except for byte range lock requests). 
-Setting it to 4 requires defining CONFIG_CIFS_STATS2 manually in the
-source code (typically by setting it in the beginning of cifsglob.h),
-and setting it to seven enables all three.  Finally, tracing
-the start of smb requests and responses can be enabled via:
-
-       echo 1 > /proc/fs/cifs/traceSMB
-
-Per share (per client mount) statistics are available in /proc/fs/cifs/Stats
-if the kernel was configured with cifs statistics enabled.  The statistics
-represent the number of successful (ie non-zero return code from the server) 
-SMB responses to some of the more common commands (open, delete, mkdir etc.).
-Also recorded is the total bytes read and bytes written to the server for
-that share.  Note that due to client caching effects this can be less than the
-number of bytes read and written by the application running on the client.
-The statistics for the number of total SMBs and oplock breaks are different in
-that they represent all for that share, not just those for which the server
-returned success.
-       
-Also note that "cat /proc/fs/cifs/DebugData" will display information about
-the active sessions and the shares that are mounted.
-
-Enabling Kerberos (extended security) works but requires version 1.2 or later
-of the helper program cifs.upcall to be present and to be configured in the
-/etc/request-key.conf file.  The cifs.upcall helper program is from the Samba
-project(http://www.samba.org). NTLM and NTLMv2 and LANMAN support do not
-require this helper. Note that NTLMv2 security (which does not require the
-cifs.upcall helper program), instead of using Kerberos, is sufficient for
-some use cases.
-
-DFS support allows transparent redirection to shares in an MS-DFS name space.
-In addition, DFS support for target shares which are specified as UNC
-names which begin with host names (rather than IP addresses) requires
-a user space helper (such as cifs.upcall) to be present in order to
-translate host names to ip address, and the user space helper must also
-be configured in the file /etc/request-key.conf.  Samba, Windows servers and
-many NAS appliances support DFS as a way of constructing a global name
-space to ease network configuration and improve reliability.
-
-To use cifs Kerberos and DFS support, the Linux keyutils package should be
-installed and something like the following lines should be added to the
-/etc/request-key.conf file:
-
-create cifs.spnego * * /usr/local/sbin/cifs.upcall %k
-create dns_resolver * * /usr/local/sbin/cifs.upcall %k
-
-CIFS kernel module parameters
-=============================
-These module parameters can be specified or modified either during the time of
-module loading or during the runtime by using the interface
-       /proc/module/cifs/parameters/<param>
-
-i.e. echo "value" > /sys/module/cifs/parameters/<param>
-
-1. enable_oplocks - Enable or disable oplocks. Oplocks are enabled by default.
-                   [Y/y/1]. To disable use any of [N/n/0].
-
diff --git a/fs/cifs/TODO b/fs/cifs/TODO
deleted file mode 100644 (file)
index 355abcd..0000000
+++ /dev/null
@@ -1,129 +0,0 @@
-Version 1.53 May 20, 2008
-
-A Partial List of Missing Features
-==================================
-
-Contributions are welcome.  There are plenty of opportunities
-for visible, important contributions to this module.  Here
-is a partial list of the known problems and missing features:
-
-a) Support for SecurityDescriptors(Windows/CIFS ACLs) for chmod/chgrp/chown
-so that these operations can be supported to Windows servers
-
-b) Mapping POSIX ACLs (and eventually NFSv4 ACLs) to CIFS
-SecurityDescriptors
-
-c) Better pam/winbind integration (e.g. to handle uid mapping
-better)
-
-d) Cleanup now unneeded SessSetup code in
-fs/cifs/connect.c and add back in NTLMSSP code if any servers
-need it
-
-e) fix NTLMv2 signing when two mounts with different users to same
-server.
-
-f) Directory entry caching relies on a 1 second timer, rather than 
-using FindNotify or equivalent.  - (started)
-
-g) quota support (needs minor kernel change since quota calls
-to make it to network filesystems or deviceless filesystems)
-
-h) investigate sync behavior (including syncpage) and check  
-for proper behavior of intr/nointr
-
-i) improve support for very old servers (OS/2 and Win9x for example)
-Including support for changing the time remotely (utimes command).
-
-j) hook lower into the sockets api (as NFS/SunRPC does) to avoid the
-extra copy in/out of the socket buffers in some cases.
-
-k) Better optimize open (and pathbased setfilesize) to reduce the
-oplock breaks coming from windows srv.  Piggyback identical file
-opens on top of each other by incrementing reference count rather
-than resending (helps reduce server resource utilization and avoid
-spurious oplock breaks).
-
-l) Improve performance of readpages by sending more than one read
-at a time when 8 pages or more are requested. In conjuntion
-add support for async_cifs_readpages.
-
-m) Add support for storing symlink info to Windows servers 
-in the Extended Attribute format their SFU clients would recognize.
-
-n) Finish fcntl D_NOTIFY support so kde and gnome file list windows
-will autorefresh (partially complete by Asser). Needs minor kernel
-vfs change to support removing D_NOTIFY on a file.   
-
-o) Add GUI tool to configure /proc/fs/cifs settings and for display of
-the CIFS statistics (started)
-
-p) implement support for security and trusted categories of xattrs
-(requires minor protocol extension) to enable better support for SELINUX
-
-q) Implement O_DIRECT flag on open (already supported on mount)
-
-r) Create UID mapping facility so server UIDs can be mapped on a per
-mount or a per server basis to client UIDs or nobody if no mapping
-exists.  This is helpful when Unix extensions are negotiated to
-allow better permission checking when UIDs differ on the server
-and client.  Add new protocol request to the CIFS protocol 
-standard for asking the server for the corresponding name of a
-particular uid.
-
-s) Add support for CIFS Unix and also the newer POSIX extensions to the
-server side for Samba 4.
-
-t) In support for OS/2 (LANMAN 1.2 and LANMAN2.1 based SMB servers) 
-need to add ability to set time to server (utimes command)
-
-u) DOS attrs - returned as pseudo-xattr in Samba format (check VFAT and NTFS for this too)
-
-v) mount check for unmatched uids
-
-w) Add support for new vfs entry point for fallocate
-
-x) Fix Samba 3 server to handle Linux kernel aio so dbench with lots of 
-processes can proceed better in parallel (on the server)
-
-y) Fix Samba 3 to handle reads/writes over 127K (and remove the cifs mount
-restriction of wsize max being 127K) 
-
-KNOWN BUGS (updated April 24, 2007)
-====================================
-See http://bugzilla.samba.org - search on product "CifsVFS" for
-current bug list.
-
-1) existing symbolic links (Windows reparse points) are recognized but
-can not be created remotely. They are implemented for Samba and those that
-support the CIFS Unix extensions, although earlier versions of Samba
-overly restrict the pathnames.
-2) follow_link and readdir code does not follow dfs junctions
-but recognizes them
-3) create of new files to FAT partitions on Windows servers can
-succeed but still return access denied (appears to be Windows 
-server not cifs client problem) and has not been reproduced recently.
-NTFS partitions do not have this problem.
-4) Unix/POSIX capabilities are reset after reconnection, and affect
-a few fields in the tree connection but we do do not know which
-superblocks to apply these changes to.  We should probably walk
-the list of superblocks to set these.  Also need to check the
-flags on the second mount to the same share, and see if we
-can do the same trick that NFS does to remount duplicate shares.
-
-Misc testing to do
-==================
-1) check out max path names and max path name components against various server
-types. Try nested symlinks (8 deep). Return max path name in stat -f information
-
-2) Modify file portion of ltp so it can run against a mounted network
-share and run it against cifs vfs in automated fashion.
-
-3) Additional performance testing and optimization using iozone and similar - 
-there are some easy changes that can be done to parallelize sequential writes,
-and when signing is disabled to request larger read sizes (larger than 
-negotiated size) and send larger write sizes to modern servers.
-
-4) More exhaustively test against less common servers.  More testing
-against Windows 9x, Windows ME servers.
-
index fe8d627..d8eac3b 100644 (file)
@@ -91,6 +91,8 @@ extern __le16 *cifs_strndup_to_utf16(const char *src, const int maxlen,
 #endif /* CONFIG_CIFS_SMB2 */
 #endif
 
+wchar_t cifs_toupper(wchar_t in);
+
 /*
  * UniStrcat:  Concatenate the second string to the first
  *
index 85ea98d..a16b4e5 100644 (file)
@@ -255,6 +255,7 @@ cifs_alloc_inode(struct super_block *sb)
        cifs_inode->server_eof = 0;
        cifs_inode->uniqueid = 0;
        cifs_inode->createtime = 0;
+       cifs_inode->epoch = 0;
 #ifdef CONFIG_CIFS_SMB2
        get_random_bytes(cifs_inode->lease_key, SMB2_LEASE_KEY_SIZE);
 #endif
@@ -357,6 +358,18 @@ cifs_show_cache_flavor(struct seq_file *s, struct cifs_sb_info *cifs_sb)
                seq_printf(s, "loose");
 }
 
+static void
+cifs_show_nls(struct seq_file *s, struct nls_table *cur)
+{
+       struct nls_table *def;
+
+       /* Display iocharset= option if it's not default charset */
+       def = load_nls_default();
+       if (def != cur)
+               seq_printf(s, ",iocharset=%s", cur->charset);
+       unload_nls(def);
+}
+
 /*
  * cifs_show_options() is for displaying mount options in /proc/mounts.
  * Not all settable options are displayed but most of the important
@@ -418,6 +431,9 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
                seq_printf(s, ",file_mode=0%ho,dir_mode=0%ho",
                                           cifs_sb->mnt_file_mode,
                                           cifs_sb->mnt_dir_mode);
+
+       cifs_show_nls(s, cifs_sb->local_nls);
+
        if (tcon->seal)
                seq_printf(s, ",seal");
        if (tcon->nocase)
@@ -718,7 +734,7 @@ static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
 
        written = generic_file_aio_write(iocb, iov, nr_segs, pos);
 
-       if (CIFS_I(inode)->clientCanCacheAll)
+       if (CIFS_CACHE_WRITE(CIFS_I(inode)))
                return written;
 
        rc = filemap_fdatawrite(inode->i_mapping);
@@ -743,7 +759,7 @@ static loff_t cifs_llseek(struct file *file, loff_t offset, int whence)
                 * We need to be sure that all dirty pages are written and the
                 * server has the newest file length.
                 */
-               if (!CIFS_I(inode)->clientCanCacheRead && inode->i_mapping &&
+               if (!CIFS_CACHE_READ(CIFS_I(inode)) && inode->i_mapping &&
                    inode->i_mapping->nrpages != 0) {
                        rc = filemap_fdatawait(inode->i_mapping);
                        if (rc) {
@@ -767,8 +783,10 @@ static loff_t cifs_llseek(struct file *file, loff_t offset, int whence)
 
 static int cifs_setlease(struct file *file, long arg, struct file_lock **lease)
 {
-       /* note that this is called by vfs setlease with i_lock held
-          to protect *lease from going away */
+       /*
+        * Note that this is called by vfs setlease with i_lock held to
+        * protect *lease from going away.
+        */
        struct inode *inode = file_inode(file);
        struct cifsFileInfo *cfile = file->private_data;
 
@@ -776,20 +794,19 @@ static int cifs_setlease(struct file *file, long arg, struct file_lock **lease)
                return -EINVAL;
 
        /* check if file is oplocked */
-       if (((arg == F_RDLCK) &&
-               (CIFS_I(inode)->clientCanCacheRead)) ||
-           ((arg == F_WRLCK) &&
-               (CIFS_I(inode)->clientCanCacheAll)))
+       if (((arg == F_RDLCK) && CIFS_CACHE_READ(CIFS_I(inode))) ||
+           ((arg == F_WRLCK) && CIFS_CACHE_WRITE(CIFS_I(inode))))
                return generic_setlease(file, arg, lease);
        else if (tlink_tcon(cfile->tlink)->local_lease &&
-                !CIFS_I(inode)->clientCanCacheRead)
-               /* If the server claims to support oplock on this
-                  file, then we still need to check oplock even
-                  if the local_lease mount option is set, but there
-                  are servers which do not support oplock for which
-                  this mount option may be useful if the user
-                  knows that the file won't be changed on the server
-                  by anyone else */
+                !CIFS_CACHE_READ(CIFS_I(inode)))
+               /*
+                * If the server claims to support oplock on this file, then we
+                * still need to check oplock even if the local_lease mount
+                * option is set, but there are servers which do not support
+                * oplock for which this mount option may be useful if the user
+                * knows that the file won't be changed on the server by anyone
+                * else.
+                */
                return generic_setlease(file, arg, lease);
        else
                return -EAGAIN;
index 52ca861..cfa14c8 100644 (file)
@@ -28,6 +28,7 @@
 #include "cifsacl.h"
 #include <crypto/internal/hash.h>
 #include <linux/scatterlist.h>
+#include <uapi/linux/cifs/cifs_mount.h>
 #ifdef CONFIG_CIFS_SMB2
 #include "smb2pdu.h"
 #endif
 #define MAX_SES_INFO 2
 #define MAX_TCON_INFO 4
 
-#define MAX_TREE_SIZE (2 + MAX_SERVER_SIZE + 1 + MAX_SHARE_SIZE + 1)
-#define MAX_SERVER_SIZE 15
-#define MAX_SHARE_SIZE 80
-#define CIFS_MAX_DOMAINNAME_LEN 256 /* max domain name length */
-#define MAX_USERNAME_SIZE 256  /* reasonable maximum for current servers */
-#define MAX_PASSWORD_SIZE 512  /* max for windows seems to be 256 wide chars */
+#define MAX_TREE_SIZE (2 + CIFS_NI_MAXHOST + 1 + CIFS_MAX_SHARE_LEN + 1)
 
 #define CIFS_MIN_RCV_POOL 4
 
@@ -135,6 +131,7 @@ struct cifs_secmech {
 
 /* per smb session structure/fields */
 struct ntlmssp_auth {
+       bool sesskey_per_smbsess; /* whether session key is per smb session */
        __u32 client_flags; /* sent by client in type 1 ntlmsssp exchange */
        __u32 server_flags; /* sent by server in type 2 ntlmssp exchange */
        unsigned char ciphertext[CIFS_CPHTXT_SIZE]; /* sent to server */
@@ -308,6 +305,9 @@ struct smb_version_operations {
        int (*create_hardlink)(const unsigned int, struct cifs_tcon *,
                               const char *, const char *,
                               struct cifs_sb_info *);
+       /* query symlink target */
+       int (*query_symlink)(const unsigned int, struct cifs_tcon *,
+                            const char *, char **, struct cifs_sb_info *);
        /* open a file for non-posix mounts */
        int (*open)(const unsigned int, struct cifs_open_parms *,
                    __u32 *, FILE_ALL_INFO *);
@@ -361,18 +361,24 @@ struct smb_version_operations {
        /* push brlocks from the cache to the server */
        int (*push_mand_locks)(struct cifsFileInfo *);
        /* get lease key of the inode */
-       void (*get_lease_key)(struct inode *, struct cifs_fid *fid);
+       void (*get_lease_key)(struct inode *, struct cifs_fid *);
        /* set lease key of the inode */
-       void (*set_lease_key)(struct inode *, struct cifs_fid *fid);
+       void (*set_lease_key)(struct inode *, struct cifs_fid *);
        /* generate new lease key */
-       void (*new_lease_key)(struct cifs_fid *fid);
-       /* The next two functions will need to be changed to per smb session */
-       void (*generate_signingkey)(struct TCP_Server_Info *server);
-       int (*calc_signature)(struct smb_rqst *rqst,
-                                  struct TCP_Server_Info *server);
-       int (*query_mf_symlink)(const unsigned char *path, char *pbuf,
-                       unsigned int *pbytes_read, struct cifs_sb_info *cifs_sb,
-                       unsigned int xid);
+       void (*new_lease_key)(struct cifs_fid *);
+       int (*generate_signingkey)(struct cifs_ses *);
+       int (*calc_signature)(struct smb_rqst *, struct TCP_Server_Info *);
+       int (*query_mf_symlink)(const unsigned char *, char *, unsigned int *,
+                               struct cifs_sb_info *, unsigned int);
+       /* if we can do cache read operations */
+       bool (*is_read_op)(__u32);
+       /* set oplock level for the inode */
+       void (*set_oplock_level)(struct cifsInodeInfo *, __u32, unsigned int,
+                                bool *);
+       /* create lease context buffer for CREATE request */
+       char * (*create_lease_buf)(u8 *, u8);
+       /* parse lease context buffer and return oplock/epoch info */
+       __u8 (*parse_lease_buf)(void *, unsigned int *);
 };
 
 struct smb_version_values {
@@ -390,9 +396,9 @@ struct smb_version_values {
        unsigned int    cap_unix;
        unsigned int    cap_nt_find;
        unsigned int    cap_large_files;
-       unsigned int    oplock_read;
        __u16           signing_enabled;
        __u16           signing_required;
+       size_t          create_lease_size;
 };
 
 #define HEADER_SIZE(server) (server->vals->header_size)
@@ -548,7 +554,6 @@ struct TCP_Server_Info {
        int timeAdj;  /* Adjust for difference in server time zone in sec */
        __u64 CurrentMid;         /* multiplex id - rotating counter */
        char cryptkey[CIFS_CRYPTO_KEY_SIZE]; /* used by ntlm, ntlmv2 etc */
-       char smb3signingkey[SMB3_SIGN_KEY_SIZE]; /* for signing smb3 packets */
        /* 16th byte of RFC1001 workstation name is always null */
        char workstation_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL];
        __u32 sequence_number; /* for signing, protected by srv_mutex */
@@ -731,6 +736,7 @@ struct cifs_ses {
        bool need_reconnect:1; /* connection reset, uid now invalid */
 #ifdef CONFIG_CIFS_SMB2
        __u16 session_flags;
+       char smb3signingkey[SMB3_SIGN_KEY_SIZE]; /* for signing smb3 packets */
 #endif /* CONFIG_CIFS_SMB2 */
 };
 
@@ -935,6 +941,8 @@ struct cifs_fid {
        __u8 lease_key[SMB2_LEASE_KEY_SIZE];    /* lease key for smb2 */
 #endif
        struct cifs_pending_open *pending_open;
+       unsigned int epoch;
+       bool purge_cache;
 };
 
 struct cifs_fid_locks {
@@ -1032,6 +1040,17 @@ cifsFileInfo_get_locked(struct cifsFileInfo *cifs_file)
 struct cifsFileInfo *cifsFileInfo_get(struct cifsFileInfo *cifs_file);
 void cifsFileInfo_put(struct cifsFileInfo *cifs_file);
 
+#define CIFS_CACHE_READ_FLG    1
+#define CIFS_CACHE_HANDLE_FLG  2
+#define CIFS_CACHE_RH_FLG      (CIFS_CACHE_READ_FLG | CIFS_CACHE_HANDLE_FLG)
+#define CIFS_CACHE_WRITE_FLG   4
+#define CIFS_CACHE_RW_FLG      (CIFS_CACHE_READ_FLG | CIFS_CACHE_WRITE_FLG)
+#define CIFS_CACHE_RHW_FLG     (CIFS_CACHE_RW_FLG | CIFS_CACHE_HANDLE_FLG)
+
+#define CIFS_CACHE_READ(cinode) (cinode->oplock & CIFS_CACHE_READ_FLG)
+#define CIFS_CACHE_HANDLE(cinode) (cinode->oplock & CIFS_CACHE_HANDLE_FLG)
+#define CIFS_CACHE_WRITE(cinode) (cinode->oplock & CIFS_CACHE_WRITE_FLG)
+
 /*
  * One of these for each file inode
  */
@@ -1043,8 +1062,8 @@ struct cifsInodeInfo {
        /* BB add in lists for dirty pages i.e. write caching info for oplock */
        struct list_head openFileList;
        __u32 cifsAttrs; /* e.g. DOS archive bit, sparse, compressed, system */
-       bool clientCanCacheRead;        /* read oplock */
-       bool clientCanCacheAll;         /* read and writebehind oplock */
+       unsigned int oplock;            /* oplock/lease level we have */
+       unsigned int epoch;             /* used to track lease state changes */
        bool delete_pending;            /* DELETE_ON_CLOSE is set */
        bool invalid_mapping;           /* pagecache is invalid */
        unsigned long time;             /* jiffies of last update of inode */
@@ -1502,7 +1521,7 @@ extern mempool_t *cifs_mid_poolp;
 extern struct smb_version_operations smb1_operations;
 extern struct smb_version_values smb1_values;
 #define SMB20_VERSION_STRING   "2.0"
-/*extern struct smb_version_operations smb20_operations; */ /* not needed yet */
+extern struct smb_version_operations smb20_operations;
 extern struct smb_version_values smb20_values;
 #define SMB21_VERSION_STRING   "2.1"
 extern struct smb_version_operations smb21_operations;
index 11ca24a..948676d 100644 (file)
@@ -1495,11 +1495,12 @@ struct reparse_data {
        __u32   ReparseTag;
        __u16   ReparseDataLength;
        __u16   Reserved;
-       __u16   AltNameOffset;
-       __u16   AltNameLen;
-       __u16   TargetNameOffset;
-       __u16   TargetNameLen;
-       char    LinkNamesBuf[1];
+       __u16   SubstituteNameOffset;
+       __u16   SubstituteNameLength;
+       __u16   PrintNameOffset;
+       __u16   PrintNameLength;
+       __u32   Flags;
+       char    PathBuffer[0];
 } __attribute__((packed));
 
 struct cifs_quota_data {
index b29a012..b5ec2a2 100644 (file)
@@ -357,13 +357,9 @@ extern int CIFSSMBUnixQuerySymLink(const unsigned int xid,
                        struct cifs_tcon *tcon,
                        const unsigned char *searchName, char **syminfo,
                        const struct nls_table *nls_codepage);
-#ifdef CONFIG_CIFS_SYMLINK_EXPERIMENTAL
-extern int CIFSSMBQueryReparseLinkInfo(const unsigned int xid,
-                       struct cifs_tcon *tcon,
-                       const unsigned char *searchName,
-                       char *symlinkinfo, const int buflen, __u16 fid,
-                       const struct nls_table *nls_codepage);
-#endif /* temporarily unused until cifs_symlink fixed */
+extern int CIFSSMBQuerySymLink(const unsigned int xid, struct cifs_tcon *tcon,
+                              __u16 fid, char **symlinkinfo,
+                              const struct nls_table *nls_codepage);
 extern int CIFSSMBOpen(const unsigned int xid, struct cifs_tcon *tcon,
                        const char *fileName, const int disposition,
                        const int access_flags, const int omode,
@@ -435,7 +431,7 @@ extern int setup_ntlm_response(struct cifs_ses *, const struct nls_table *);
 extern int setup_ntlmv2_rsp(struct cifs_ses *, const struct nls_table *);
 extern void cifs_crypto_shash_release(struct TCP_Server_Info *);
 extern int calc_seckey(struct cifs_ses *);
-extern void generate_smb3signingkey(struct TCP_Server_Info *);
+extern int generate_smb3signingkey(struct cifs_ses *);
 
 #ifdef CONFIG_CIFS_WEAK_PW_HASH
 extern int calc_lanman_hash(const char *password, const char *cryptkey,
index a89c4cb..a3d74fe 100644 (file)
@@ -3067,7 +3067,6 @@ querySymLinkRetry:
        return rc;
 }
 
-#ifdef CONFIG_CIFS_SYMLINK_EXPERIMENTAL
 /*
  *     Recent Windows versions now create symlinks more frequently
  *     and they use the "reparse point" mechanism below.  We can of course
@@ -3079,18 +3078,22 @@ querySymLinkRetry:
  *     it is not compiled in by default until callers fixed up and more tested.
  */
 int
-CIFSSMBQueryReparseLinkInfo(const unsigned int xid, struct cifs_tcon *tcon,
-                       const unsigned char *searchName,
-                       char *symlinkinfo, const int buflen, __u16 fid,
-                       const struct nls_table *nls_codepage)
+CIFSSMBQuerySymLink(const unsigned int xid, struct cifs_tcon *tcon,
+                   __u16 fid, char **symlinkinfo,
+                   const struct nls_table *nls_codepage)
 {
        int rc = 0;
        int bytes_returned;
        struct smb_com_transaction_ioctl_req *pSMB;
        struct smb_com_transaction_ioctl_rsp *pSMBr;
+       bool is_unicode;
+       unsigned int sub_len;
+       char *sub_start;
+       struct reparse_data *reparse_buf;
+       __u32 data_offset, data_count;
+       char *end_of_smb;
 
-       cifs_dbg(FYI, "In Windows reparse style QueryLink for path %s\n",
-                searchName);
+       cifs_dbg(FYI, "In Windows reparse style QueryLink for fid %u\n", fid);
        rc = smb_init(SMB_COM_NT_TRANSACT, 23, tcon, (void **) &pSMB,
                      (void **) &pSMBr);
        if (rc)
@@ -3119,66 +3122,55 @@ CIFSSMBQueryReparseLinkInfo(const unsigned int xid, struct cifs_tcon *tcon,
                         (struct smb_hdr *) pSMBr, &bytes_returned, 0);
        if (rc) {
                cifs_dbg(FYI, "Send error in QueryReparseLinkInfo = %d\n", rc);
-       } else {                /* decode response */
-               __u32 data_offset = le32_to_cpu(pSMBr->DataOffset);
-               __u32 data_count = le32_to_cpu(pSMBr->DataCount);
-               if (get_bcc(&pSMBr->hdr) < 2 || data_offset > 512) {
-                       /* BB also check enough total bytes returned */
-                       rc = -EIO;      /* bad smb */
-                       goto qreparse_out;
-               }
-               if (data_count && (data_count < 2048)) {
-                       char *end_of_smb = 2 /* sizeof byte count */ +
-                              get_bcc(&pSMBr->hdr) + (char *)&pSMBr->ByteCount;
-
-                       struct reparse_data *reparse_buf =
-                                               (struct reparse_data *)
-                                               ((char *)&pSMBr->hdr.Protocol
-                                                                + data_offset);
-                       if ((char *)reparse_buf >= end_of_smb) {
-                               rc = -EIO;
-                               goto qreparse_out;
-                       }
-                       if ((reparse_buf->LinkNamesBuf +
-                               reparse_buf->TargetNameOffset +
-                               reparse_buf->TargetNameLen) > end_of_smb) {
-                               cifs_dbg(FYI, "reparse buf beyond SMB\n");
-                               rc = -EIO;
-                               goto qreparse_out;
-                       }
+               goto qreparse_out;
+       }
 
-                       if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE) {
-                               cifs_from_ucs2(symlinkinfo, (__le16 *)
-                                               (reparse_buf->LinkNamesBuf +
-                                               reparse_buf->TargetNameOffset),
-                                               buflen,
-                                               reparse_buf->TargetNameLen,
-                                               nls_codepage, 0);
-                       } else { /* ASCII names */
-                               strncpy(symlinkinfo,
-                                       reparse_buf->LinkNamesBuf +
-                                       reparse_buf->TargetNameOffset,
-                                       min_t(const int, buflen,
-                                          reparse_buf->TargetNameLen));
-                       }
-               } else {
-                       rc = -EIO;
-                       cifs_dbg(FYI, "Invalid return data count on get reparse info ioctl\n");
-               }
-               symlinkinfo[buflen] = 0; /* just in case so the caller
-                                       does not go off the end of the buffer */
-               cifs_dbg(FYI, "readlink result - %s\n", symlinkinfo);
+       data_offset = le32_to_cpu(pSMBr->DataOffset);
+       data_count = le32_to_cpu(pSMBr->DataCount);
+       if (get_bcc(&pSMBr->hdr) < 2 || data_offset > 512) {
+               /* BB also check enough total bytes returned */
+               rc = -EIO;      /* bad smb */
+               goto qreparse_out;
+       }
+       if (!data_count || (data_count > 2048)) {
+               rc = -EIO;
+               cifs_dbg(FYI, "Invalid return data count on get reparse info ioctl\n");
+               goto qreparse_out;
+       }
+       end_of_smb = 2 + get_bcc(&pSMBr->hdr) + (char *)&pSMBr->ByteCount;
+       reparse_buf = (struct reparse_data *)
+                               ((char *)&pSMBr->hdr.Protocol + data_offset);
+       if ((char *)reparse_buf >= end_of_smb) {
+               rc = -EIO;
+               goto qreparse_out;
        }
+       if ((reparse_buf->PathBuffer + reparse_buf->PrintNameOffset +
+                               reparse_buf->PrintNameLength) > end_of_smb) {
+               cifs_dbg(FYI, "reparse buf beyond SMB\n");
+               rc = -EIO;
+               goto qreparse_out;
+       }
+       sub_start = reparse_buf->SubstituteNameOffset + reparse_buf->PathBuffer;
+       sub_len = reparse_buf->SubstituteNameLength;
+       if (pSMBr->hdr.Flags2 & SMBFLG2_UNICODE)
+               is_unicode = true;
+       else
+               is_unicode = false;
 
+       /* BB FIXME investigate remapping reserved chars here */
+       *symlinkinfo = cifs_strndup_from_utf16(sub_start, sub_len, is_unicode,
+                                              nls_codepage);
+       if (!*symlinkinfo)
+               rc = -ENOMEM;
 qreparse_out:
        cifs_buf_release(pSMB);
 
-       /* Note: On -EAGAIN error only caller can retry on handle based calls
-               since file handle passed in no longer valid */
-
+       /*
+        * Note: On -EAGAIN error only caller can retry on handle based calls
+        * since file handle passed in no longer valid.
+        */
        return rc;
 }
-#endif /* CIFS_SYMLINK_EXPERIMENTAL */ /* BB temporarily unused */
 
 #ifdef CONFIG_CIFS_POSIX
 
index d67c550..a279ffc 100644 (file)
@@ -379,6 +379,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
                try_to_freeze();
 
                /* we should try only the port we connected to before */
+               mutex_lock(&server->srv_mutex);
                rc = generic_ip_connect(server);
                if (rc) {
                        cifs_dbg(FYI, "reconnect error %d\n", rc);
@@ -390,6 +391,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
                                server->tcpStatus = CifsNeedNegotiate;
                        spin_unlock(&GlobalMid_Lock);
                }
+               mutex_unlock(&server->srv_mutex);
        } while (server->tcpStatus == CifsNeedReconnect);
 
        return rc;
@@ -1114,7 +1116,7 @@ cifs_parse_smb_version(char *value, struct smb_vol *vol)
                break;
 #ifdef CONFIG_CIFS_SMB2
        case Smb_20:
-               vol->ops = &smb21_operations; /* currently identical with 2.1 */
+               vol->ops = &smb20_operations;
                vol->vals = &smb20_values;
                break;
        case Smb_21:
@@ -1575,8 +1577,8 @@ cifs_parse_mount_options(const char *mountdata, const char *devname,
                        if (string == NULL)
                                goto out_nomem;
 
-                       if (strnlen(string, MAX_USERNAME_SIZE) >
-                                                       MAX_USERNAME_SIZE) {
+                       if (strnlen(string, CIFS_MAX_USERNAME_LEN) >
+                                                       CIFS_MAX_USERNAME_LEN) {
                                printk(KERN_WARNING "CIFS: username too long\n");
                                goto cifs_parse_mount_err;
                        }
@@ -2221,13 +2223,13 @@ static int match_session(struct cifs_ses *ses, struct smb_vol *vol)
                /* anything else takes username/password */
                if (strncmp(ses->user_name,
                            vol->username ? vol->username : "",
-                           MAX_USERNAME_SIZE))
+                           CIFS_MAX_USERNAME_LEN))
                        return 0;
                if (strlen(vol->username) != 0 &&
                    ses->password != NULL &&
                    strncmp(ses->password,
                            vol->password ? vol->password : "",
-                           MAX_PASSWORD_SIZE))
+                           CIFS_MAX_PASSWORD_LEN))
                        return 0;
        }
        return 1;
@@ -2352,7 +2354,7 @@ cifs_set_cifscreds(struct smb_vol *vol, struct cifs_ses *ses)
        }
 
        len = delim - payload;
-       if (len > MAX_USERNAME_SIZE || len <= 0) {
+       if (len > CIFS_MAX_USERNAME_LEN || len <= 0) {
                cifs_dbg(FYI, "Bad value from username search (len=%zd)\n",
                         len);
                rc = -EINVAL;
@@ -2369,7 +2371,7 @@ cifs_set_cifscreds(struct smb_vol *vol, struct cifs_ses *ses)
        cifs_dbg(FYI, "%s: username=%s\n", __func__, vol->username);
 
        len = key->datalen - (len + 1);
-       if (len > MAX_PASSWORD_SIZE || len <= 0) {
+       if (len > CIFS_MAX_PASSWORD_LEN || len <= 0) {
                cifs_dbg(FYI, "Bad len for password search (len=%zd)\n", len);
                rc = -EINVAL;
                kfree(vol->username);
@@ -3826,33 +3828,8 @@ cifs_setup_session(const unsigned int xid, struct cifs_ses *ses,
        if (server->ops->sess_setup)
                rc = server->ops->sess_setup(xid, ses, nls_info);
 
-       if (rc) {
+       if (rc)
                cifs_dbg(VFS, "Send error in SessSetup = %d\n", rc);
-       } else {
-               mutex_lock(&server->srv_mutex);
-               if (!server->session_estab) {
-                       server->session_key.response = ses->auth_key.response;
-                       server->session_key.len = ses->auth_key.len;
-                       server->sequence_number = 0x2;
-                       server->session_estab = true;
-                       ses->auth_key.response = NULL;
-                       if (server->ops->generate_signingkey)
-                               server->ops->generate_signingkey(server);
-               }
-               mutex_unlock(&server->srv_mutex);
-
-               cifs_dbg(FYI, "CIFS Session Established successfully\n");
-               spin_lock(&GlobalMid_Lock);
-               ses->status = CifsGood;
-               ses->need_reconnect = false;
-               spin_unlock(&GlobalMid_Lock);
-       }
-
-       kfree(ses->auth_key.response);
-       ses->auth_key.response = NULL;
-       ses->auth_key.len = 0;
-       kfree(ses->ntlmssp);
-       ses->ntlmssp = NULL;
 
        return rc;
 }
index d62ce0d..d3e2eaa 100644 (file)
@@ -32,6 +32,7 @@
 #include "cifsproto.h"
 #include "cifs_debug.h"
 #include "cifs_fs_sb.h"
+#include "cifs_unicode.h"
 
 static void
 renew_parental_timestamps(struct dentry *direntry)
@@ -834,12 +835,17 @@ static int cifs_ci_hash(const struct dentry *dentry, struct qstr *q)
 {
        struct nls_table *codepage = CIFS_SB(dentry->d_sb)->local_nls;
        unsigned long hash;
-       int i;
+       wchar_t c;
+       int i, charlen;
 
        hash = init_name_hash();
-       for (i = 0; i < q->len; i++)
-               hash = partial_name_hash(nls_tolower(codepage, q->name[i]),
-                                        hash);
+       for (i = 0; i < q->len; i += charlen) {
+               charlen = codepage->char2uni(&q->name[i], q->len - i, &c);
+               /* error out if we can't convert the character */
+               if (unlikely(charlen < 0))
+                       return charlen;
+               hash = partial_name_hash(cifs_toupper(c), hash);
+       }
        q->hash = end_name_hash(hash);
 
        return 0;
@@ -849,11 +855,47 @@ static int cifs_ci_compare(const struct dentry *parent, const struct dentry *den
                unsigned int len, const char *str, const struct qstr *name)
 {
        struct nls_table *codepage = CIFS_SB(parent->d_sb)->local_nls;
+       wchar_t c1, c2;
+       int i, l1, l2;
 
-       if ((name->len == len) &&
-           (nls_strnicmp(codepage, name->name, str, len) == 0))
-               return 0;
-       return 1;
+       /*
+        * We make the assumption here that uppercase characters in the local
+        * codepage are always the same length as their lowercase counterparts.
+        *
+        * If that's ever not the case, then this will fail to match it.
+        */
+       if (name->len != len)
+               return 1;
+
+       for (i = 0; i < len; i += l1) {
+               /* Convert characters in both strings to UTF-16. */
+               l1 = codepage->char2uni(&str[i], len - i, &c1);
+               l2 = codepage->char2uni(&name->name[i], name->len - i, &c2);
+
+               /*
+                * If we can't convert either character, just declare it to
+                * be 1 byte long and compare the original byte.
+                */
+               if (unlikely(l1 < 0 && l2 < 0)) {
+                       if (str[i] != name->name[i])
+                               return 1;
+                       l1 = 1;
+                       continue;
+               }
+
+               /*
+                * Here, we again ass|u|me that upper/lowercase versions of
+                * a character are the same length in the local NLS.
+                */
+               if (l1 != l2)
+                       return 1;
+
+               /* Now compare uppercase versions of these characters */
+               if (cifs_toupper(c1) != cifs_toupper(c2))
+                       return 1;
+       }
+
+       return 0;
 }
 
 const struct dentry_operations cifs_ci_dentry_ops = {
index 9d0dd95..eb955b5 100644 (file)
@@ -313,8 +313,7 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
         * If the server returned a read oplock and we have mandatory brlocks,
         * set oplock level to None.
         */
-       if (oplock == server->vals->oplock_read &&
-                                               cifs_has_mand_locks(cinode)) {
+       if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
                oplock = 0;
        }
@@ -324,6 +323,7 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
                oplock = fid->pending_open->oplock;
        list_del(&fid->pending_open->olist);
 
+       fid->purge_cache = false;
        server->ops->set_fid(cfile, fid, oplock);
 
        list_add(&cfile->tlist, &tcon->openFileList);
@@ -334,6 +334,9 @@ cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
                list_add_tail(&cfile->flist, &cinode->openFileList);
        spin_unlock(&cifs_file_list_lock);
 
+       if (fid->purge_cache)
+               cifs_invalidate_mapping(inode);
+
        file->private_data = cfile;
        return cfile;
 }
@@ -1524,12 +1527,12 @@ cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
                 * read won't conflict with non-overlapping locks due to
                 * pagereading.
                 */
-               if (!CIFS_I(inode)->clientCanCacheAll &&
-                                       CIFS_I(inode)->clientCanCacheRead) {
+               if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
+                                       CIFS_CACHE_READ(CIFS_I(inode))) {
                        cifs_invalidate_mapping(inode);
                        cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
                                 inode);
-                       CIFS_I(inode)->clientCanCacheRead = false;
+                       CIFS_I(inode)->oplock = 0;
                }
 
                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
@@ -2213,7 +2216,7 @@ int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
        cifs_dbg(FYI, "Sync file - name: %s datasync: 0x%x\n",
                 file->f_path.dentry->d_name.name, datasync);
 
-       if (!CIFS_I(inode)->clientCanCacheRead) {
+       if (!CIFS_CACHE_READ(CIFS_I(inode))) {
                rc = cifs_invalidate_mapping(inode);
                if (rc) {
                        cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
@@ -2577,7 +2580,7 @@ cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        ssize_t written;
 
-       if (cinode->clientCanCacheAll) {
+       if (CIFS_CACHE_WRITE(cinode)) {
                if (cap_unix(tcon->ses) &&
                (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
                    && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
@@ -2591,7 +2594,7 @@ cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
          * these pages but not on the region from pos to pos+len-1.
         */
        written = cifs_user_writev(iocb, iov, nr_segs, pos);
-       if (written > 0 && cinode->clientCanCacheRead) {
+       if (written > 0 && CIFS_CACHE_READ(cinode)) {
                /*
                 * Windows 7 server can delay breaking level2 oplock if a write
                 * request comes - break it on the client to prevent reading
@@ -2600,7 +2603,7 @@ cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
                cifs_invalidate_mapping(inode);
                cifs_dbg(FYI, "Set no oplock for inode=%p after a write operation\n",
                         inode);
-               cinode->clientCanCacheRead = false;
+               cinode->oplock = 0;
        }
        return written;
 }
@@ -2957,7 +2960,7 @@ cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov,
         * on pages affected by this read but not on the region from pos to
         * pos+len-1.
         */
-       if (!cinode->clientCanCacheRead)
+       if (!CIFS_CACHE_READ(cinode))
                return cifs_user_readv(iocb, iov, nr_segs, pos);
 
        if (cap_unix(tcon->ses) &&
@@ -3093,7 +3096,7 @@ int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
 
        xid = get_xid();
 
-       if (!CIFS_I(inode)->clientCanCacheRead) {
+       if (!CIFS_CACHE_READ(CIFS_I(inode))) {
                rc = cifs_invalidate_mapping(inode);
                if (rc)
                        return rc;
@@ -3376,6 +3379,9 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
        return rc;
 }
 
+/*
+ * cifs_readpage_worker must be called with the page pinned
+ */
 static int cifs_readpage_worker(struct file *file, struct page *page,
        loff_t *poffset)
 {
@@ -3387,7 +3393,6 @@ static int cifs_readpage_worker(struct file *file, struct page *page,
        if (rc == 0)
                goto read_complete;
 
-       page_cache_get(page);
        read_data = kmap(page);
        /* for reads over a certain size could initiate async read ahead */
 
@@ -3414,7 +3419,7 @@ static int cifs_readpage_worker(struct file *file, struct page *page,
 
 io_error:
        kunmap(page);
-       page_cache_release(page);
+       unlock_page(page);
 
 read_complete:
        return rc;
@@ -3439,8 +3444,6 @@ static int cifs_readpage(struct file *file, struct page *page)
 
        rc = cifs_readpage_worker(file, page, &offset);
 
-       unlock_page(page);
-
        free_xid(xid);
        return rc;
 }
@@ -3494,6 +3497,7 @@ static int cifs_write_begin(struct file *file, struct address_space *mapping,
                        loff_t pos, unsigned len, unsigned flags,
                        struct page **pagep, void **fsdata)
 {
+       int oncethru = 0;
        pgoff_t index = pos >> PAGE_CACHE_SHIFT;
        loff_t offset = pos & (PAGE_CACHE_SIZE - 1);
        loff_t page_start = pos & PAGE_MASK;
@@ -3503,6 +3507,7 @@ static int cifs_write_begin(struct file *file, struct address_space *mapping,
 
        cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
 
+start:
        page = grab_cache_page_write_begin(mapping, index, flags);
        if (!page) {
                rc = -ENOMEM;
@@ -3526,7 +3531,7 @@ static int cifs_write_begin(struct file *file, struct address_space *mapping,
         * is, when the page lies beyond the EOF, or straddles the EOF
         * and the write will cover all of the existing data.
         */
-       if (CIFS_I(mapping->host)->clientCanCacheRead) {
+       if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
                i_size = i_size_read(mapping->host);
                if (page_start >= i_size ||
                    (offset == 0 && (pos + len) >= i_size)) {
@@ -3544,13 +3549,16 @@ static int cifs_write_begin(struct file *file, struct address_space *mapping,
                }
        }
 
-       if ((file->f_flags & O_ACCMODE) != O_WRONLY) {
+       if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
                /*
                 * might as well read a page, it is fast enough. If we get
                 * an error, we don't need to return it. cifs_write_end will
                 * do a sync write instead since PG_uptodate isn't set.
                 */
                cifs_readpage_worker(file, page, &page_start);
+               page_cache_release(page);
+               oncethru = 1;
+               goto start;
        } else {
                /* we could try using another file handle if there is one -
                   but how would we lock it to prevent close of that handle
@@ -3609,20 +3617,20 @@ void cifs_oplock_break(struct work_struct *work)
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = 0;
 
-       if (!cinode->clientCanCacheAll && cinode->clientCanCacheRead &&
+       if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
                                                cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
                         inode);
-               cinode->clientCanCacheRead = false;
+               cinode->oplock = 0;
        }
 
        if (inode && S_ISREG(inode->i_mode)) {
-               if (cinode->clientCanCacheRead)
+               if (CIFS_CACHE_READ(cinode))
                        break_lease(inode, O_RDONLY);
                else
                        break_lease(inode, O_WRONLY);
                rc = filemap_fdatawrite(inode->i_mapping);
-               if (cinode->clientCanCacheRead == 0) {
+               if (!CIFS_CACHE_READ(cinode)) {
                        rc = filemap_fdatawait(inode->i_mapping);
                        mapping_set_error(inode->i_mapping, rc);
                        cifs_invalidate_mapping(inode);
index 449b6cf..f9ff9c1 100644 (file)
@@ -101,7 +101,7 @@ cifs_revalidate_cache(struct inode *inode, struct cifs_fattr *fattr)
        }
 
        /* don't bother with revalidation if we have an oplock */
-       if (cifs_i->clientCanCacheRead) {
+       if (CIFS_CACHE_READ(cifs_i)) {
                cifs_dbg(FYI, "%s: inode %llu is oplocked\n",
                         __func__, cifs_i->uniqueid);
                return;
@@ -549,6 +549,10 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info,
                 * when Unix extensions are disabled - fake it.
                 */
                fattr->cf_nlink = 2;
+       } else if (fattr->cf_cifsattrs & ATTR_REPARSE) {
+               fattr->cf_mode = S_IFLNK;
+               fattr->cf_dtype = DT_LNK;
+               fattr->cf_nlink = le32_to_cpu(info->NumberOfLinks);
        } else {
                fattr->cf_mode = S_IFREG | cifs_sb->mnt_file_mode;
                fattr->cf_dtype = DT_REG;
@@ -646,7 +650,7 @@ cifs_get_inode_info(struct inode **inode, const char *full_path,
        cifs_dbg(FYI, "Getting info on %s\n", full_path);
 
        if ((data == NULL) && (*inode != NULL)) {
-               if (CIFS_I(*inode)->clientCanCacheRead) {
+               if (CIFS_CACHE_READ(CIFS_I(*inode))) {
                        cifs_dbg(FYI, "No need to revalidate cached inode sizes\n");
                        goto cgii_exit;
                }
@@ -1657,7 +1661,7 @@ cifs_inode_needs_reval(struct inode *inode)
        struct cifsInodeInfo *cifs_i = CIFS_I(inode);
        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
 
-       if (cifs_i->clientCanCacheRead)
+       if (CIFS_CACHE_READ(cifs_i))
                return false;
 
        if (!lookupCacheEnabled)
@@ -1800,7 +1804,7 @@ int cifs_getattr(struct vfsmount *mnt, struct dentry *dentry,
         * We need to be sure that all dirty pages are written and the server
         * has actual ctime, mtime and file length.
         */
-       if (!CIFS_I(inode)->clientCanCacheRead && inode->i_mapping &&
+       if (!CIFS_CACHE_READ(CIFS_I(inode)) && inode->i_mapping &&
            inode->i_mapping->nrpages != 0) {
                rc = filemap_fdatawait(inode->i_mapping);
                if (rc) {
@@ -1852,14 +1856,11 @@ static int cifs_truncate_page(struct address_space *mapping, loff_t from)
 
 static void cifs_setsize(struct inode *inode, loff_t offset)
 {
-       loff_t oldsize;
-
        spin_lock(&inode->i_lock);
-       oldsize = inode->i_size;
        i_size_write(inode, offset);
        spin_unlock(&inode->i_lock);
 
-       truncate_pagecache(inode, oldsize, offset);
+       truncate_pagecache(inode, offset);
 }
 
 static int
index 562044f..7e36ceb 100644 (file)
@@ -509,6 +509,7 @@ cifs_follow_link(struct dentry *direntry, struct nameidata *nd)
        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
        struct tcon_link *tlink = NULL;
        struct cifs_tcon *tcon;
+       struct TCP_Server_Info *server;
 
        xid = get_xid();
 
@@ -519,25 +520,7 @@ cifs_follow_link(struct dentry *direntry, struct nameidata *nd)
                goto out;
        }
        tcon = tlink_tcon(tlink);
-
-       /*
-        * For now, we just handle symlinks with unix extensions enabled.
-        * Eventually we should handle NTFS reparse points, and MacOS
-        * symlink support. For instance...
-        *
-        * rc = CIFSSMBQueryReparseLinkInfo(...)
-        *
-        * For now, just return -EACCES when the server doesn't support posix
-        * extensions. Note that we still allow querying symlinks when posix
-        * extensions are manually disabled. We could disable these as well
-        * but there doesn't seem to be any harm in allowing the client to
-        * read them.
-        */
-       if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MF_SYMLINKS) &&
-           !cap_unix(tcon->ses)) {
-               rc = -EACCES;
-               goto out;
-       }
+       server = tcon->ses->server;
 
        full_path = build_path_from_dentry(direntry);
        if (!full_path)
@@ -559,6 +542,9 @@ cifs_follow_link(struct dentry *direntry, struct nameidata *nd)
        if ((rc != 0) && cap_unix(tcon->ses))
                rc = CIFSSMBUnixQuerySymLink(xid, tcon, full_path, &target_path,
                                             cifs_sb->local_nls);
+       else if (rc != 0 && server->ops->query_symlink)
+               rc = server->ops->query_symlink(xid, tcon, full_path,
+                                               &target_path, cifs_sb);
 
        kfree(full_path);
 out:
index f7d4b22..138a011 100644 (file)
@@ -105,6 +105,7 @@ sesInfoFree(struct cifs_ses *buf_to_free)
        }
        kfree(buf_to_free->user_name);
        kfree(buf_to_free->domainName);
+       kfree(buf_to_free->auth_key.response);
        kfree(buf_to_free);
 }
 
@@ -545,19 +546,15 @@ void cifs_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock)
        oplock &= 0xF;
 
        if (oplock == OPLOCK_EXCLUSIVE) {
-               cinode->clientCanCacheAll = true;
-               cinode->clientCanCacheRead = true;
+               cinode->oplock = CIFS_CACHE_WRITE_FLG | CIFS_CACHE_READ_FLG;
                cifs_dbg(FYI, "Exclusive Oplock granted on inode %p\n",
                         &cinode->vfs_inode);
        } else if (oplock == OPLOCK_READ) {
-               cinode->clientCanCacheAll = false;
-               cinode->clientCanCacheRead = true;
+               cinode->oplock = CIFS_CACHE_READ_FLG;
                cifs_dbg(FYI, "Level II Oplock granted on inode %p\n",
                         &cinode->vfs_inode);
-       } else {
-               cinode->clientCanCacheAll = false;
-               cinode->clientCanCacheRead = false;
-       }
+       } else
+               cinode->oplock = 0;
 }
 
 bool
index 69d2c82..42ef03b 100644 (file)
@@ -172,6 +172,9 @@ cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb)
                if (cifs_dfs_is_possible(cifs_sb) &&
                    (fattr->cf_cifsattrs & ATTR_REPARSE))
                        fattr->cf_flags |= CIFS_FATTR_NEED_REVAL;
+       } else if (fattr->cf_cifsattrs & ATTR_REPARSE) {
+               fattr->cf_mode = S_IFLNK;
+               fattr->cf_dtype = DT_LNK;
        } else {
                fattr->cf_mode = S_IFREG | cifs_sb->mnt_file_mode;
                fattr->cf_dtype = DT_REG;
index 08dd37b..5f99b7f 100644 (file)
@@ -226,7 +226,7 @@ static void unicode_ssetup_strings(char **pbcc_area, struct cifs_ses *ses,
                *(bcc_ptr+1) = 0;
        } else {
                bytes_ret = cifs_strtoUTF16((__le16 *) bcc_ptr, ses->user_name,
-                                           MAX_USERNAME_SIZE, nls_cp);
+                                           CIFS_MAX_USERNAME_LEN, nls_cp);
        }
        bcc_ptr += 2 * bytes_ret;
        bcc_ptr += 2; /* account for null termination */
@@ -246,8 +246,8 @@ static void ascii_ssetup_strings(char **pbcc_area, struct cifs_ses *ses,
        /* BB what about null user mounts - check that we do this BB */
        /* copy user */
        if (ses->user_name != NULL) {
-               strncpy(bcc_ptr, ses->user_name, MAX_USERNAME_SIZE);
-               bcc_ptr += strnlen(ses->user_name, MAX_USERNAME_SIZE);
+               strncpy(bcc_ptr, ses->user_name, CIFS_MAX_USERNAME_LEN);
+               bcc_ptr += strnlen(ses->user_name, CIFS_MAX_USERNAME_LEN);
        }
        /* else null user mount */
        *bcc_ptr = 0;
@@ -428,7 +428,8 @@ void build_ntlmssp_negotiate_blob(unsigned char *pbuffer,
                NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_EXTENDED_SEC;
        if (ses->server->sign) {
                flags |= NTLMSSP_NEGOTIATE_SIGN;
-               if (!ses->server->session_estab)
+               if (!ses->server->session_estab ||
+                               ses->ntlmssp->sesskey_per_smbsess)
                        flags |= NTLMSSP_NEGOTIATE_KEY_XCH;
        }
 
@@ -466,7 +467,8 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer,
                NTLMSSP_NEGOTIATE_NTLM | NTLMSSP_NEGOTIATE_EXTENDED_SEC;
        if (ses->server->sign) {
                flags |= NTLMSSP_NEGOTIATE_SIGN;
-               if (!ses->server->session_estab)
+               if (!ses->server->session_estab ||
+                               ses->ntlmssp->sesskey_per_smbsess)
                        flags |= NTLMSSP_NEGOTIATE_KEY_XCH;
        }
 
@@ -501,7 +503,7 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer,
        } else {
                int len;
                len = cifs_strtoUTF16((__le16 *)tmp, ses->domainName,
-                                     MAX_USERNAME_SIZE, nls_cp);
+                                     CIFS_MAX_USERNAME_LEN, nls_cp);
                len *= 2; /* unicode is 2 bytes each */
                sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - pbuffer);
                sec_blob->DomainName.Length = cpu_to_le16(len);
@@ -517,7 +519,7 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer,
        } else {
                int len;
                len = cifs_strtoUTF16((__le16 *)tmp, ses->user_name,
-                                     MAX_USERNAME_SIZE, nls_cp);
+                                     CIFS_MAX_USERNAME_LEN, nls_cp);
                len *= 2; /* unicode is 2 bytes each */
                sec_blob->UserName.BufferOffset = cpu_to_le32(tmp - pbuffer);
                sec_blob->UserName.Length = cpu_to_le16(len);
@@ -629,7 +631,8 @@ CIFS_SessSetup(const unsigned int xid, struct cifs_ses *ses,
        type = select_sectype(ses->server, ses->sectype);
        cifs_dbg(FYI, "sess setup type %d\n", type);
        if (type == Unspecified) {
-               cifs_dbg(VFS, "Unable to select appropriate authentication method!");
+               cifs_dbg(VFS,
+                       "Unable to select appropriate authentication method!");
                return -EINVAL;
        }
 
@@ -640,6 +643,8 @@ CIFS_SessSetup(const unsigned int xid, struct cifs_ses *ses,
                ses->ntlmssp = kmalloc(sizeof(struct ntlmssp_auth), GFP_KERNEL);
                if (!ses->ntlmssp)
                        return -ENOMEM;
+               ses->ntlmssp->sesskey_per_smbsess = false;
+
        }
 
 ssetup_ntlmssp_authenticate:
@@ -815,8 +820,9 @@ ssetup_ntlmssp_authenticate:
                ses->auth_key.response = kmemdup(msg->data, msg->sesskey_len,
                                                 GFP_KERNEL);
                if (!ses->auth_key.response) {
-                       cifs_dbg(VFS, "Kerberos can't allocate (%u bytes) memory",
-                                       msg->sesskey_len);
+                       cifs_dbg(VFS,
+                               "Kerberos can't allocate (%u bytes) memory",
+                               msg->sesskey_len);
                        rc = -ENOMEM;
                        goto ssetup_exit;
                }
@@ -1005,5 +1011,37 @@ ssetup_exit:
        if ((phase == NtLmChallenge) && (rc == 0))
                goto ssetup_ntlmssp_authenticate;
 
+       if (!rc) {
+               mutex_lock(&ses->server->srv_mutex);
+               if (!ses->server->session_estab) {
+                       if (ses->server->sign) {
+                               ses->server->session_key.response =
+                                       kmemdup(ses->auth_key.response,
+                                       ses->auth_key.len, GFP_KERNEL);
+                               if (!ses->server->session_key.response) {
+                                       rc = -ENOMEM;
+                                       mutex_unlock(&ses->server->srv_mutex);
+                                       goto keycp_exit;
+                               }
+                               ses->server->session_key.len =
+                                                       ses->auth_key.len;
+                       }
+                       ses->server->sequence_number = 0x2;
+                       ses->server->session_estab = true;
+               }
+               mutex_unlock(&ses->server->srv_mutex);
+
+               cifs_dbg(FYI, "CIFS session established successfully\n");
+               spin_lock(&GlobalMid_Lock);
+               ses->status = CifsGood;
+               ses->need_reconnect = false;
+               spin_unlock(&GlobalMid_Lock);
+       }
+
+keycp_exit:
+       kfree(ses->auth_key.response);
+       ses->auth_key.response = NULL;
+       kfree(ses->ntlmssp);
+
        return rc;
 }
index 6094397..8233b17 100644 (file)
@@ -700,7 +700,7 @@ cifs_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock)
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
        cfile->fid.netfid = fid->netfid;
        cifs_set_oplock_level(cinode, oplock);
-       cinode->can_cache_brlcks = cinode->clientCanCacheAll;
+       cinode->can_cache_brlcks = CIFS_CACHE_WRITE(cinode);
 }
 
 static void
@@ -837,7 +837,7 @@ cifs_oplock_response(struct cifs_tcon *tcon, struct cifs_fid *fid,
 {
        return CIFSSMBLock(0, tcon, fid->netfid, current->tgid, 0, 0, 0, 0,
                           LOCKING_ANDX_OPLOCK_RELEASE, false,
-                          cinode->clientCanCacheRead ? 1 : 0);
+                          CIFS_CACHE_READ(cinode) ? 1 : 0);
 }
 
 static int
@@ -881,6 +881,43 @@ cifs_mand_lock(const unsigned int xid, struct cifsFileInfo *cfile, __u64 offset,
                           (__u8)type, wait, 0);
 }
 
+static int
+cifs_query_symlink(const unsigned int xid, struct cifs_tcon *tcon,
+                  const char *full_path, char **target_path,
+                  struct cifs_sb_info *cifs_sb)
+{
+       int rc;
+       int oplock = 0;
+       __u16 netfid;
+
+       cifs_dbg(FYI, "%s: path: %s\n", __func__, full_path);
+
+       rc = CIFSSMBOpen(xid, tcon, full_path, FILE_OPEN,
+                        FILE_READ_ATTRIBUTES, OPEN_REPARSE_POINT, &netfid,
+                        &oplock, NULL, cifs_sb->local_nls,
+                        cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MAP_SPECIAL_CHR);
+       if (rc)
+               return rc;
+
+       rc = CIFSSMBQuerySymLink(xid, tcon, netfid, target_path,
+                                cifs_sb->local_nls);
+       if (rc) {
+               CIFSSMBClose(xid, tcon, netfid);
+               return rc;
+       }
+
+       convert_delimiter(*target_path, '/');
+       CIFSSMBClose(xid, tcon, netfid);
+       cifs_dbg(FYI, "%s: target path: %s\n", __func__, *target_path);
+       return rc;
+}
+
+static bool
+cifs_is_read_op(__u32 oplock)
+{
+       return oplock == OPLOCK_READ;
+}
+
 struct smb_version_operations smb1_operations = {
        .send_cancel = send_nt_cancel,
        .compare_fids = cifs_compare_fids,
@@ -927,6 +964,7 @@ struct smb_version_operations smb1_operations = {
        .rename_pending_delete = cifs_rename_pending_delete,
        .rename = CIFSSMBRename,
        .create_hardlink = CIFSCreateHardLink,
+       .query_symlink = cifs_query_symlink,
        .open = cifs_open_file,
        .set_fid = cifs_set_fid,
        .close = cifs_close_file,
@@ -945,6 +983,7 @@ struct smb_version_operations smb1_operations = {
        .mand_unlock_range = cifs_unlock_range,
        .push_mand_locks = cifs_push_mandatory_locks,
        .query_mf_symlink = open_query_close_cifs_symlink,
+       .is_read_op = cifs_is_read_op,
 };
 
 struct smb_version_values smb1_values = {
@@ -960,7 +999,6 @@ struct smb_version_values smb1_values = {
        .cap_unix = CAP_UNIX,
        .cap_nt_find = CAP_NT_SMBS | CAP_NT_FIND,
        .cap_large_files = CAP_LARGE_FILES,
-       .oplock_read = OPLOCK_READ,
        .signing_enabled = SECMODE_SIGN_ENABLED,
        .signing_required = SECMODE_SIGN_REQUIRED,
 };
index 04a81a4..3f17b45 100644 (file)
 #include "fscache.h"
 #include "smb2proto.h"
 
-void
-smb2_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock)
-{
-       oplock &= 0xFF;
-       if (oplock == SMB2_OPLOCK_LEVEL_NOCHANGE)
-               return;
-       if (oplock == SMB2_OPLOCK_LEVEL_EXCLUSIVE ||
-           oplock == SMB2_OPLOCK_LEVEL_BATCH) {
-               cinode->clientCanCacheAll = true;
-               cinode->clientCanCacheRead = true;
-               cifs_dbg(FYI, "Exclusive Oplock granted on inode %p\n",
-                        &cinode->vfs_inode);
-       } else if (oplock == SMB2_OPLOCK_LEVEL_II) {
-               cinode->clientCanCacheAll = false;
-               cinode->clientCanCacheRead = true;
-               cifs_dbg(FYI, "Level II Oplock granted on inode %p\n",
-                        &cinode->vfs_inode);
-       } else {
-               cinode->clientCanCacheAll = false;
-               cinode->clientCanCacheRead = false;
-       }
-}
-
 int
 smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms,
               __u32 *oplock, FILE_ALL_INFO *buf)
@@ -86,7 +63,7 @@ smb2_open_file(const unsigned int xid, struct cifs_open_parms *oparms,
        if (oparms->tcon->ses->server->capabilities & SMB2_GLOBAL_CAP_LEASING)
                memcpy(smb2_oplock + 1, fid->lease_key, SMB2_LEASE_KEY_SIZE);
 
-       rc = SMB2_open(xid, oparms, smb2_path, smb2_oplock, smb2_data);
+       rc = SMB2_open(xid, oparms, smb2_path, smb2_oplock, smb2_data, NULL);
        if (rc)
                goto out;
 
index c6ec163..78ff88c 100644 (file)
@@ -60,7 +60,7 @@ smb2_open_op_close(const unsigned int xid, struct cifs_tcon *tcon,
        oparms.fid = &fid;
        oparms.reconnect = false;
 
-       rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL);
+       rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, NULL);
        if (rc) {
                kfree(utf16_path);
                return rc;
@@ -136,7 +136,8 @@ smb2_query_path_info(const unsigned int xid, struct cifs_tcon *tcon,
                return -ENOMEM;
 
        rc = smb2_open_op_close(xid, tcon, cifs_sb, full_path,
-                               FILE_READ_ATTRIBUTES, FILE_OPEN, 0, smb2_data,
+                               FILE_READ_ATTRIBUTES, FILE_OPEN,
+                               OPEN_REPARSE_POINT, smb2_data,
                                SMB2_OP_QUERY_INFO);
        if (rc)
                goto out;
@@ -191,8 +192,8 @@ smb2_unlink(const unsigned int xid, struct cifs_tcon *tcon, const char *name,
            struct cifs_sb_info *cifs_sb)
 {
        return smb2_open_op_close(xid, tcon, cifs_sb, name, DELETE, FILE_OPEN,
-                                 CREATE_DELETE_ON_CLOSE, NULL,
-                                 SMB2_OP_DELETE);
+                                 CREATE_DELETE_ON_CLOSE | OPEN_REPARSE_POINT,
+                                 NULL, SMB2_OP_DELETE);
 }
 
 static int
index b0c4334..fb39662 100644 (file)
@@ -171,6 +171,10 @@ smb2_check_message(char *buf, unsigned int length)
        if (4 + len != clc_len) {
                cifs_dbg(FYI, "Calculated size %u length %u mismatch mid %llu\n",
                         clc_len, 4 + len, mid);
+               /* create failed on symlink */
+               if (command == SMB2_CREATE_HE &&
+                   hdr->Status == STATUS_STOPPED_ON_SYMLINK)
+                       return 0;
                /* Windows 7 server returns 24 bytes more */
                if (clc_len + 20 == len && command == SMB2_OPLOCK_BREAK_HE)
                        return 0;
@@ -376,23 +380,15 @@ cifs_convert_path_to_utf16(const char *from, struct cifs_sb_info *cifs_sb)
 __le32
 smb2_get_lease_state(struct cifsInodeInfo *cinode)
 {
-       if (cinode->clientCanCacheAll)
-               return SMB2_LEASE_WRITE_CACHING | SMB2_LEASE_READ_CACHING;
-       else if (cinode->clientCanCacheRead)
-               return SMB2_LEASE_READ_CACHING;
-       return 0;
-}
-
-__u8 smb2_map_lease_to_oplock(__le32 lease_state)
-{
-       if (lease_state & SMB2_LEASE_WRITE_CACHING) {
-               if (lease_state & SMB2_LEASE_HANDLE_CACHING)
-                       return SMB2_OPLOCK_LEVEL_BATCH;
-               else
-                       return SMB2_OPLOCK_LEVEL_EXCLUSIVE;
-       } else if (lease_state & SMB2_LEASE_READ_CACHING)
-               return SMB2_OPLOCK_LEVEL_II;
-       return 0;
+       __le32 lease = 0;
+
+       if (CIFS_CACHE_WRITE(cinode))
+               lease |= SMB2_LEASE_WRITE_CACHING;
+       if (CIFS_CACHE_HANDLE(cinode))
+               lease |= SMB2_LEASE_HANDLE_CACHING;
+       if (CIFS_CACHE_READ(cinode))
+               lease |= SMB2_LEASE_READ_CACHING;
+       return lease;
 }
 
 struct smb2_lease_break_work {
@@ -417,96 +413,109 @@ cifs_ses_oplock_break(struct work_struct *work)
 }
 
 static bool
-smb2_is_valid_lease_break(char *buffer, struct TCP_Server_Info *server)
+smb2_tcon_has_lease(struct cifs_tcon *tcon, struct smb2_lease_break *rsp,
+                   struct smb2_lease_break_work *lw)
 {
-       struct smb2_lease_break *rsp = (struct smb2_lease_break *)buffer;
-       struct list_head *tmp, *tmp1, *tmp2;
-       struct cifs_ses *ses;
-       struct cifs_tcon *tcon;
-       struct cifsInodeInfo *cinode;
+       bool found;
+       __u8 lease_state;
+       struct list_head *tmp;
        struct cifsFileInfo *cfile;
+       struct TCP_Server_Info *server = tcon->ses->server;
        struct cifs_pending_open *open;
-       struct smb2_lease_break_work *lw;
-       bool found;
+       struct cifsInodeInfo *cinode;
        int ack_req = le32_to_cpu(rsp->Flags &
                                  SMB2_NOTIFY_BREAK_LEASE_FLAG_ACK_REQUIRED);
 
-       lw = kmalloc(sizeof(struct smb2_lease_break_work), GFP_KERNEL);
-       if (!lw)
-               return false;
+       lease_state = le32_to_cpu(rsp->NewLeaseState);
 
-       INIT_WORK(&lw->lease_break, cifs_ses_oplock_break);
-       lw->lease_state = rsp->NewLeaseState;
+       list_for_each(tmp, &tcon->openFileList) {
+               cfile = list_entry(tmp, struct cifsFileInfo, tlist);
+               cinode = CIFS_I(cfile->dentry->d_inode);
 
-       cifs_dbg(FYI, "Checking for lease break\n");
+               if (memcmp(cinode->lease_key, rsp->LeaseKey,
+                                                       SMB2_LEASE_KEY_SIZE))
+                       continue;
 
-       /* look up tcon based on tid & uid */
-       spin_lock(&cifs_tcp_ses_lock);
-       list_for_each(tmp, &server->smb_ses_list) {
-               ses = list_entry(tmp, struct cifs_ses, smb_ses_list);
+               cifs_dbg(FYI, "found in the open list\n");
+               cifs_dbg(FYI, "lease key match, lease break 0x%d\n",
+                        le32_to_cpu(rsp->NewLeaseState));
 
-               spin_lock(&cifs_file_list_lock);
-               list_for_each(tmp1, &ses->tcon_list) {
-                       tcon = list_entry(tmp1, struct cifs_tcon, tcon_list);
+               server->ops->set_oplock_level(cinode, lease_state, 0, NULL);
 
-                       cifs_stats_inc(&tcon->stats.cifs_stats.num_oplock_brks);
-                       list_for_each(tmp2, &tcon->openFileList) {
-                               cfile = list_entry(tmp2, struct cifsFileInfo,
-                                                  tlist);
-                               cinode = CIFS_I(cfile->dentry->d_inode);
+               if (ack_req)
+                       cfile->oplock_break_cancelled = false;
+               else
+                       cfile->oplock_break_cancelled = true;
 
-                               if (memcmp(cinode->lease_key, rsp->LeaseKey,
-                                          SMB2_LEASE_KEY_SIZE))
-                                       continue;
+               queue_work(cifsiod_wq, &cfile->oplock_break);
+               kfree(lw);
+               return true;
+       }
 
-                               cifs_dbg(FYI, "found in the open list\n");
-                               cifs_dbg(FYI, "lease key match, lease break 0x%d\n",
-                                        le32_to_cpu(rsp->NewLeaseState));
+       found = false;
+       list_for_each_entry(open, &tcon->pending_opens, olist) {
+               if (memcmp(open->lease_key, rsp->LeaseKey,
+                          SMB2_LEASE_KEY_SIZE))
+                       continue;
+
+               if (!found && ack_req) {
+                       found = true;
+                       memcpy(lw->lease_key, open->lease_key,
+                              SMB2_LEASE_KEY_SIZE);
+                       lw->tlink = cifs_get_tlink(open->tlink);
+                       queue_work(cifsiod_wq, &lw->lease_break);
+               }
 
-                               smb2_set_oplock_level(cinode,
-                                 smb2_map_lease_to_oplock(rsp->NewLeaseState));
+               cifs_dbg(FYI, "found in the pending open list\n");
+               cifs_dbg(FYI, "lease key match, lease break 0x%d\n",
+                        le32_to_cpu(rsp->NewLeaseState));
 
-                               if (ack_req)
-                                       cfile->oplock_break_cancelled = false;
-                               else
-                                       cfile->oplock_break_cancelled = true;
+               open->oplock = lease_state;
+       }
+       return found;
+}
 
-                               queue_work(cifsiod_wq, &cfile->oplock_break);
+static bool
+smb2_is_valid_lease_break(char *buffer)
+{
+       struct smb2_lease_break *rsp = (struct smb2_lease_break *)buffer;
+       struct list_head *tmp, *tmp1, *tmp2;
+       struct TCP_Server_Info *server;
+       struct cifs_ses *ses;
+       struct cifs_tcon *tcon;
+       struct smb2_lease_break_work *lw;
 
-                               spin_unlock(&cifs_file_list_lock);
-                               spin_unlock(&cifs_tcp_ses_lock);
-                               return true;
-                       }
+       lw = kmalloc(sizeof(struct smb2_lease_break_work), GFP_KERNEL);
+       if (!lw)
+               return false;
 
-                       found = false;
-                       list_for_each_entry(open, &tcon->pending_opens, olist) {
-                               if (memcmp(open->lease_key, rsp->LeaseKey,
-                                          SMB2_LEASE_KEY_SIZE))
-                                       continue;
+       INIT_WORK(&lw->lease_break, cifs_ses_oplock_break);
+       lw->lease_state = rsp->NewLeaseState;
 
-                               if (!found && ack_req) {
-                                       found = true;
-                                       memcpy(lw->lease_key, open->lease_key,
-                                              SMB2_LEASE_KEY_SIZE);
-                                       lw->tlink = cifs_get_tlink(open->tlink);
-                                       queue_work(cifsiod_wq,
-                                                  &lw->lease_break);
-                               }
+       cifs_dbg(FYI, "Checking for lease break\n");
 
-                               cifs_dbg(FYI, "found in the pending open list\n");
-                               cifs_dbg(FYI, "lease key match, lease break 0x%d\n",
-                                        le32_to_cpu(rsp->NewLeaseState));
+       /* look up tcon based on tid & uid */
+       spin_lock(&cifs_tcp_ses_lock);
+       list_for_each(tmp, &cifs_tcp_ses_list) {
+               server = list_entry(tmp, struct TCP_Server_Info, tcp_ses_list);
 
-                               open->oplock =
-                                 smb2_map_lease_to_oplock(rsp->NewLeaseState);
-                       }
-                       if (found) {
-                               spin_unlock(&cifs_file_list_lock);
-                               spin_unlock(&cifs_tcp_ses_lock);
-                               return true;
+               list_for_each(tmp1, &server->smb_ses_list) {
+                       ses = list_entry(tmp1, struct cifs_ses, smb_ses_list);
+
+                       spin_lock(&cifs_file_list_lock);
+                       list_for_each(tmp2, &ses->tcon_list) {
+                               tcon = list_entry(tmp2, struct cifs_tcon,
+                                                 tcon_list);
+                               cifs_stats_inc(
+                                   &tcon->stats.cifs_stats.num_oplock_brks);
+                               if (smb2_tcon_has_lease(tcon, rsp, lw)) {
+                                       spin_unlock(&cifs_file_list_lock);
+                                       spin_unlock(&cifs_tcp_ses_lock);
+                                       return true;
+                               }
                        }
+                       spin_unlock(&cifs_file_list_lock);
                }
-               spin_unlock(&cifs_file_list_lock);
        }
        spin_unlock(&cifs_tcp_ses_lock);
        kfree(lw);
@@ -532,7 +541,7 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server)
        if (rsp->StructureSize !=
                                smb2_rsp_struct_sizes[SMB2_OPLOCK_BREAK_HE]) {
                if (le16_to_cpu(rsp->StructureSize) == 44)
-                       return smb2_is_valid_lease_break(buffer, server);
+                       return smb2_is_valid_lease_break(buffer);
                else
                        return false;
        }
@@ -560,14 +569,15 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server)
                                cifs_dbg(FYI, "file id match, oplock break\n");
                                cinode = CIFS_I(cfile->dentry->d_inode);
 
-                               if (!cinode->clientCanCacheAll &&
+                               if (!CIFS_CACHE_WRITE(cinode) &&
                                    rsp->OplockLevel == SMB2_OPLOCK_LEVEL_NONE)
                                        cfile->oplock_break_cancelled = true;
                                else
                                        cfile->oplock_break_cancelled = false;
 
-                               smb2_set_oplock_level(cinode,
-                                 rsp->OplockLevel ? SMB2_OPLOCK_LEVEL_II : 0);
+                               server->ops->set_oplock_level(cinode,
+                                 rsp->OplockLevel ? SMB2_OPLOCK_LEVEL_II : 0,
+                                 0, NULL);
 
                                queue_work(cifsiod_wq, &cfile->oplock_break);
 
index f259e6c..861b332 100644 (file)
@@ -24,6 +24,7 @@
 #include "smb2proto.h"
 #include "cifsproto.h"
 #include "cifs_debug.h"
+#include "cifs_unicode.h"
 #include "smb2status.h"
 #include "smb2glob.h"
 
@@ -229,7 +230,7 @@ smb2_is_path_accessible(const unsigned int xid, struct cifs_tcon *tcon,
        oparms.fid = &fid;
        oparms.reconnect = false;
 
-       rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL);
+       rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, NULL);
        if (rc) {
                kfree(utf16_path);
                return rc;
@@ -376,10 +377,13 @@ static void
 smb2_set_fid(struct cifsFileInfo *cfile, struct cifs_fid *fid, __u32 oplock)
 {
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
+       struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
+
        cfile->fid.persistent_fid = fid->persistent_fid;
        cfile->fid.volatile_fid = fid->volatile_fid;
-       smb2_set_oplock_level(cinode, oplock);
-       cinode->can_cache_brlcks = cinode->clientCanCacheAll;
+       server->ops->set_oplock_level(cinode, oplock, fid->epoch,
+                                     &fid->purge_cache);
+       cinode->can_cache_brlcks = CIFS_CACHE_WRITE(cinode);
 }
 
 static void
@@ -463,7 +467,7 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon,
        oparms.fid = fid;
        oparms.reconnect = false;
 
-       rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL);
+       rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, NULL);
        kfree(utf16_path);
        if (rc) {
                cifs_dbg(VFS, "open dir failed\n");
@@ -530,7 +534,7 @@ smb2_oplock_response(struct cifs_tcon *tcon, struct cifs_fid *fid,
 
        return SMB2_oplock_break(0, tcon, fid->persistent_fid,
                                 fid->volatile_fid,
-                                cinode->clientCanCacheRead ? 1 : 0);
+                                CIFS_CACHE_READ(cinode) ? 1 : 0);
 }
 
 static int
@@ -550,7 +554,7 @@ smb2_queryfs(const unsigned int xid, struct cifs_tcon *tcon,
        oparms.fid = &fid;
        oparms.reconnect = false;
 
-       rc = SMB2_open(xid, &oparms, &srch_path, &oplock, NULL);
+       rc = SMB2_open(xid, &oparms, &srch_path, &oplock, NULL, NULL);
        if (rc)
                return rc;
        buf->f_type = SMB2_MAGIC_NUMBER;
@@ -596,7 +600,245 @@ smb2_new_lease_key(struct cifs_fid *fid)
        get_random_bytes(fid->lease_key, SMB2_LEASE_KEY_SIZE);
 }
 
-struct smb_version_operations smb21_operations = {
+static int
+smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon,
+                  const char *full_path, char **target_path,
+                  struct cifs_sb_info *cifs_sb)
+{
+       int rc;
+       __le16 *utf16_path;
+       __u8 oplock = SMB2_OPLOCK_LEVEL_NONE;
+       struct cifs_open_parms oparms;
+       struct cifs_fid fid;
+       struct smb2_err_rsp *err_buf = NULL;
+       struct smb2_symlink_err_rsp *symlink;
+       unsigned int sub_len, sub_offset;
+
+       cifs_dbg(FYI, "%s: path: %s\n", __func__, full_path);
+
+       utf16_path = cifs_convert_path_to_utf16(full_path, cifs_sb);
+       if (!utf16_path)
+               return -ENOMEM;
+
+       oparms.tcon = tcon;
+       oparms.desired_access = FILE_READ_ATTRIBUTES;
+       oparms.disposition = FILE_OPEN;
+       oparms.create_options = 0;
+       oparms.fid = &fid;
+       oparms.reconnect = false;
+
+       rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, &err_buf);
+
+       if (!rc || !err_buf) {
+               kfree(utf16_path);
+               return -ENOENT;
+       }
+       /* open must fail on symlink - reset rc */
+       rc = 0;
+       symlink = (struct smb2_symlink_err_rsp *)err_buf->ErrorData;
+       sub_len = le16_to_cpu(symlink->SubstituteNameLength);
+       sub_offset = le16_to_cpu(symlink->SubstituteNameOffset);
+       *target_path = cifs_strndup_from_utf16(
+                               (char *)symlink->PathBuffer + sub_offset,
+                               sub_len, true, cifs_sb->local_nls);
+       if (!(*target_path)) {
+               kfree(utf16_path);
+               return -ENOMEM;
+       }
+       convert_delimiter(*target_path, '/');
+       cifs_dbg(FYI, "%s: target path: %s\n", __func__, *target_path);
+       kfree(utf16_path);
+       return rc;
+}
+
+static void
+smb2_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock,
+                     unsigned int epoch, bool *purge_cache)
+{
+       oplock &= 0xFF;
+       if (oplock == SMB2_OPLOCK_LEVEL_NOCHANGE)
+               return;
+       if (oplock == SMB2_OPLOCK_LEVEL_BATCH) {
+               cinode->oplock = CIFS_CACHE_RHW_FLG;
+               cifs_dbg(FYI, "Batch Oplock granted on inode %p\n",
+                        &cinode->vfs_inode);
+       } else if (oplock == SMB2_OPLOCK_LEVEL_EXCLUSIVE) {
+               cinode->oplock = CIFS_CACHE_RW_FLG;
+               cifs_dbg(FYI, "Exclusive Oplock granted on inode %p\n",
+                        &cinode->vfs_inode);
+       } else if (oplock == SMB2_OPLOCK_LEVEL_II) {
+               cinode->oplock = CIFS_CACHE_READ_FLG;
+               cifs_dbg(FYI, "Level II Oplock granted on inode %p\n",
+                        &cinode->vfs_inode);
+       } else
+               cinode->oplock = 0;
+}
+
+static void
+smb21_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock,
+                      unsigned int epoch, bool *purge_cache)
+{
+       char message[5] = {0};
+
+       oplock &= 0xFF;
+       if (oplock == SMB2_OPLOCK_LEVEL_NOCHANGE)
+               return;
+
+       cinode->oplock = 0;
+       if (oplock & SMB2_LEASE_READ_CACHING_HE) {
+               cinode->oplock |= CIFS_CACHE_READ_FLG;
+               strcat(message, "R");
+       }
+       if (oplock & SMB2_LEASE_HANDLE_CACHING_HE) {
+               cinode->oplock |= CIFS_CACHE_HANDLE_FLG;
+               strcat(message, "H");
+       }
+       if (oplock & SMB2_LEASE_WRITE_CACHING_HE) {
+               cinode->oplock |= CIFS_CACHE_WRITE_FLG;
+               strcat(message, "W");
+       }
+       if (!cinode->oplock)
+               strcat(message, "None");
+       cifs_dbg(FYI, "%s Lease granted on inode %p\n", message,
+                &cinode->vfs_inode);
+}
+
+static void
+smb3_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock,
+                     unsigned int epoch, bool *purge_cache)
+{
+       unsigned int old_oplock = cinode->oplock;
+
+       smb21_set_oplock_level(cinode, oplock, epoch, purge_cache);
+
+       if (purge_cache) {
+               *purge_cache = false;
+               if (old_oplock == CIFS_CACHE_READ_FLG) {
+                       if (cinode->oplock == CIFS_CACHE_READ_FLG &&
+                           (epoch - cinode->epoch > 0))
+                               *purge_cache = true;
+                       else if (cinode->oplock == CIFS_CACHE_RH_FLG &&
+                                (epoch - cinode->epoch > 1))
+                               *purge_cache = true;
+                       else if (cinode->oplock == CIFS_CACHE_RHW_FLG &&
+                                (epoch - cinode->epoch > 1))
+                               *purge_cache = true;
+                       else if (cinode->oplock == 0 &&
+                                (epoch - cinode->epoch > 0))
+                               *purge_cache = true;
+               } else if (old_oplock == CIFS_CACHE_RH_FLG) {
+                       if (cinode->oplock == CIFS_CACHE_RH_FLG &&
+                           (epoch - cinode->epoch > 0))
+                               *purge_cache = true;
+                       else if (cinode->oplock == CIFS_CACHE_RHW_FLG &&
+                                (epoch - cinode->epoch > 1))
+                               *purge_cache = true;
+               }
+               cinode->epoch = epoch;
+       }
+}
+
+static bool
+smb2_is_read_op(__u32 oplock)
+{
+       return oplock == SMB2_OPLOCK_LEVEL_II;
+}
+
+static bool
+smb21_is_read_op(__u32 oplock)
+{
+       return (oplock & SMB2_LEASE_READ_CACHING_HE) &&
+              !(oplock & SMB2_LEASE_WRITE_CACHING_HE);
+}
+
+static __le32
+map_oplock_to_lease(u8 oplock)
+{
+       if (oplock == SMB2_OPLOCK_LEVEL_EXCLUSIVE)
+               return SMB2_LEASE_WRITE_CACHING | SMB2_LEASE_READ_CACHING;
+       else if (oplock == SMB2_OPLOCK_LEVEL_II)
+               return SMB2_LEASE_READ_CACHING;
+       else if (oplock == SMB2_OPLOCK_LEVEL_BATCH)
+               return SMB2_LEASE_HANDLE_CACHING | SMB2_LEASE_READ_CACHING |
+                      SMB2_LEASE_WRITE_CACHING;
+       return 0;
+}
+
+static char *
+smb2_create_lease_buf(u8 *lease_key, u8 oplock)
+{
+       struct create_lease *buf;
+
+       buf = kzalloc(sizeof(struct create_lease), GFP_KERNEL);
+       if (!buf)
+               return NULL;
+
+       buf->lcontext.LeaseKeyLow = cpu_to_le64(*((u64 *)lease_key));
+       buf->lcontext.LeaseKeyHigh = cpu_to_le64(*((u64 *)(lease_key + 8)));
+       buf->lcontext.LeaseState = map_oplock_to_lease(oplock);
+
+       buf->ccontext.DataOffset = cpu_to_le16(offsetof
+                                       (struct create_lease, lcontext));
+       buf->ccontext.DataLength = cpu_to_le32(sizeof(struct lease_context));
+       buf->ccontext.NameOffset = cpu_to_le16(offsetof
+                               (struct create_lease, Name));
+       buf->ccontext.NameLength = cpu_to_le16(4);
+       buf->Name[0] = 'R';
+       buf->Name[1] = 'q';
+       buf->Name[2] = 'L';
+       buf->Name[3] = 's';
+       return (char *)buf;
+}
+
+static char *
+smb3_create_lease_buf(u8 *lease_key, u8 oplock)
+{
+       struct create_lease_v2 *buf;
+
+       buf = kzalloc(sizeof(struct create_lease_v2), GFP_KERNEL);
+       if (!buf)
+               return NULL;
+
+       buf->lcontext.LeaseKeyLow = cpu_to_le64(*((u64 *)lease_key));
+       buf->lcontext.LeaseKeyHigh = cpu_to_le64(*((u64 *)(lease_key + 8)));
+       buf->lcontext.LeaseState = map_oplock_to_lease(oplock);
+
+       buf->ccontext.DataOffset = cpu_to_le16(offsetof
+                                       (struct create_lease_v2, lcontext));
+       buf->ccontext.DataLength = cpu_to_le32(sizeof(struct lease_context_v2));
+       buf->ccontext.NameOffset = cpu_to_le16(offsetof
+                               (struct create_lease_v2, Name));
+       buf->ccontext.NameLength = cpu_to_le16(4);
+       buf->Name[0] = 'R';
+       buf->Name[1] = 'q';
+       buf->Name[2] = 'L';
+       buf->Name[3] = 's';
+       return (char *)buf;
+}
+
+static __u8
+smb2_parse_lease_buf(void *buf, unsigned int *epoch)
+{
+       struct create_lease *lc = (struct create_lease *)buf;
+
+       *epoch = 0; /* not used */
+       if (lc->lcontext.LeaseFlags & SMB2_LEASE_FLAG_BREAK_IN_PROGRESS)
+               return SMB2_OPLOCK_LEVEL_NOCHANGE;
+       return le32_to_cpu(lc->lcontext.LeaseState);
+}
+
+static __u8
+smb3_parse_lease_buf(void *buf, unsigned int *epoch)
+{
+       struct create_lease_v2 *lc = (struct create_lease_v2 *)buf;
+
+       *epoch = le16_to_cpu(lc->lcontext.Epoch);
+       if (lc->lcontext.LeaseFlags & SMB2_LEASE_FLAG_BREAK_IN_PROGRESS)
+               return SMB2_OPLOCK_LEVEL_NOCHANGE;
+       return le32_to_cpu(lc->lcontext.LeaseState);
+}
+
+struct smb_version_operations smb20_operations = {
        .compare_fids = smb2_compare_fids,
        .setup_request = smb2_setup_request,
        .setup_async_request = smb2_setup_async_request,
@@ -638,6 +880,7 @@ struct smb_version_operations smb21_operations = {
        .unlink = smb2_unlink,
        .rename = smb2_rename_path,
        .create_hardlink = smb2_create_hardlink,
+       .query_symlink = smb2_query_symlink,
        .open = smb2_open_file,
        .set_fid = smb2_set_fid,
        .close = smb2_close_file,
@@ -660,8 +903,82 @@ struct smb_version_operations smb21_operations = {
        .set_lease_key = smb2_set_lease_key,
        .new_lease_key = smb2_new_lease_key,
        .calc_signature = smb2_calc_signature,
+       .is_read_op = smb2_is_read_op,
+       .set_oplock_level = smb2_set_oplock_level,
+       .create_lease_buf = smb2_create_lease_buf,
+       .parse_lease_buf = smb2_parse_lease_buf,
 };
 
+struct smb_version_operations smb21_operations = {
+       .compare_fids = smb2_compare_fids,
+       .setup_request = smb2_setup_request,
+       .setup_async_request = smb2_setup_async_request,
+       .check_receive = smb2_check_receive,
+       .add_credits = smb2_add_credits,
+       .set_credits = smb2_set_credits,
+       .get_credits_field = smb2_get_credits_field,
+       .get_credits = smb2_get_credits,
+       .get_next_mid = smb2_get_next_mid,
+       .read_data_offset = smb2_read_data_offset,
+       .read_data_length = smb2_read_data_length,
+       .map_error = map_smb2_to_linux_error,
+       .find_mid = smb2_find_mid,
+       .check_message = smb2_check_message,
+       .dump_detail = smb2_dump_detail,
+       .clear_stats = smb2_clear_stats,
+       .print_stats = smb2_print_stats,
+       .is_oplock_break = smb2_is_valid_oplock_break,
+       .need_neg = smb2_need_neg,
+       .negotiate = smb2_negotiate,
+       .negotiate_wsize = smb2_negotiate_wsize,
+       .negotiate_rsize = smb2_negotiate_rsize,
+       .sess_setup = SMB2_sess_setup,
+       .logoff = SMB2_logoff,
+       .tree_connect = SMB2_tcon,
+       .tree_disconnect = SMB2_tdis,
+       .is_path_accessible = smb2_is_path_accessible,
+       .can_echo = smb2_can_echo,
+       .echo = SMB2_echo,
+       .query_path_info = smb2_query_path_info,
+       .get_srv_inum = smb2_get_srv_inum,
+       .query_file_info = smb2_query_file_info,
+       .set_path_size = smb2_set_path_size,
+       .set_file_size = smb2_set_file_size,
+       .set_file_info = smb2_set_file_info,
+       .mkdir = smb2_mkdir,
+       .mkdir_setinfo = smb2_mkdir_setinfo,
+       .rmdir = smb2_rmdir,
+       .unlink = smb2_unlink,
+       .rename = smb2_rename_path,
+       .create_hardlink = smb2_create_hardlink,
+       .query_symlink = smb2_query_symlink,
+       .open = smb2_open_file,
+       .set_fid = smb2_set_fid,
+       .close = smb2_close_file,
+       .flush = smb2_flush_file,
+       .async_readv = smb2_async_readv,
+       .async_writev = smb2_async_writev,
+       .sync_read = smb2_sync_read,
+       .sync_write = smb2_sync_write,
+       .query_dir_first = smb2_query_dir_first,
+       .query_dir_next = smb2_query_dir_next,
+       .close_dir = smb2_close_dir,
+       .calc_smb_size = smb2_calc_size,
+       .is_status_pending = smb2_is_status_pending,
+       .oplock_response = smb2_oplock_response,
+       .queryfs = smb2_queryfs,
+       .mand_lock = smb2_mand_lock,
+       .mand_unlock_range = smb2_unlock_range,
+       .push_mand_locks = smb2_push_mandatory_locks,
+       .get_lease_key = smb2_get_lease_key,
+       .set_lease_key = smb2_set_lease_key,
+       .new_lease_key = smb2_new_lease_key,
+       .calc_signature = smb2_calc_signature,
+       .is_read_op = smb21_is_read_op,
+       .set_oplock_level = smb21_set_oplock_level,
+       .create_lease_buf = smb2_create_lease_buf,
+       .parse_lease_buf = smb2_parse_lease_buf,
+};
 
 struct smb_version_operations smb30_operations = {
        .compare_fids = smb2_compare_fids,
@@ -706,6 +1023,7 @@ struct smb_version_operations smb30_operations = {
        .unlink = smb2_unlink,
        .rename = smb2_rename_path,
        .create_hardlink = smb2_create_hardlink,
+       .query_symlink = smb2_query_symlink,
        .open = smb2_open_file,
        .set_fid = smb2_set_fid,
        .close = smb2_close_file,
@@ -729,6 +1047,10 @@ struct smb_version_operations smb30_operations = {
        .new_lease_key = smb2_new_lease_key,
        .generate_signingkey = generate_smb3signingkey,
        .calc_signature = smb3_calc_signature,
+       .is_read_op = smb21_is_read_op,
+       .set_oplock_level = smb3_set_oplock_level,
+       .create_lease_buf = smb3_create_lease_buf,
+       .parse_lease_buf = smb3_parse_lease_buf,
 };
 
 struct smb_version_values smb20_values = {
@@ -746,9 +1068,9 @@ struct smb_version_values smb20_values = {
        .cap_unix = 0,
        .cap_nt_find = SMB2_NT_FIND,
        .cap_large_files = SMB2_LARGE_FILES,
-       .oplock_read = SMB2_OPLOCK_LEVEL_II,
        .signing_enabled = SMB2_NEGOTIATE_SIGNING_ENABLED | SMB2_NEGOTIATE_SIGNING_REQUIRED,
        .signing_required = SMB2_NEGOTIATE_SIGNING_REQUIRED,
+       .create_lease_size = sizeof(struct create_lease),
 };
 
 struct smb_version_values smb21_values = {
@@ -766,9 +1088,9 @@ struct smb_version_values smb21_values = {
        .cap_unix = 0,
        .cap_nt_find = SMB2_NT_FIND,
        .cap_large_files = SMB2_LARGE_FILES,
-       .oplock_read = SMB2_OPLOCK_LEVEL_II,
        .signing_enabled = SMB2_NEGOTIATE_SIGNING_ENABLED | SMB2_NEGOTIATE_SIGNING_REQUIRED,
        .signing_required = SMB2_NEGOTIATE_SIGNING_REQUIRED,
+       .create_lease_size = sizeof(struct create_lease),
 };
 
 struct smb_version_values smb30_values = {
@@ -786,9 +1108,9 @@ struct smb_version_values smb30_values = {
        .cap_unix = 0,
        .cap_nt_find = SMB2_NT_FIND,
        .cap_large_files = SMB2_LARGE_FILES,
-       .oplock_read = SMB2_OPLOCK_LEVEL_II,
        .signing_enabled = SMB2_NEGOTIATE_SIGNING_ENABLED | SMB2_NEGOTIATE_SIGNING_REQUIRED,
        .signing_required = SMB2_NEGOTIATE_SIGNING_REQUIRED,
+       .create_lease_size = sizeof(struct create_lease_v2),
 };
 
 struct smb_version_values smb302_values = {
@@ -806,7 +1128,7 @@ struct smb_version_values smb302_values = {
        .cap_unix = 0,
        .cap_nt_find = SMB2_NT_FIND,
        .cap_large_files = SMB2_LARGE_FILES,
-       .oplock_read = SMB2_OPLOCK_LEVEL_II,
        .signing_enabled = SMB2_NEGOTIATE_SIGNING_ENABLED | SMB2_NEGOTIATE_SIGNING_REQUIRED,
        .signing_required = SMB2_NEGOTIATE_SIGNING_REQUIRED,
+       .create_lease_size = sizeof(struct create_lease_v2),
 };
index abc9c28..eba0efd 100644 (file)
@@ -477,6 +477,13 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses,
                return -EIO;
        }
 
+       /*
+        * If we are here due to reconnect, free per-smb session key
+        * in case signing was required.
+        */
+       kfree(ses->auth_key.response);
+       ses->auth_key.response = NULL;
+
        /*
         * If memory allocation is successful, caller of this function
         * frees it.
@@ -484,6 +491,7 @@ SMB2_sess_setup(const unsigned int xid, struct cifs_ses *ses,
        ses->ntlmssp = kmalloc(sizeof(struct ntlmssp_auth), GFP_KERNEL);
        if (!ses->ntlmssp)
                return -ENOMEM;
+       ses->ntlmssp->sesskey_per_smbsess = true;
 
        /* FIXME: allow for other auth types besides NTLMSSP (e.g. krb5) */
        ses->sectype = RawNTLMSSP;
@@ -628,6 +636,40 @@ ssetup_exit:
        /* if ntlmssp, and negotiate succeeded, proceed to authenticate phase */
        if ((phase == NtLmChallenge) && (rc == 0))
                goto ssetup_ntlmssp_authenticate;
+
+       if (!rc) {
+               mutex_lock(&server->srv_mutex);
+               if (server->sign && server->ops->generate_signingkey) {
+                       rc = server->ops->generate_signingkey(ses);
+                       kfree(ses->auth_key.response);
+                       ses->auth_key.response = NULL;
+                       if (rc) {
+                               cifs_dbg(FYI,
+                                       "SMB3 session key generation failed\n");
+                               mutex_unlock(&server->srv_mutex);
+                               goto keygen_exit;
+                       }
+               }
+               if (!server->session_estab) {
+                       server->sequence_number = 0x2;
+                       server->session_estab = true;
+               }
+               mutex_unlock(&server->srv_mutex);
+
+               cifs_dbg(FYI, "SMB2/3 session established successfully\n");
+               spin_lock(&GlobalMid_Lock);
+               ses->status = CifsGood;
+               ses->need_reconnect = false;
+               spin_unlock(&GlobalMid_Lock);
+       }
+
+keygen_exit:
+       if (!server->sign) {
+               kfree(ses->auth_key.response);
+               ses->auth_key.response = NULL;
+       }
+       kfree(ses->ntlmssp);
+
        return rc;
 }
 
@@ -813,39 +855,6 @@ SMB2_tdis(const unsigned int xid, struct cifs_tcon *tcon)
        return rc;
 }
 
-static struct create_lease *
-create_lease_buf(u8 *lease_key, u8 oplock)
-{
-       struct create_lease *buf;
-
-       buf = kzalloc(sizeof(struct create_lease), GFP_KERNEL);
-       if (!buf)
-               return NULL;
-
-       buf->lcontext.LeaseKeyLow = cpu_to_le64(*((u64 *)lease_key));
-       buf->lcontext.LeaseKeyHigh = cpu_to_le64(*((u64 *)(lease_key + 8)));
-       if (oplock == SMB2_OPLOCK_LEVEL_EXCLUSIVE)
-               buf->lcontext.LeaseState = SMB2_LEASE_WRITE_CACHING |
-                                          SMB2_LEASE_READ_CACHING;
-       else if (oplock == SMB2_OPLOCK_LEVEL_II)
-               buf->lcontext.LeaseState = SMB2_LEASE_READ_CACHING;
-       else if (oplock == SMB2_OPLOCK_LEVEL_BATCH)
-               buf->lcontext.LeaseState = SMB2_LEASE_HANDLE_CACHING |
-                                          SMB2_LEASE_READ_CACHING |
-                                          SMB2_LEASE_WRITE_CACHING;
-
-       buf->ccontext.DataOffset = cpu_to_le16(offsetof
-                                       (struct create_lease, lcontext));
-       buf->ccontext.DataLength = cpu_to_le32(sizeof(struct lease_context));
-       buf->ccontext.NameOffset = cpu_to_le16(offsetof
-                               (struct create_lease, Name));
-       buf->ccontext.NameLength = cpu_to_le16(4);
-       buf->Name[0] = 'R';
-       buf->Name[1] = 'q';
-       buf->Name[2] = 'L';
-       buf->Name[3] = 's';
-       return buf;
-}
 
 static struct create_durable *
 create_durable_buf(void)
@@ -894,55 +903,49 @@ create_reconnect_durable_buf(struct cifs_fid *fid)
 }
 
 static __u8
-parse_lease_state(struct smb2_create_rsp *rsp)
+parse_lease_state(struct TCP_Server_Info *server, struct smb2_create_rsp *rsp,
+                 unsigned int *epoch)
 {
        char *data_offset;
-       struct create_lease *lc;
-       bool found = false;
+       struct create_context *cc;
        unsigned int next = 0;
        char *name;
 
        data_offset = (char *)rsp + 4 + le32_to_cpu(rsp->CreateContextsOffset);
-       lc = (struct create_lease *)data_offset;
+       cc = (struct create_context *)data_offset;
        do {
-               lc = (struct create_lease *)((char *)lc + next);
-               name = le16_to_cpu(lc->ccontext.NameOffset) + (char *)lc;
-               if (le16_to_cpu(lc->ccontext.NameLength) != 4 ||
+               cc = (struct create_context *)((char *)cc + next);
+               name = le16_to_cpu(cc->NameOffset) + (char *)cc;
+               if (le16_to_cpu(cc->NameLength) != 4 ||
                    strncmp(name, "RqLs", 4)) {
-                       next = le32_to_cpu(lc->ccontext.Next);
+                       next = le32_to_cpu(cc->Next);
                        continue;
                }
-               if (lc->lcontext.LeaseFlags & SMB2_LEASE_FLAG_BREAK_IN_PROGRESS)
-                       return SMB2_OPLOCK_LEVEL_NOCHANGE;
-               found = true;
-               break;
+               return server->ops->parse_lease_buf(cc, epoch);
        } while (next != 0);
 
-       if (!found)
-               return 0;
-
-       return smb2_map_lease_to_oplock(lc->lcontext.LeaseState);
+       return 0;
 }
 
 static int
-add_lease_context(struct kvec *iov, unsigned int *num_iovec, __u8 *oplock)
+add_lease_context(struct TCP_Server_Info *server, struct kvec *iov,
+                 unsigned int *num_iovec, __u8 *oplock)
 {
        struct smb2_create_req *req = iov[0].iov_base;
        unsigned int num = *num_iovec;
 
-       iov[num].iov_base = create_lease_buf(oplock+1, *oplock);
+       iov[num].iov_base = server->ops->create_lease_buf(oplock+1, *oplock);
        if (iov[num].iov_base == NULL)
                return -ENOMEM;
-       iov[num].iov_len = sizeof(struct create_lease);
+       iov[num].iov_len = server->vals->create_lease_size;
        req->RequestedOplockLevel = SMB2_OPLOCK_LEVEL_LEASE;
        if (!req->CreateContextsOffset)
                req->CreateContextsOffset = cpu_to_le32(
                                sizeof(struct smb2_create_req) - 4 +
                                iov[num - 1].iov_len);
-       req->CreateContextsLength = cpu_to_le32(
-                               le32_to_cpu(req->CreateContextsLength) +
-                               sizeof(struct create_lease));
-       inc_rfc1001_len(&req->hdr, sizeof(struct create_lease));
+       le32_add_cpu(&req->CreateContextsLength,
+                    server->vals->create_lease_size);
+       inc_rfc1001_len(&req->hdr, server->vals->create_lease_size);
        *num_iovec = num + 1;
        return 0;
 }
@@ -967,9 +970,7 @@ add_durable_context(struct kvec *iov, unsigned int *num_iovec,
                req->CreateContextsOffset =
                        cpu_to_le32(sizeof(struct smb2_create_req) - 4 +
                                                                iov[1].iov_len);
-       req->CreateContextsLength =
-                       cpu_to_le32(le32_to_cpu(req->CreateContextsLength) +
-                                               sizeof(struct create_durable));
+       le32_add_cpu(&req->CreateContextsLength, sizeof(struct create_durable));
        inc_rfc1001_len(&req->hdr, sizeof(struct create_durable));
        *num_iovec = num + 1;
        return 0;
@@ -977,7 +978,8 @@ add_durable_context(struct kvec *iov, unsigned int *num_iovec,
 
 int
 SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
-         __u8 *oplock, struct smb2_file_all_info *buf)
+         __u8 *oplock, struct smb2_file_all_info *buf,
+         struct smb2_err_rsp **err_buf)
 {
        struct smb2_create_req *req;
        struct smb2_create_rsp *rsp;
@@ -1048,11 +1050,11 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
        if (!server->oplocks)
                *oplock = SMB2_OPLOCK_LEVEL_NONE;
 
-       if (!(tcon->ses->server->capabilities & SMB2_GLOBAL_CAP_LEASING) ||
+       if (!(server->capabilities & SMB2_GLOBAL_CAP_LEASING) ||
            *oplock == SMB2_OPLOCK_LEVEL_NONE)
                req->RequestedOplockLevel = *oplock;
        else {
-               rc = add_lease_context(iov, &num_iovecs, oplock);
+               rc = add_lease_context(server, iov, &num_iovecs, oplock);
                if (rc) {
                        cifs_small_buf_release(req);
                        kfree(copy_path);
@@ -1062,11 +1064,11 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
 
        if (*oplock == SMB2_OPLOCK_LEVEL_BATCH) {
                /* need to set Next field of lease context if we request it */
-               if (tcon->ses->server->capabilities & SMB2_GLOBAL_CAP_LEASING) {
+               if (server->capabilities & SMB2_GLOBAL_CAP_LEASING) {
                        struct create_context *ccontext =
                            (struct create_context *)iov[num_iovecs-1].iov_base;
                        ccontext->Next =
-                               cpu_to_le32(sizeof(struct create_lease));
+                               cpu_to_le32(server->vals->create_lease_size);
                }
                rc = add_durable_context(iov, &num_iovecs, oparms);
                if (rc) {
@@ -1082,6 +1084,9 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
 
        if (rc != 0) {
                cifs_stats_fail_inc(tcon, SMB2_CREATE_HE);
+               if (err_buf)
+                       *err_buf = kmemdup(rsp, get_rfc1002_length(rsp) + 4,
+                                          GFP_KERNEL);
                goto creat_exit;
        }
 
@@ -1098,7 +1103,7 @@ SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms, __le16 *path,
        }
 
        if (rsp->OplockLevel == SMB2_OPLOCK_LEVEL_LEASE)
-               *oplock = parse_lease_state(rsp);
+               *oplock = parse_lease_state(server, rsp, &oparms->fid->epoch);
        else
                *oplock = rsp->OplockLevel;
 creat_exit:
index 36b0d37..b83d011 100644 (file)
@@ -150,6 +150,20 @@ struct smb2_err_rsp {
        __u8   ErrorData[1];  /* variable length */
 } __packed;
 
+struct smb2_symlink_err_rsp {
+       __le32 SymLinkLength;
+       __le32 SymLinkErrorTag;
+       __le32 ReparseTag;
+       __le16 ReparseDataLength;
+       __le16 UnparsedPathLength;
+       __le16 SubstituteNameOffset;
+       __le16 SubstituteNameLength;
+       __le16 PrintNameOffset;
+       __le16 PrintNameLength;
+       __le32 Flags;
+       __u8  PathBuffer[0];
+} __packed;
+
 #define SMB2_CLIENT_GUID_SIZE 16
 
 extern __u8 cifs_client_guid[SMB2_CLIENT_GUID_SIZE];
@@ -462,6 +476,10 @@ struct create_context {
        __u8 Buffer[0];
 } __packed;
 
+#define SMB2_LEASE_READ_CACHING_HE     0x01
+#define SMB2_LEASE_HANDLE_CACHING_HE   0x02
+#define SMB2_LEASE_WRITE_CACHING_HE    0x04
+
 #define SMB2_LEASE_NONE                        __constant_cpu_to_le32(0x00)
 #define SMB2_LEASE_READ_CACHING                __constant_cpu_to_le32(0x01)
 #define SMB2_LEASE_HANDLE_CACHING      __constant_cpu_to_le32(0x02)
@@ -479,12 +497,31 @@ struct lease_context {
        __le64 LeaseDuration;
 } __packed;
 
+struct lease_context_v2 {
+       __le64 LeaseKeyLow;
+       __le64 LeaseKeyHigh;
+       __le32 LeaseState;
+       __le32 LeaseFlags;
+       __le64 LeaseDuration;
+       __le64 ParentLeaseKeyLow;
+       __le64 ParentLeaseKeyHigh;
+       __le16 Epoch;
+       __le16 Reserved;
+} __packed;
+
 struct create_lease {
        struct create_context ccontext;
        __u8   Name[8];
        struct lease_context lcontext;
 } __packed;
 
+struct create_lease_v2 {
+       struct create_context ccontext;
+       __u8   Name[8];
+       struct lease_context_v2 lcontext;
+       __u8   Pad[4];
+} __packed;
+
 struct create_durable {
        struct create_context ccontext;
        __u8   Name[8];
index 1a5ecbe..e3fb480 100644 (file)
@@ -53,7 +53,6 @@ extern int smb3_calc_signature(struct smb_rqst *rqst,
                                struct TCP_Server_Info *server);
 extern void smb2_echo_request(struct work_struct *work);
 extern __le32 smb2_get_lease_state(struct cifsInodeInfo *cinode);
-extern __u8 smb2_map_lease_to_oplock(__le32 lease_state);
 extern bool smb2_is_valid_oplock_break(char *buffer,
                                       struct TCP_Server_Info *srv);
 
@@ -87,7 +86,6 @@ extern int smb2_create_hardlink(const unsigned int xid, struct cifs_tcon *tcon,
 extern int smb2_open_file(const unsigned int xid,
                          struct cifs_open_parms *oparms,
                          __u32 *oplock, FILE_ALL_INFO *buf);
-extern void smb2_set_oplock_level(struct cifsInodeInfo *cinode, __u32 oplock);
 extern int smb2_unlock_range(struct cifsFileInfo *cfile,
                             struct file_lock *flock, const unsigned int xid);
 extern int smb2_push_mandatory_locks(struct cifsFileInfo *cfile);
@@ -106,7 +104,8 @@ extern int SMB2_tcon(const unsigned int xid, struct cifs_ses *ses,
 extern int SMB2_tdis(const unsigned int xid, struct cifs_tcon *tcon);
 extern int SMB2_open(const unsigned int xid, struct cifs_open_parms *oparms,
                     __le16 *path, __u8 *oplock,
-                    struct smb2_file_all_info *buf);
+                    struct smb2_file_all_info *buf,
+                    struct smb2_err_rsp **err_buf);
 extern int SMB2_ioctl(const unsigned int xid, struct cifs_tcon *tcon,
                     u64 persistent_fid, u64 volatile_fid, u32 opcode,
                     bool is_fsctl, char *in_data, u32 indatalen,
index 4f2300d..340abca 100644 (file)
@@ -114,6 +114,23 @@ smb3_crypto_shash_allocate(struct TCP_Server_Info *server)
        return 0;
 }
 
+static struct cifs_ses *
+smb2_find_smb_ses(struct smb2_hdr *smb2hdr, struct TCP_Server_Info *server)
+{
+       struct cifs_ses *ses;
+
+       spin_lock(&cifs_tcp_ses_lock);
+       list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
+               if (ses->Suid != smb2hdr->SessionId)
+                       continue;
+               spin_unlock(&cifs_tcp_ses_lock);
+               return ses;
+       }
+       spin_unlock(&cifs_tcp_ses_lock);
+
+       return NULL;
+}
+
 
 int
 smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
@@ -124,6 +141,13 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
        struct kvec *iov = rqst->rq_iov;
        int n_vec = rqst->rq_nvec;
        struct smb2_hdr *smb2_pdu = (struct smb2_hdr *)iov[0].iov_base;
+       struct cifs_ses *ses;
+
+       ses = smb2_find_smb_ses(smb2_pdu, server);
+       if (!ses) {
+               cifs_dbg(VFS, "%s: Could not find session\n", __func__);
+               return 0;
+       }
 
        memset(smb2_signature, 0x0, SMB2_HMACSHA256_SIZE);
        memset(smb2_pdu->Signature, 0x0, SMB2_SIGNATURE_SIZE);
@@ -135,7 +159,7 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
        }
 
        rc = crypto_shash_setkey(server->secmech.hmacsha256,
-               server->session_key.response, SMB2_NTLMV2_SESSKEY_SIZE);
+               ses->auth_key.response, SMB2_NTLMV2_SESSKEY_SIZE);
        if (rc) {
                cifs_dbg(VFS, "%s: Could not update with response\n", __func__);
                return rc;
@@ -198,8 +222,8 @@ smb2_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
        return rc;
 }
 
-void
-generate_smb3signingkey(struct TCP_Server_Info *server)
+int
+generate_smb3signingkey(struct cifs_ses *ses)
 {
        unsigned char zero = 0x0;
        __u8 i[4] = {0, 0, 0, 1};
@@ -209,90 +233,99 @@ generate_smb3signingkey(struct TCP_Server_Info *server)
        unsigned char *hashptr = prfhash;
 
        memset(prfhash, 0x0, SMB2_HMACSHA256_SIZE);
-       memset(server->smb3signingkey, 0x0, SMB3_SIGNKEY_SIZE);
+       memset(ses->smb3signingkey, 0x0, SMB3_SIGNKEY_SIZE);
 
-       rc = smb3_crypto_shash_allocate(server);
+       rc = smb3_crypto_shash_allocate(ses->server);
        if (rc) {
                cifs_dbg(VFS, "%s: crypto alloc failed\n", __func__);
                goto smb3signkey_ret;
        }
 
-       rc = crypto_shash_setkey(server->secmech.hmacsha256,
-               server->session_key.response, SMB2_NTLMV2_SESSKEY_SIZE);
+       rc = crypto_shash_setkey(ses->server->secmech.hmacsha256,
+               ses->auth_key.response, SMB2_NTLMV2_SESSKEY_SIZE);
        if (rc) {
                cifs_dbg(VFS, "%s: Could not set with session key\n", __func__);
                goto smb3signkey_ret;
        }
 
-       rc = crypto_shash_init(&server->secmech.sdeschmacsha256->shash);
+       rc = crypto_shash_init(&ses->server->secmech.sdeschmacsha256->shash);
        if (rc) {
                cifs_dbg(VFS, "%s: Could not init sign hmac\n", __func__);
                goto smb3signkey_ret;
        }
 
-       rc = crypto_shash_update(&server->secmech.sdeschmacsha256->shash,
+       rc = crypto_shash_update(&ses->server->secmech.sdeschmacsha256->shash,
                                i, 4);
        if (rc) {
                cifs_dbg(VFS, "%s: Could not update with n\n", __func__);
                goto smb3signkey_ret;
        }
 
-       rc = crypto_shash_update(&server->secmech.sdeschmacsha256->shash,
+       rc = crypto_shash_update(&ses->server->secmech.sdeschmacsha256->shash,
                                "SMB2AESCMAC", 12);
        if (rc) {
                cifs_dbg(VFS, "%s: Could not update with label\n", __func__);
                goto smb3signkey_ret;
        }
 
-       rc = crypto_shash_update(&server->secmech.sdeschmacsha256->shash,
+       rc = crypto_shash_update(&ses->server->secmech.sdeschmacsha256->shash,
                                &zero, 1);
        if (rc) {
                cifs_dbg(VFS, "%s: Could not update with zero\n", __func__);
                goto smb3signkey_ret;
        }
 
-       rc = crypto_shash_update(&server->secmech.sdeschmacsha256->shash,
+       rc = crypto_shash_update(&ses->server->secmech.sdeschmacsha256->shash,
                                "SmbSign", 8);
        if (rc) {
                cifs_dbg(VFS, "%s: Could not update with context\n", __func__);
                goto smb3signkey_ret;
        }
 
-       rc = crypto_shash_update(&server->secmech.sdeschmacsha256->shash,
+       rc = crypto_shash_update(&ses->server->secmech.sdeschmacsha256->shash,
                                L, 4);
        if (rc) {
                cifs_dbg(VFS, "%s: Could not update with L\n", __func__);
                goto smb3signkey_ret;
        }
 
-       rc = crypto_shash_final(&server->secmech.sdeschmacsha256->shash,
+       rc = crypto_shash_final(&ses->server->secmech.sdeschmacsha256->shash,
                                hashptr);
        if (rc) {
                cifs_dbg(VFS, "%s: Could not generate sha256 hash\n", __func__);
                goto smb3signkey_ret;
        }
 
-       memcpy(server->smb3signingkey, hashptr, SMB3_SIGNKEY_SIZE);
+       memcpy(ses->smb3signingkey, hashptr, SMB3_SIGNKEY_SIZE);
 
 smb3signkey_ret:
-       return;
+       return rc;
 }
 
 int
 smb3_calc_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
 {
-       int i, rc;
+       int i;
+       int rc = 0;
        unsigned char smb3_signature[SMB2_CMACAES_SIZE];
        unsigned char *sigptr = smb3_signature;
        struct kvec *iov = rqst->rq_iov;
        int n_vec = rqst->rq_nvec;
        struct smb2_hdr *smb2_pdu = (struct smb2_hdr *)iov[0].iov_base;
+       struct cifs_ses *ses;
+
+       ses = smb2_find_smb_ses(smb2_pdu, server);
+       if (!ses) {
+               cifs_dbg(VFS, "%s: Could not find session\n", __func__);
+               return 0;
+       }
 
        memset(smb3_signature, 0x0, SMB2_CMACAES_SIZE);
        memset(smb2_pdu->Signature, 0x0, SMB2_SIGNATURE_SIZE);
 
        rc = crypto_shash_setkey(server->secmech.cmacaes,
-               server->smb3signingkey, SMB2_CMACAES_SIZE);
+               ses->smb3signingkey, SMB2_CMACAES_SIZE);
+
        if (rc) {
                cifs_dbg(VFS, "%s: Could not set key for cmac aes\n", __func__);
                return rc;
@@ -389,6 +422,7 @@ smb2_verify_signature(struct smb_rqst *rqst, struct TCP_Server_Info *server)
        struct smb2_hdr *smb2_pdu = (struct smb2_hdr *)rqst->rq_iov[0].iov_base;
 
        if ((smb2_pdu->Command == SMB2_NEGOTIATE) ||
+           (smb2_pdu->Command == SMB2_SESSION_SETUP) ||
            (smb2_pdu->Command == SMB2_OPLOCK_BREAK) ||
            (!server->session_estab))
                return 0;
diff --git a/fs/cifs/winucase.c b/fs/cifs/winucase.c
new file mode 100644 (file)
index 0000000..1506d4f
--- /dev/null
@@ -0,0 +1,663 @@
+/*
+ * fs/cifs/winucase.c
+ *
+ * Copyright (c) Jeffrey Layton <jlayton@redhat.com>, 2013
+ *
+ * This program is free software;  you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY;  without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ * the GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program;  if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * The const tables in this file were converted from the following info
+ * provided by Microsoft:
+ *
+ * 3.1.5.3 Mapping UTF-16 Strings to Upper Case:
+ *
+ * http://msdn.microsoft.com/en-us/library/hh877830.aspx
+ * http://www.microsoft.com/en-us/download/details.aspx?displaylang=en&id=10921
+ *
+ * In particular, the table in "Windows 8 Upper Case Mapping Table.txt" was
+ * post-processed using the winucase_convert.pl script.
+ */
+
+#include <linux/nls.h>
+
+wchar_t cifs_toupper(wchar_t in);  /* quiet sparse */
+
+static const wchar_t t2_00[256] = {
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
+       0x0048, 0x0049, 0x004a, 0x004b, 0x004c, 0x004d, 0x004e, 0x004f,
+       0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
+       0x0058, 0x0059, 0x005a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7,
+       0x00c8, 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf,
+       0x00d0, 0x00d1, 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0000,
+       0x00d8, 0x00d9, 0x00da, 0x00db, 0x00dc, 0x00dd, 0x00de, 0x0178,
+};
+
+static const wchar_t t2_01[256] = {
+       0x0000, 0x0100, 0x0000, 0x0102, 0x0000, 0x0104, 0x0000, 0x0106,
+       0x0000, 0x0108, 0x0000, 0x010a, 0x0000, 0x010c, 0x0000, 0x010e,
+       0x0000, 0x0110, 0x0000, 0x0112, 0x0000, 0x0114, 0x0000, 0x0116,
+       0x0000, 0x0118, 0x0000, 0x011a, 0x0000, 0x011c, 0x0000, 0x011e,
+       0x0000, 0x0120, 0x0000, 0x0122, 0x0000, 0x0124, 0x0000, 0x0126,
+       0x0000, 0x0128, 0x0000, 0x012a, 0x0000, 0x012c, 0x0000, 0x012e,
+       0x0000, 0x0000, 0x0000, 0x0132, 0x0000, 0x0134, 0x0000, 0x0136,
+       0x0000, 0x0000, 0x0139, 0x0000, 0x013b, 0x0000, 0x013d, 0x0000,
+       0x013f, 0x0000, 0x0141, 0x0000, 0x0143, 0x0000, 0x0145, 0x0000,
+       0x0147, 0x0000, 0x0000, 0x014a, 0x0000, 0x014c, 0x0000, 0x014e,
+       0x0000, 0x0150, 0x0000, 0x0152, 0x0000, 0x0154, 0x0000, 0x0156,
+       0x0000, 0x0158, 0x0000, 0x015a, 0x0000, 0x015c, 0x0000, 0x015e,
+       0x0000, 0x0160, 0x0000, 0x0162, 0x0000, 0x0164, 0x0000, 0x0166,
+       0x0000, 0x0168, 0x0000, 0x016a, 0x0000, 0x016c, 0x0000, 0x016e,
+       0x0000, 0x0170, 0x0000, 0x0172, 0x0000, 0x0174, 0x0000, 0x0176,
+       0x0000, 0x0000, 0x0179, 0x0000, 0x017b, 0x0000, 0x017d, 0x0000,
+       0x0243, 0x0000, 0x0000, 0x0182, 0x0000, 0x0184, 0x0000, 0x0000,
+       0x0187, 0x0000, 0x0000, 0x0000, 0x018b, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0191, 0x0000, 0x0000, 0x01f6, 0x0000, 0x0000,
+       0x0000, 0x0198, 0x023d, 0x0000, 0x0000, 0x0000, 0x0220, 0x0000,
+       0x0000, 0x01a0, 0x0000, 0x01a2, 0x0000, 0x01a4, 0x0000, 0x0000,
+       0x01a7, 0x0000, 0x0000, 0x0000, 0x0000, 0x01ac, 0x0000, 0x0000,
+       0x01af, 0x0000, 0x0000, 0x0000, 0x01b3, 0x0000, 0x01b5, 0x0000,
+       0x0000, 0x01b8, 0x0000, 0x0000, 0x0000, 0x01bc, 0x0000, 0x01f7,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x01c4, 0x0000,
+       0x0000, 0x01c7, 0x0000, 0x0000, 0x01ca, 0x0000, 0x01cd, 0x0000,
+       0x01cf, 0x0000, 0x01d1, 0x0000, 0x01d3, 0x0000, 0x01d5, 0x0000,
+       0x01d7, 0x0000, 0x01d9, 0x0000, 0x01db, 0x018e, 0x0000, 0x01de,
+       0x0000, 0x01e0, 0x0000, 0x01e2, 0x0000, 0x01e4, 0x0000, 0x01e6,
+       0x0000, 0x01e8, 0x0000, 0x01ea, 0x0000, 0x01ec, 0x0000, 0x01ee,
+       0x0000, 0x0000, 0x0000, 0x01f1, 0x0000, 0x01f4, 0x0000, 0x0000,
+       0x0000, 0x01f8, 0x0000, 0x01fa, 0x0000, 0x01fc, 0x0000, 0x01fe,
+};
+
+static const wchar_t t2_02[256] = {
+       0x0000, 0x0200, 0x0000, 0x0202, 0x0000, 0x0204, 0x0000, 0x0206,
+       0x0000, 0x0208, 0x0000, 0x020a, 0x0000, 0x020c, 0x0000, 0x020e,
+       0x0000, 0x0210, 0x0000, 0x0212, 0x0000, 0x0214, 0x0000, 0x0216,
+       0x0000, 0x0218, 0x0000, 0x021a, 0x0000, 0x021c, 0x0000, 0x021e,
+       0x0000, 0x0000, 0x0000, 0x0222, 0x0000, 0x0224, 0x0000, 0x0226,
+       0x0000, 0x0228, 0x0000, 0x022a, 0x0000, 0x022c, 0x0000, 0x022e,
+       0x0000, 0x0230, 0x0000, 0x0232, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x023b, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0241, 0x0000, 0x0000, 0x0000, 0x0000, 0x0246,
+       0x0000, 0x0248, 0x0000, 0x024a, 0x0000, 0x024c, 0x0000, 0x024e,
+       0x2c6f, 0x2c6d, 0x0000, 0x0181, 0x0186, 0x0000, 0x0189, 0x018a,
+       0x0000, 0x018f, 0x0000, 0x0190, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0193, 0x0000, 0x0000, 0x0194, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0197, 0x0196, 0x0000, 0x2c62, 0x0000, 0x0000, 0x0000, 0x019c,
+       0x0000, 0x2c6e, 0x019d, 0x0000, 0x0000, 0x019f, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2c64, 0x0000, 0x0000,
+       0x01a6, 0x0000, 0x0000, 0x01a9, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x01ae, 0x0244, 0x01b1, 0x01b2, 0x0245, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x01b7, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+};
+
+static const wchar_t t2_03[256] = {
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0370, 0x0000, 0x0372, 0x0000, 0x0000, 0x0000, 0x0376,
+       0x0000, 0x0000, 0x0000, 0x03fd, 0x03fe, 0x03ff, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0386, 0x0388, 0x0389, 0x038a,
+       0x0000, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
+       0x0398, 0x0399, 0x039a, 0x039b, 0x039c, 0x039d, 0x039e, 0x039f,
+       0x03a0, 0x03a1, 0x0000, 0x03a3, 0x03a4, 0x03a5, 0x03a6, 0x03a7,
+       0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x038c, 0x038e, 0x038f, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x03cf,
+       0x0000, 0x03d8, 0x0000, 0x03da, 0x0000, 0x03dc, 0x0000, 0x03de,
+       0x0000, 0x03e0, 0x0000, 0x03e2, 0x0000, 0x03e4, 0x0000, 0x03e6,
+       0x0000, 0x03e8, 0x0000, 0x03ea, 0x0000, 0x03ec, 0x0000, 0x03ee,
+       0x0000, 0x0000, 0x03f9, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x03f7, 0x0000, 0x0000, 0x03fa, 0x0000, 0x0000, 0x0000, 0x0000,
+};
+
+static const wchar_t t2_04[256] = {
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
+       0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, 0x041f,
+       0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
+       0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f,
+       0x0400, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
+       0x0408, 0x0409, 0x040a, 0x040b, 0x040c, 0x040d, 0x040e, 0x040f,
+       0x0000, 0x0460, 0x0000, 0x0462, 0x0000, 0x0464, 0x0000, 0x0466,
+       0x0000, 0x0468, 0x0000, 0x046a, 0x0000, 0x046c, 0x0000, 0x046e,
+       0x0000, 0x0470, 0x0000, 0x0472, 0x0000, 0x0474, 0x0000, 0x0476,
+       0x0000, 0x0478, 0x0000, 0x047a, 0x0000, 0x047c, 0x0000, 0x047e,
+       0x0000, 0x0480, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x048a, 0x0000, 0x048c, 0x0000, 0x048e,
+       0x0000, 0x0490, 0x0000, 0x0492, 0x0000, 0x0494, 0x0000, 0x0496,
+       0x0000, 0x0498, 0x0000, 0x049a, 0x0000, 0x049c, 0x0000, 0x049e,
+       0x0000, 0x04a0, 0x0000, 0x04a2, 0x0000, 0x04a4, 0x0000, 0x04a6,
+       0x0000, 0x04a8, 0x0000, 0x04aa, 0x0000, 0x04ac, 0x0000, 0x04ae,
+       0x0000, 0x04b0, 0x0000, 0x04b2, 0x0000, 0x04b4, 0x0000, 0x04b6,
+       0x0000, 0x04b8, 0x0000, 0x04ba, 0x0000, 0x04bc, 0x0000, 0x04be,
+       0x0000, 0x0000, 0x04c1, 0x0000, 0x04c3, 0x0000, 0x04c5, 0x0000,
+       0x04c7, 0x0000, 0x04c9, 0x0000, 0x04cb, 0x0000, 0x04cd, 0x04c0,
+       0x0000, 0x04d0, 0x0000, 0x04d2, 0x0000, 0x04d4, 0x0000, 0x04d6,
+       0x0000, 0x04d8, 0x0000, 0x04da, 0x0000, 0x04dc, 0x0000, 0x04de,
+       0x0000, 0x04e0, 0x0000, 0x04e2, 0x0000, 0x04e4, 0x0000, 0x04e6,
+       0x0000, 0x04e8, 0x0000, 0x04ea, 0x0000, 0x04ec, 0x0000, 0x04ee,
+       0x0000, 0x04f0, 0x0000, 0x04f2, 0x0000, 0x04f4, 0x0000, 0x04f6,
+       0x0000, 0x04f8, 0x0000, 0x04fa, 0x0000, 0x04fc, 0x0000, 0x04fe,
+};
+
+/*
+ * Second-level toupper table for code points U+0500-U+05FF, indexed by the
+ * low byte of the code point. A zero entry means "no uppercase mapping";
+ * cifs_toupper() then returns the input character unchanged.
+ */
+static const wchar_t t2_05[256] = {
+       0x0000, 0x0500, 0x0000, 0x0502, 0x0000, 0x0504, 0x0000, 0x0506,
+       0x0000, 0x0508, 0x0000, 0x050a, 0x0000, 0x050c, 0x0000, 0x050e,
+       0x0000, 0x0510, 0x0000, 0x0512, 0x0000, 0x0514, 0x0000, 0x0516,
+       0x0000, 0x0518, 0x0000, 0x051a, 0x0000, 0x051c, 0x0000, 0x051e,
+       0x0000, 0x0520, 0x0000, 0x0522, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0531, 0x0532, 0x0533, 0x0534, 0x0535, 0x0536, 0x0537,
+       0x0538, 0x0539, 0x053a, 0x053b, 0x053c, 0x053d, 0x053e, 0x053f,
+       0x0540, 0x0541, 0x0542, 0x0543, 0x0544, 0x0545, 0x0546, 0x0547,
+       0x0548, 0x0549, 0x054a, 0x054b, 0x054c, 0x054d, 0x054e, 0x054f,
+       0x0550, 0x0551, 0x0552, 0x0553, 0x0554, 0x0555, 0x0556, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+};
+
+/*
+ * Second-level toupper table for code points U+1D00-U+1DFF; only two code
+ * points in this page have uppercase forms (U+1D79 and U+1D7D).
+ */
+static const wchar_t t2_1d[256] = {
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0xa77d, 0x0000, 0x0000, 0x0000, 0x2c63, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+};
+
+/*
+ * Second-level toupper table for code points U+1E00-U+1EFF (lowercase at odd
+ * offsets maps to the preceding even code point). Zero entry = no mapping.
+ */
+static const wchar_t t2_1e[256] = {
+       0x0000, 0x1e00, 0x0000, 0x1e02, 0x0000, 0x1e04, 0x0000, 0x1e06,
+       0x0000, 0x1e08, 0x0000, 0x1e0a, 0x0000, 0x1e0c, 0x0000, 0x1e0e,
+       0x0000, 0x1e10, 0x0000, 0x1e12, 0x0000, 0x1e14, 0x0000, 0x1e16,
+       0x0000, 0x1e18, 0x0000, 0x1e1a, 0x0000, 0x1e1c, 0x0000, 0x1e1e,
+       0x0000, 0x1e20, 0x0000, 0x1e22, 0x0000, 0x1e24, 0x0000, 0x1e26,
+       0x0000, 0x1e28, 0x0000, 0x1e2a, 0x0000, 0x1e2c, 0x0000, 0x1e2e,
+       0x0000, 0x1e30, 0x0000, 0x1e32, 0x0000, 0x1e34, 0x0000, 0x1e36,
+       0x0000, 0x1e38, 0x0000, 0x1e3a, 0x0000, 0x1e3c, 0x0000, 0x1e3e,
+       0x0000, 0x1e40, 0x0000, 0x1e42, 0x0000, 0x1e44, 0x0000, 0x1e46,
+       0x0000, 0x1e48, 0x0000, 0x1e4a, 0x0000, 0x1e4c, 0x0000, 0x1e4e,
+       0x0000, 0x1e50, 0x0000, 0x1e52, 0x0000, 0x1e54, 0x0000, 0x1e56,
+       0x0000, 0x1e58, 0x0000, 0x1e5a, 0x0000, 0x1e5c, 0x0000, 0x1e5e,
+       0x0000, 0x1e60, 0x0000, 0x1e62, 0x0000, 0x1e64, 0x0000, 0x1e66,
+       0x0000, 0x1e68, 0x0000, 0x1e6a, 0x0000, 0x1e6c, 0x0000, 0x1e6e,
+       0x0000, 0x1e70, 0x0000, 0x1e72, 0x0000, 0x1e74, 0x0000, 0x1e76,
+       0x0000, 0x1e78, 0x0000, 0x1e7a, 0x0000, 0x1e7c, 0x0000, 0x1e7e,
+       0x0000, 0x1e80, 0x0000, 0x1e82, 0x0000, 0x1e84, 0x0000, 0x1e86,
+       0x0000, 0x1e88, 0x0000, 0x1e8a, 0x0000, 0x1e8c, 0x0000, 0x1e8e,
+       0x0000, 0x1e90, 0x0000, 0x1e92, 0x0000, 0x1e94, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x1ea0, 0x0000, 0x1ea2, 0x0000, 0x1ea4, 0x0000, 0x1ea6,
+       0x0000, 0x1ea8, 0x0000, 0x1eaa, 0x0000, 0x1eac, 0x0000, 0x1eae,
+       0x0000, 0x1eb0, 0x0000, 0x1eb2, 0x0000, 0x1eb4, 0x0000, 0x1eb6,
+       0x0000, 0x1eb8, 0x0000, 0x1eba, 0x0000, 0x1ebc, 0x0000, 0x1ebe,
+       0x0000, 0x1ec0, 0x0000, 0x1ec2, 0x0000, 0x1ec4, 0x0000, 0x1ec6,
+       0x0000, 0x1ec8, 0x0000, 0x1eca, 0x0000, 0x1ecc, 0x0000, 0x1ece,
+       0x0000, 0x1ed0, 0x0000, 0x1ed2, 0x0000, 0x1ed4, 0x0000, 0x1ed6,
+       0x0000, 0x1ed8, 0x0000, 0x1eda, 0x0000, 0x1edc, 0x0000, 0x1ede,
+       0x0000, 0x1ee0, 0x0000, 0x1ee2, 0x0000, 0x1ee4, 0x0000, 0x1ee6,
+       0x0000, 0x1ee8, 0x0000, 0x1eea, 0x0000, 0x1eec, 0x0000, 0x1eee,
+       0x0000, 0x1ef0, 0x0000, 0x1ef2, 0x0000, 0x1ef4, 0x0000, 0x1ef6,
+       0x0000, 0x1ef8, 0x0000, 0x1efa, 0x0000, 0x1efc, 0x0000, 0x1efe,
+};
+
+/*
+ * Second-level toupper table for code points U+1F00-U+1FFF.
+ * Zero entry = no uppercase mapping for that code point.
+ */
+static const wchar_t t2_1f[256] = {
+       0x1f08, 0x1f09, 0x1f0a, 0x1f0b, 0x1f0c, 0x1f0d, 0x1f0e, 0x1f0f,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x1f18, 0x1f19, 0x1f1a, 0x1f1b, 0x1f1c, 0x1f1d, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x1f28, 0x1f29, 0x1f2a, 0x1f2b, 0x1f2c, 0x1f2d, 0x1f2e, 0x1f2f,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x1f38, 0x1f39, 0x1f3a, 0x1f3b, 0x1f3c, 0x1f3d, 0x1f3e, 0x1f3f,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x1f48, 0x1f49, 0x1f4a, 0x1f4b, 0x1f4c, 0x1f4d, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x1f59, 0x0000, 0x1f5b, 0x0000, 0x1f5d, 0x0000, 0x1f5f,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x1f68, 0x1f69, 0x1f6a, 0x1f6b, 0x1f6c, 0x1f6d, 0x1f6e, 0x1f6f,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x1fba, 0x1fbb, 0x1fc8, 0x1fc9, 0x1fca, 0x1fcb, 0x1fda, 0x1fdb,
+       0x1ff8, 0x1ff9, 0x1fea, 0x1feb, 0x1ffa, 0x1ffb, 0x0000, 0x0000,
+       0x1f88, 0x1f89, 0x1f8a, 0x1f8b, 0x1f8c, 0x1f8d, 0x1f8e, 0x1f8f,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x1f98, 0x1f99, 0x1f9a, 0x1f9b, 0x1f9c, 0x1f9d, 0x1f9e, 0x1f9f,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x1fa8, 0x1fa9, 0x1faa, 0x1fab, 0x1fac, 0x1fad, 0x1fae, 0x1faf,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x1fb8, 0x1fb9, 0x0000, 0x1fbc, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x1fcc, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x1fd8, 0x1fd9, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x1fe8, 0x1fe9, 0x0000, 0x0000, 0x0000, 0x1fec, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x1ffc, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+};
+
+/*
+ * Second-level toupper table for code points U+2100-U+21FF (U+214E plus the
+ * small Roman numerals U+2170-U+217F and U+2184). Zero entry = no mapping.
+ */
+static const wchar_t t2_21[256] = {
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2132, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x2160, 0x2161, 0x2162, 0x2163, 0x2164, 0x2165, 0x2166, 0x2167,
+       0x2168, 0x2169, 0x216a, 0x216b, 0x216c, 0x216d, 0x216e, 0x216f,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x2183, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+};
+
+/*
+ * Second-level toupper table for code points U+2400-U+24FF (circled letters:
+ * U+24D0-U+24E9 map to U+24B6-U+24CF). Zero entry = no mapping.
+ */
+static const wchar_t t2_24[256] = {
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x24b6, 0x24b7, 0x24b8, 0x24b9, 0x24ba, 0x24bb, 0x24bc, 0x24bd,
+       0x24be, 0x24bf, 0x24c0, 0x24c1, 0x24c2, 0x24c3, 0x24c4, 0x24c5,
+       0x24c6, 0x24c7, 0x24c8, 0x24c9, 0x24ca, 0x24cb, 0x24cc, 0x24cd,
+       0x24ce, 0x24cf, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+};
+
+/*
+ * Second-level toupper table for code points U+2C00-U+2CFF.
+ * Zero entry = no uppercase mapping for that code point.
+ */
+static const wchar_t t2_2c[256] = {
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x2c00, 0x2c01, 0x2c02, 0x2c03, 0x2c04, 0x2c05, 0x2c06, 0x2c07,
+       0x2c08, 0x2c09, 0x2c0a, 0x2c0b, 0x2c0c, 0x2c0d, 0x2c0e, 0x2c0f,
+       0x2c10, 0x2c11, 0x2c12, 0x2c13, 0x2c14, 0x2c15, 0x2c16, 0x2c17,
+       0x2c18, 0x2c19, 0x2c1a, 0x2c1b, 0x2c1c, 0x2c1d, 0x2c1e, 0x2c1f,
+       0x2c20, 0x2c21, 0x2c22, 0x2c23, 0x2c24, 0x2c25, 0x2c26, 0x2c27,
+       0x2c28, 0x2c29, 0x2c2a, 0x2c2b, 0x2c2c, 0x2c2d, 0x2c2e, 0x0000,
+       0x0000, 0x2c60, 0x0000, 0x0000, 0x0000, 0x023a, 0x023e, 0x0000,
+       0x2c67, 0x0000, 0x2c69, 0x0000, 0x2c6b, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x2c72, 0x0000, 0x0000, 0x2c75, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x2c80, 0x0000, 0x2c82, 0x0000, 0x2c84, 0x0000, 0x2c86,
+       0x0000, 0x2c88, 0x0000, 0x2c8a, 0x0000, 0x2c8c, 0x0000, 0x2c8e,
+       0x0000, 0x2c90, 0x0000, 0x2c92, 0x0000, 0x2c94, 0x0000, 0x2c96,
+       0x0000, 0x2c98, 0x0000, 0x2c9a, 0x0000, 0x2c9c, 0x0000, 0x2c9e,
+       0x0000, 0x2ca0, 0x0000, 0x2ca2, 0x0000, 0x2ca4, 0x0000, 0x2ca6,
+       0x0000, 0x2ca8, 0x0000, 0x2caa, 0x0000, 0x2cac, 0x0000, 0x2cae,
+       0x0000, 0x2cb0, 0x0000, 0x2cb2, 0x0000, 0x2cb4, 0x0000, 0x2cb6,
+       0x0000, 0x2cb8, 0x0000, 0x2cba, 0x0000, 0x2cbc, 0x0000, 0x2cbe,
+       0x0000, 0x2cc0, 0x0000, 0x2cc2, 0x0000, 0x2cc4, 0x0000, 0x2cc6,
+       0x0000, 0x2cc8, 0x0000, 0x2cca, 0x0000, 0x2ccc, 0x0000, 0x2cce,
+       0x0000, 0x2cd0, 0x0000, 0x2cd2, 0x0000, 0x2cd4, 0x0000, 0x2cd6,
+       0x0000, 0x2cd8, 0x0000, 0x2cda, 0x0000, 0x2cdc, 0x0000, 0x2cde,
+       0x0000, 0x2ce0, 0x0000, 0x2ce2, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+};
+
+/*
+ * Second-level toupper table for code points U+2D00-U+2DFF (U+2D00-U+2D25
+ * map to U+10A0-U+10C5). Zero entry = no mapping.
+ */
+static const wchar_t t2_2d[256] = {
+       0x10a0, 0x10a1, 0x10a2, 0x10a3, 0x10a4, 0x10a5, 0x10a6, 0x10a7,
+       0x10a8, 0x10a9, 0x10aa, 0x10ab, 0x10ac, 0x10ad, 0x10ae, 0x10af,
+       0x10b0, 0x10b1, 0x10b2, 0x10b3, 0x10b4, 0x10b5, 0x10b6, 0x10b7,
+       0x10b8, 0x10b9, 0x10ba, 0x10bb, 0x10bc, 0x10bd, 0x10be, 0x10bf,
+       0x10c0, 0x10c1, 0x10c2, 0x10c3, 0x10c4, 0x10c5, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+};
+
+/*
+ * Second-level toupper table for code points U+A600-U+A6FF.
+ * Zero entry = no uppercase mapping for that code point.
+ */
+static const wchar_t t2_a6[256] = {
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0xa640, 0x0000, 0xa642, 0x0000, 0xa644, 0x0000, 0xa646,
+       0x0000, 0xa648, 0x0000, 0xa64a, 0x0000, 0xa64c, 0x0000, 0xa64e,
+       0x0000, 0xa650, 0x0000, 0xa652, 0x0000, 0xa654, 0x0000, 0xa656,
+       0x0000, 0xa658, 0x0000, 0xa65a, 0x0000, 0xa65c, 0x0000, 0xa65e,
+       0x0000, 0x0000, 0x0000, 0xa662, 0x0000, 0xa664, 0x0000, 0xa666,
+       0x0000, 0xa668, 0x0000, 0xa66a, 0x0000, 0xa66c, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0xa680, 0x0000, 0xa682, 0x0000, 0xa684, 0x0000, 0xa686,
+       0x0000, 0xa688, 0x0000, 0xa68a, 0x0000, 0xa68c, 0x0000, 0xa68e,
+       0x0000, 0xa690, 0x0000, 0xa692, 0x0000, 0xa694, 0x0000, 0xa696,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+};
+
+/*
+ * Second-level toupper table for code points U+A700-U+A7FF.
+ * Zero entry = no uppercase mapping for that code point.
+ */
+static const wchar_t t2_a7[256] = {
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0xa722, 0x0000, 0xa724, 0x0000, 0xa726,
+       0x0000, 0xa728, 0x0000, 0xa72a, 0x0000, 0xa72c, 0x0000, 0xa72e,
+       0x0000, 0x0000, 0x0000, 0xa732, 0x0000, 0xa734, 0x0000, 0xa736,
+       0x0000, 0xa738, 0x0000, 0xa73a, 0x0000, 0xa73c, 0x0000, 0xa73e,
+       0x0000, 0xa740, 0x0000, 0xa742, 0x0000, 0xa744, 0x0000, 0xa746,
+       0x0000, 0xa748, 0x0000, 0xa74a, 0x0000, 0xa74c, 0x0000, 0xa74e,
+       0x0000, 0xa750, 0x0000, 0xa752, 0x0000, 0xa754, 0x0000, 0xa756,
+       0x0000, 0xa758, 0x0000, 0xa75a, 0x0000, 0xa75c, 0x0000, 0xa75e,
+       0x0000, 0xa760, 0x0000, 0xa762, 0x0000, 0xa764, 0x0000, 0xa766,
+       0x0000, 0xa768, 0x0000, 0xa76a, 0x0000, 0xa76c, 0x0000, 0xa76e,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0xa779, 0x0000, 0xa77b, 0x0000, 0x0000, 0xa77e,
+       0x0000, 0xa780, 0x0000, 0xa782, 0x0000, 0xa784, 0x0000, 0xa786,
+       0x0000, 0x0000, 0x0000, 0x0000, 0xa78b, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+};
+
+/*
+ * Second-level toupper table for code points U+FF00-U+FFFF (U+FF41-U+FF5A
+ * map to U+FF21-U+FF3A). Zero entry = no mapping.
+ */
+static const wchar_t t2_ff[256] = {
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0xff21, 0xff22, 0xff23, 0xff24, 0xff25, 0xff26, 0xff27,
+       0xff28, 0xff29, 0xff2a, 0xff2b, 0xff2c, 0xff2d, 0xff2e, 0xff2f,
+       0xff30, 0xff31, 0xff32, 0xff33, 0xff34, 0xff35, 0xff36, 0xff37,
+       0xff38, 0xff39, 0xff3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+       0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+};
+
+/*
+ * First-level index: toplevel[high byte of the code point] points at the
+ * second-level page table. A NULL slot means the page contains no code
+ * points with an uppercase mapping, so cifs_toupper() returns its input.
+ */
+static const wchar_t *const toplevel[256] = {
+       t2_00, t2_01, t2_02, t2_03, t2_04, t2_05,  NULL,  NULL,
+       NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,
+       NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,
+       NULL,  NULL,  NULL,  NULL,  NULL, t2_1d, t2_1e, t2_1f,
+       NULL, t2_21,  NULL,  NULL, t2_24,  NULL,  NULL,  NULL,
+       NULL,  NULL,  NULL,  NULL, t2_2c, t2_2d,  NULL,  NULL,
+       NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,
+       NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,
+       NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,
+       NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,
+       NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,
+       NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,
+       NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,
+       NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,
+       NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,
+       NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,
+       NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,
+       NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,
+       NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,
+       NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,
+       NULL,  NULL,  NULL,  NULL,  NULL,  NULL, t2_a6, t2_a7,
+       NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,
+       NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,
+       NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,
+       NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,
+       NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,
+       NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,
+       NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,
+       NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,
+       NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,
+       NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL,
+       NULL,  NULL,  NULL,  NULL,  NULL,  NULL,  NULL, t2_ff,
+};
+
+/**
+ * cifs_toupper - convert a wchar_t from lower to uppercase
+ * @in: character to convert from lower to uppercase
+ *
+ * This function consults the static tables above to convert a wchar_t from
+ * lower to uppercase. In the event that there is no mapping, the original
+ * "in" character is returned.
+ *
+ * Return: the uppercase mapping of @in, or @in itself when no mapping
+ * exists. NOTE(review): only the low 16 bits of @in are consulted, so
+ * this assumes BMP-only input — confirm callers never pass larger values.
+ */
+wchar_t
+cifs_toupper(wchar_t in)
+{
+       unsigned char idx;
+       const wchar_t *tbl;
+       wchar_t out;
+
+       /* grab upper byte */
+       idx = (in & 0xff00) >> 8;
+
+       /* find pointer to 2nd layer table */
+       tbl = toplevel[idx];
+       if (!tbl)
+               return in;
+
+       /* grab lower byte */
+       idx = in & 0xff;
+
+       /* look up character in table */
+       out = tbl[idx];
+       if (out)
+               return out;
+
+       /* zero entry: this code point has no uppercase form */
+       return in;
+}
index 72f816d..9bdeca1 100644 (file)
@@ -190,6 +190,11 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm)
                                err = cn_printf(cn, "%d",
                                              task_tgid_vnr(current));
                                break;
+                       /* global pid */
+                       case 'P':
+                               err = cn_printf(cn, "%d",
+                                             task_tgid_nr(current));
+                               break;
                        /* uid */
                        case 'u':
                                err = cn_printf(cn, "%d", cred->uid);
index 4d9df3c..4100030 100644 (file)
@@ -37,6 +37,7 @@
 #include <linux/rculist_bl.h>
 #include <linux/prefetch.h>
 #include <linux/ratelimit.h>
+#include <linux/list_lru.h>
 #include "internal.h"
 #include "mount.h"
 
@@ -48,7 +49,7 @@
  *   - the dcache hash table
  * s_anon bl list spinlock protects:
  *   - the s_anon list (see __d_drop)
- * dcache_lru_lock protects:
+ * dentry->d_sb->s_dentry_lru_lock protects:
  *   - the dcache lru lists and counters
  * d_lock protects:
  *   - d_flags
@@ -63,7 +64,7 @@
  * Ordering:
  * dentry->d_inode->i_lock
  *   dentry->d_lock
- *     dcache_lru_lock
+ *     dentry->d_sb->s_dentry_lru_lock
  *     dcache_hash_bucket lock
  *     s_anon lock
  *
@@ -81,7 +82,6 @@
 int sysctl_vfs_cache_pressure __read_mostly = 100;
 EXPORT_SYMBOL_GPL(sysctl_vfs_cache_pressure);
 
-static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dcache_lru_lock);
 __cacheline_aligned_in_smp DEFINE_SEQLOCK(rename_lock);
 
 EXPORT_SYMBOL(rename_lock);
@@ -90,8 +90,8 @@ static struct kmem_cache *dentry_cache __read_mostly;
 
 /**
  * read_seqbegin_or_lock - begin a sequence number check or locking block
- * lock: sequence lock
- * seq : sequence number to be checked
+ * @lock: sequence lock
+ * @seq : sequence number to be checked
  *
  * First try it once optimistically without taking the lock. If that fails,
  * take the lock. The sequence number is also used as a marker for deciding
@@ -103,7 +103,7 @@ static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq)
        if (!(*seq & 1))        /* Even */
                *seq = read_seqbegin(lock);
        else                    /* Odd */
-               write_seqlock(lock);
+               read_seqlock_excl(lock);
 }
 
 static inline int need_seqretry(seqlock_t *lock, int seq)
@@ -114,7 +114,7 @@ static inline int need_seqretry(seqlock_t *lock, int seq)
 static inline void done_seqretry(seqlock_t *lock, int seq)
 {
        if (seq & 1)
-               write_sequnlock(lock);
+               read_sequnlock_excl(lock);
 }
 
 /*
@@ -146,23 +146,47 @@ struct dentry_stat_t dentry_stat = {
        .age_limit = 45,
 };
 
-static DEFINE_PER_CPU(unsigned int, nr_dentry);
+static DEFINE_PER_CPU(long, nr_dentry);
+static DEFINE_PER_CPU(long, nr_dentry_unused);
 
 #if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
-static int get_nr_dentry(void)
+
+/*
+ * Here we resort to our own counters instead of using generic per-cpu counters
+ * for consistency with what the vfs inode code does. We are expected to harvest
+ * better code and performance by having our own specialized counters.
+ *
+ * Please note that the loop is done over all possible CPUs, not over all online
+ * CPUs. The reason for this is that we don't want to play games with CPUs going
+ * on and off. If one of them goes off, we will just keep their counters.
+ *
+ * glommer: See cffbc8a for details, and if you ever intend to change this,
+ * please update all vfs counters to match.
+ */
+static long get_nr_dentry(void)
 {
        int i;
-       int sum = 0;
+       long sum = 0;
        for_each_possible_cpu(i)
                sum += per_cpu(nr_dentry, i);
        return sum < 0 ? 0 : sum;
 }
 
+static long get_nr_dentry_unused(void)
+{
+       int i;
+       long sum = 0;
+       for_each_possible_cpu(i)
+               sum += per_cpu(nr_dentry_unused, i);
+       return sum < 0 ? 0 : sum;
+}
+
 int proc_nr_dentry(ctl_table *table, int write, void __user *buffer,
                   size_t *lenp, loff_t *ppos)
 {
        dentry_stat.nr_dentry = get_nr_dentry();
-       return proc_dointvec(table, write, buffer, lenp, ppos);
+       dentry_stat.nr_unused = get_nr_dentry_unused();
+       return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
 }
 #endif
 
@@ -333,52 +357,96 @@ static void dentry_unlink_inode(struct dentry * dentry)
 }
 
 /*
- * dentry_lru_(add|del|prune|move_tail) must be called with d_lock held.
+ * The DCACHE_LRU_LIST bit is set whenever the 'd_lru' entry
+ * is in use - which includes both the "real" per-superblock
+ * LRU list _and_ the DCACHE_SHRINK_LIST use.
+ *
+ * The DCACHE_SHRINK_LIST bit is set whenever the dentry is
+ * on the shrink list (ie not on the superblock LRU list).
+ *
+ * The per-cpu "nr_dentry_unused" counters are updated with
+ * the DCACHE_LRU_LIST bit.
+ *
+ * These helper functions make sure we always follow the
+ * rules. d_lock must be held by the caller.
  */
-static void dentry_lru_add(struct dentry *dentry)
+#define D_FLAG_VERIFY(dentry,x) WARN_ON_ONCE(((dentry)->d_flags & (DCACHE_LRU_LIST | DCACHE_SHRINK_LIST)) != (x))
+static void d_lru_add(struct dentry *dentry)
 {
-       if (unlikely(!(dentry->d_flags & DCACHE_LRU_LIST))) {
-               spin_lock(&dcache_lru_lock);
-               dentry->d_flags |= DCACHE_LRU_LIST;
-               list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
-               dentry->d_sb->s_nr_dentry_unused++;
-               dentry_stat.nr_unused++;
-               spin_unlock(&dcache_lru_lock);
-       }
+       D_FLAG_VERIFY(dentry, 0);
+       dentry->d_flags |= DCACHE_LRU_LIST;
+       this_cpu_inc(nr_dentry_unused);
+       WARN_ON_ONCE(!list_lru_add(&dentry->d_sb->s_dentry_lru, &dentry->d_lru));
 }
 
-static void __dentry_lru_del(struct dentry *dentry)
+static void d_lru_del(struct dentry *dentry)
 {
+       D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST);
+       dentry->d_flags &= ~DCACHE_LRU_LIST;
+       this_cpu_dec(nr_dentry_unused);
+       WARN_ON_ONCE(!list_lru_del(&dentry->d_sb->s_dentry_lru, &dentry->d_lru));
+}
+
+static void d_shrink_del(struct dentry *dentry)
+{
+       D_FLAG_VERIFY(dentry, DCACHE_SHRINK_LIST | DCACHE_LRU_LIST);
        list_del_init(&dentry->d_lru);
        dentry->d_flags &= ~(DCACHE_SHRINK_LIST | DCACHE_LRU_LIST);
-       dentry->d_sb->s_nr_dentry_unused--;
-       dentry_stat.nr_unused--;
+       this_cpu_dec(nr_dentry_unused);
+}
+
+static void d_shrink_add(struct dentry *dentry, struct list_head *list)
+{
+       D_FLAG_VERIFY(dentry, 0);
+       list_add(&dentry->d_lru, list);
+       dentry->d_flags |= DCACHE_SHRINK_LIST | DCACHE_LRU_LIST;
+       this_cpu_inc(nr_dentry_unused);
 }
 
 /*
- * Remove a dentry with references from the LRU.
+ * These can only be called under the global LRU lock, ie during the
+ * callback for freeing the LRU list. "isolate" removes it from the
+ * LRU lists entirely, while shrink_move moves it to the indicated
+ * private list.
  */
-static void dentry_lru_del(struct dentry *dentry)
+static void d_lru_isolate(struct dentry *dentry)
 {
-       if (!list_empty(&dentry->d_lru)) {
-               spin_lock(&dcache_lru_lock);
-               __dentry_lru_del(dentry);
-               spin_unlock(&dcache_lru_lock);
-       }
+       D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST);
+       dentry->d_flags &= ~DCACHE_LRU_LIST;
+       this_cpu_dec(nr_dentry_unused);
+       list_del_init(&dentry->d_lru);
 }
 
-static void dentry_lru_move_list(struct dentry *dentry, struct list_head *list)
+static void d_lru_shrink_move(struct dentry *dentry, struct list_head *list)
 {
-       spin_lock(&dcache_lru_lock);
-       if (list_empty(&dentry->d_lru)) {
-               dentry->d_flags |= DCACHE_LRU_LIST;
-               list_add_tail(&dentry->d_lru, list);
-               dentry->d_sb->s_nr_dentry_unused++;
-               dentry_stat.nr_unused++;
-       } else {
-               list_move_tail(&dentry->d_lru, list);
+       D_FLAG_VERIFY(dentry, DCACHE_LRU_LIST);
+       dentry->d_flags |= DCACHE_SHRINK_LIST;
+       list_move_tail(&dentry->d_lru, list);
+}
+
+/*
+ * dentry_lru_(add|del) must be called with d_lock held.
+ */
+static void dentry_lru_add(struct dentry *dentry)
+{
+       if (unlikely(!(dentry->d_flags & DCACHE_LRU_LIST)))
+               d_lru_add(dentry);
+}
+
+/*
+ * Remove a dentry with references from the LRU.
+ *
+ * If we are on the shrink list, then we can get to try_prune_one_dentry() and
+ * lose our last reference through the parent walk. In this case, we need to
+ * remove ourselves from the shrink list, not the LRU.
+ */
+static void dentry_lru_del(struct dentry *dentry)
+{
+       if (dentry->d_flags & DCACHE_LRU_LIST) {
+               if (dentry->d_flags & DCACHE_SHRINK_LIST)
+                       return d_shrink_del(dentry);
+               d_lru_del(dentry);
        }
-       spin_unlock(&dcache_lru_lock);
 }
 
 /**
@@ -474,7 +542,8 @@ EXPORT_SYMBOL(d_drop);
  * If ref is non-zero, then decrement the refcount too.
  * Returns dentry requiring refcount drop, or NULL if we're done.
  */
-static inline struct dentry *dentry_kill(struct dentry *dentry)
+static inline struct dentry *
+dentry_kill(struct dentry *dentry, int unlock_on_failure)
        __releases(dentry->d_lock)
 {
        struct inode *inode;
@@ -483,8 +552,10 @@ static inline struct dentry *dentry_kill(struct dentry *dentry)
        inode = dentry->d_inode;
        if (inode && !spin_trylock(&inode->i_lock)) {
 relock:
-               spin_unlock(&dentry->d_lock);
-               cpu_relax();
+               if (unlock_on_failure) {
+                       spin_unlock(&dentry->d_lock);
+                       cpu_relax();
+               }
                return dentry; /* try again with same dentry */
        }
        if (IS_ROOT(dentry))
@@ -567,7 +638,7 @@ repeat:
        return;
 
 kill_it:
-       dentry = dentry_kill(dentry);
+       dentry = dentry_kill(dentry, 1);
        if (dentry)
                goto repeat;
 }
@@ -787,12 +858,12 @@ EXPORT_SYMBOL(d_prune_aliases);
  *
  * This may fail if locks cannot be acquired no problem, just try again.
  */
-static void try_prune_one_dentry(struct dentry *dentry)
+static struct dentry * try_prune_one_dentry(struct dentry *dentry)
        __releases(dentry->d_lock)
 {
        struct dentry *parent;
 
-       parent = dentry_kill(dentry);
+       parent = dentry_kill(dentry, 0);
        /*
         * If dentry_kill returns NULL, we have nothing more to do.
         * if it returns the same dentry, trylocks failed. In either
@@ -804,17 +875,18 @@ static void try_prune_one_dentry(struct dentry *dentry)
         * fragmentation.
         */
        if (!parent)
-               return;
+               return NULL;
        if (parent == dentry)
-               return;
+               return dentry;
 
        /* Prune ancestors. */
        dentry = parent;
        while (dentry) {
                if (lockref_put_or_lock(&dentry->d_lockref))
-                       return;
-               dentry = dentry_kill(dentry);
+                       return NULL;
+               dentry = dentry_kill(dentry, 1);
        }
+       return NULL;
 }
 
 static void shrink_dentry_list(struct list_head *list)
@@ -826,83 +898,159 @@ static void shrink_dentry_list(struct list_head *list)
                dentry = list_entry_rcu(list->prev, struct dentry, d_lru);
                if (&dentry->d_lru == list)
                        break; /* empty */
+
+               /*
+                * Get the dentry lock, and re-verify that the dentry is
+                * still on the shrinking list. If it is, we know that
+                * DCACHE_SHRINK_LIST and DCACHE_LRU_LIST are set.
+                */
                spin_lock(&dentry->d_lock);
                if (dentry != list_entry(list->prev, struct dentry, d_lru)) {
                        spin_unlock(&dentry->d_lock);
                        continue;
                }
 
+               /*
+                * The dispose list is isolated and dentries are not accounted
+                * to the LRU here, so we can simply remove it from the list
+                * here regardless of whether it is referenced or not.
+                */
+               d_shrink_del(dentry);
+
                /*
                 * We found an inuse dentry which was not removed from
-                * the LRU because of laziness during lookup.  Do not free
-                * it - just keep it off the LRU list.
+                * the LRU because of laziness during lookup. Do not free it.
                 */
                if (dentry->d_lockref.count) {
-                       dentry_lru_del(dentry);
                        spin_unlock(&dentry->d_lock);
                        continue;
                }
-
                rcu_read_unlock();
 
-               try_prune_one_dentry(dentry);
+               /*
+                * If 'try_prune_one_dentry()' returns a dentry, it will
+                * be the same one we passed in, and d_lock will
+                * have been held the whole time, so it will not
+                * have been added to any other lists. We failed
+                * to get the inode lock.
+                *
+                * We just add it back to the shrink list.
+                */
+               dentry = try_prune_one_dentry(dentry);
 
                rcu_read_lock();
+               if (dentry) {
+                       d_shrink_add(dentry, list);
+                       spin_unlock(&dentry->d_lock);
+               }
        }
        rcu_read_unlock();
 }
 
+static enum lru_status
+dentry_lru_isolate(struct list_head *item, spinlock_t *lru_lock, void *arg)
+{
+       struct list_head *freeable = arg;
+       struct dentry   *dentry = container_of(item, struct dentry, d_lru);
+
+
+       /*
+        * we are inverting the lru lock/dentry->d_lock here,
+        * so use a trylock. If we fail to get the lock, just skip
+        * it
+        */
+       if (!spin_trylock(&dentry->d_lock))
+               return LRU_SKIP;
+
+       /*
+        * Referenced dentries are still in use. If they have active
+        * counts, just remove them from the LRU. Otherwise give them
+        * another pass through the LRU.
+        */
+       if (dentry->d_lockref.count) {
+               d_lru_isolate(dentry);
+               spin_unlock(&dentry->d_lock);
+               return LRU_REMOVED;
+       }
+
+       if (dentry->d_flags & DCACHE_REFERENCED) {
+               dentry->d_flags &= ~DCACHE_REFERENCED;
+               spin_unlock(&dentry->d_lock);
+
+               /*
+                * The list move itself will be made by the common LRU code. At
+                * this point, we've dropped the dentry->d_lock but keep the
+                * lru lock. This is safe to do, since every list movement is
+                * protected by the lru lock even if both locks are held.
+                *
+                * This is guaranteed by the fact that all LRU management
+                * functions are intermediated by the LRU API calls like
+                * list_lru_add and list_lru_del. List movement in this file
+                * only ever occurs through these functions or through callbacks
+                * like this one, that are called from the LRU API.
+                *
+                * The only exceptions to this are functions like
+                * shrink_dentry_list, and code that first checks for the
+                * DCACHE_SHRINK_LIST flag.  Those are guaranteed to be
+                * operating only with stack provided lists after they are
+                * properly isolated from the main list.  It is thus, always a
+                * local access.
+                */
+               return LRU_ROTATE;
+       }
+
+       d_lru_shrink_move(dentry, freeable);
+       spin_unlock(&dentry->d_lock);
+
+       return LRU_REMOVED;
+}
+
 /**
  * prune_dcache_sb - shrink the dcache
  * @sb: superblock
- * @count: number of entries to try to free
+ * @nr_to_scan: number of entries to try to free
+ * @nid: which node to scan for freeable entities
  *
- * Attempt to shrink the superblock dcache LRU by @count entries. This is
+ * Attempt to shrink the superblock dcache LRU by @nr_to_scan entries. This is
  * done when we need more memory an called from the superblock shrinker
  * function.
  *
  * This function may fail to free any resources if all the dentries are in
  * use.
  */
-void prune_dcache_sb(struct super_block *sb, int count)
+long prune_dcache_sb(struct super_block *sb, unsigned long nr_to_scan,
+                    int nid)
 {
-       struct dentry *dentry;
-       LIST_HEAD(referenced);
-       LIST_HEAD(tmp);
+       LIST_HEAD(dispose);
+       long freed;
 
-relock:
-       spin_lock(&dcache_lru_lock);
-       while (!list_empty(&sb->s_dentry_lru)) {
-               dentry = list_entry(sb->s_dentry_lru.prev,
-                               struct dentry, d_lru);
-               BUG_ON(dentry->d_sb != sb);
-
-               if (!spin_trylock(&dentry->d_lock)) {
-                       spin_unlock(&dcache_lru_lock);
-                       cpu_relax();
-                       goto relock;
-               }
+       freed = list_lru_walk_node(&sb->s_dentry_lru, nid, dentry_lru_isolate,
+                                      &dispose, &nr_to_scan);
+       shrink_dentry_list(&dispose);
+       return freed;
+}
 
-               if (dentry->d_flags & DCACHE_REFERENCED) {
-                       dentry->d_flags &= ~DCACHE_REFERENCED;
-                       list_move(&dentry->d_lru, &referenced);
-                       spin_unlock(&dentry->d_lock);
-               } else {
-                       list_move_tail(&dentry->d_lru, &tmp);
-                       dentry->d_flags |= DCACHE_SHRINK_LIST;
-                       spin_unlock(&dentry->d_lock);
-                       if (!--count)
-                               break;
-               }
-               cond_resched_lock(&dcache_lru_lock);
-       }
-       if (!list_empty(&referenced))
-               list_splice(&referenced, &sb->s_dentry_lru);
-       spin_unlock(&dcache_lru_lock);
+static enum lru_status dentry_lru_isolate_shrink(struct list_head *item,
+                                               spinlock_t *lru_lock, void *arg)
+{
+       struct list_head *freeable = arg;
+       struct dentry   *dentry = container_of(item, struct dentry, d_lru);
+
+       /*
+        * we are inverting the lru lock/dentry->d_lock here,
+        * so use a trylock. If we fail to get the lock, just skip
+        * it
+        */
+       if (!spin_trylock(&dentry->d_lock))
+               return LRU_SKIP;
+
+       d_lru_shrink_move(dentry, freeable);
+       spin_unlock(&dentry->d_lock);
 
-       shrink_dentry_list(&tmp);
+       return LRU_REMOVED;
 }
 
+
 /**
  * shrink_dcache_sb - shrink dcache for a superblock
  * @sb: superblock
@@ -912,16 +1060,17 @@ relock:
  */
 void shrink_dcache_sb(struct super_block *sb)
 {
-       LIST_HEAD(tmp);
+       long freed;
 
-       spin_lock(&dcache_lru_lock);
-       while (!list_empty(&sb->s_dentry_lru)) {
-               list_splice_init(&sb->s_dentry_lru, &tmp);
-               spin_unlock(&dcache_lru_lock);
-               shrink_dentry_list(&tmp);
-               spin_lock(&dcache_lru_lock);
-       }
-       spin_unlock(&dcache_lru_lock);
+       do {
+               LIST_HEAD(dispose);
+
+               freed = list_lru_walk(&sb->s_dentry_lru,
+                       dentry_lru_isolate_shrink, &dispose, UINT_MAX);
+
+               this_cpu_sub(nr_dentry_unused, freed);
+               shrink_dentry_list(&dispose);
+       } while (freed > 0);
 }
 EXPORT_SYMBOL(shrink_dcache_sb);
 
@@ -1283,8 +1432,13 @@ static enum d_walk_ret select_collect(void *_data, struct dentry *dentry)
        if (dentry->d_lockref.count) {
                dentry_lru_del(dentry);
        } else if (!(dentry->d_flags & DCACHE_SHRINK_LIST)) {
-               dentry_lru_move_list(dentry, &data->dispose);
-               dentry->d_flags |= DCACHE_SHRINK_LIST;
+               /*
+                * We can't use d_lru_shrink_move() because we
+                * need to get the global LRU lock and do the
+                * LRU accounting.
+                */
+               d_lru_del(dentry);
+               d_shrink_add(dentry, &data->dispose);
                data->found++;
                ret = D_WALK_NORETRY;
        }
@@ -2673,9 +2827,9 @@ static int prepend(char **buffer, int *buflen, const char *str, int namelen)
 
 /**
  * prepend_name - prepend a pathname in front of current buffer pointer
- * buffer: buffer pointer
- * buflen: allocated length of the buffer
- * name:   name string and length qstr structure
+ * @buffer: buffer pointer
+ * @buflen: allocated length of the buffer
+ * @name:   name string and length qstr structure
  *
  * With RCU path tracing, it may race with d_move(). Use ACCESS_ONCE() to
  * make sure that either the old or the new name pointer and length are
@@ -2713,14 +2867,15 @@ static int prepend_name(char **buffer, int *buflen, struct qstr *name)
  * @buffer: pointer to the end of the buffer
  * @buflen: pointer to buffer length
  *
- * The function tries to write out the pathname without taking any lock other
- * than the RCU read lock to make sure that dentries won't go away. It only
- * checks the sequence number of the global rename_lock as any change in the
- * dentry's d_seq will be preceded by changes in the rename_lock sequence
- * number. If the sequence number had been change, it will restart the whole
- * pathname back-tracing sequence again. It performs a total of 3 trials of
- * lockless back-tracing sequences before falling back to take the
- * rename_lock.
+ * The function will first try to write out the pathname without taking any
+ * lock other than the RCU read lock to make sure that dentries won't go away.
+ * It only checks the sequence number of the global rename_lock as any change
+ * in the dentry's d_seq will be preceded by changes in the rename_lock
+ * sequence number. If the sequence number had been changed, it will restart
+ * the whole pathname back-tracing sequence again by taking the rename_lock.
+ * In this case, there is no need to take the RCU read lock as the recursive
+ * parent pointer references will keep the dentry chain alive as long as no
+ * rename operation is performed.
  */
 static int prepend_path(const struct path *path,
                        const struct path *root,
@@ -2868,6 +3023,16 @@ static int prepend_unreachable(char **buffer, int *buflen)
        return prepend(buffer, buflen, "(unreachable)", 13);
 }
 
+static void get_fs_root_rcu(struct fs_struct *fs, struct path *root)
+{
+       unsigned seq;
+
+       do {
+               seq = read_seqcount_begin(&fs->seq);
+               *root = fs->root;
+       } while (read_seqcount_retry(&fs->seq, seq));
+}
+
 /**
  * d_path - return the path of a dentry
  * @path: path to report
@@ -2900,13 +3065,15 @@ char *d_path(const struct path *path, char *buf, int buflen)
        if (path->dentry->d_op && path->dentry->d_op->d_dname)
                return path->dentry->d_op->d_dname(path->dentry, buf, buflen);
 
-       get_fs_root(current->fs, &root);
+       rcu_read_lock();
+       get_fs_root_rcu(current->fs, &root);
        br_read_lock(&vfsmount_lock);
        error = path_with_deleted(path, &root, &res, &buflen);
        br_read_unlock(&vfsmount_lock);
+       rcu_read_unlock();
+
        if (error < 0)
                res = ERR_PTR(error);
-       path_put(&root);
        return res;
 }
 EXPORT_SYMBOL(d_path);
@@ -3014,6 +3181,18 @@ Elong:
        return ERR_PTR(-ENAMETOOLONG);
 }
 
+static void get_fs_root_and_pwd_rcu(struct fs_struct *fs, struct path *root,
+                                   struct path *pwd)
+{
+       unsigned seq;
+
+       do {
+               seq = read_seqcount_begin(&fs->seq);
+               *root = fs->root;
+               *pwd = fs->pwd;
+       } while (read_seqcount_retry(&fs->seq, seq));
+}
+
 /*
  * NOTE! The user-level library version returns a
  * character pointer. The kernel system call just
@@ -3036,23 +3215,25 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
 {
        int error;
        struct path pwd, root;
-       char *page = (char *) __get_free_page(GFP_USER);
+       char *page = __getname();
 
        if (!page)
                return -ENOMEM;
 
-       get_fs_root_and_pwd(current->fs, &root, &pwd);
+       rcu_read_lock();
+       get_fs_root_and_pwd_rcu(current->fs, &root, &pwd);
 
        error = -ENOENT;
        br_read_lock(&vfsmount_lock);
        if (!d_unlinked(pwd.dentry)) {
                unsigned long len;
-               char *cwd = page + PAGE_SIZE;
-               int buflen = PAGE_SIZE;
+               char *cwd = page + PATH_MAX;
+               int buflen = PATH_MAX;
 
                prepend(&cwd, &buflen, "\0", 1);
                error = prepend_path(&pwd, &root, &cwd, &buflen);
                br_read_unlock(&vfsmount_lock);
+               rcu_read_unlock();
 
                if (error < 0)
                        goto out;
@@ -3065,7 +3246,7 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
                }
 
                error = -ERANGE;
-               len = PAGE_SIZE + page - cwd;
+               len = PATH_MAX + page - cwd;
                if (len <= size) {
                        error = len;
                        if (copy_to_user(buf, cwd, len))
@@ -3073,12 +3254,11 @@ SYSCALL_DEFINE2(getcwd, char __user *, buf, unsigned long, size)
                }
        } else {
                br_read_unlock(&vfsmount_lock);
+               rcu_read_unlock();
        }
 
 out:
-       path_put(&pwd);
-       path_put(&root);
-       free_page((unsigned long) page);
+       __putname(page);
        return error;
 }
 
index c00e055..9fd702f 100644 (file)
@@ -44,6 +44,7 @@ static void drop_slab(void)
                .gfp_mask = GFP_KERNEL,
        };
 
+       nodes_setall(shrink.nodes_to_scan);
        do {
                nr_objects = shrink_slab(&shrink, 1000, 1000);
        } while (nr_objects > 10);
index d107576..c88e355 100644 (file)
@@ -609,39 +609,35 @@ int ecryptfs_init_crypt_ctx(struct ecryptfs_crypt_stat *crypt_stat)
        char *full_alg_name;
        int rc = -EINVAL;
 
-       if (!crypt_stat->cipher) {
-               ecryptfs_printk(KERN_ERR, "No cipher specified\n");
-               goto out;
-       }
        ecryptfs_printk(KERN_DEBUG,
                        "Initializing cipher [%s]; strlen = [%d]; "
                        "key_size_bits = [%zd]\n",
                        crypt_stat->cipher, (int)strlen(crypt_stat->cipher),
                        crypt_stat->key_size << 3);
+       mutex_lock(&crypt_stat->cs_tfm_mutex);
        if (crypt_stat->tfm) {
                rc = 0;
-               goto out;
+               goto out_unlock;
        }
-       mutex_lock(&crypt_stat->cs_tfm_mutex);
        rc = ecryptfs_crypto_api_algify_cipher_name(&full_alg_name,
                                                    crypt_stat->cipher, "cbc");
        if (rc)
                goto out_unlock;
        crypt_stat->tfm = crypto_alloc_ablkcipher(full_alg_name, 0, 0);
-       kfree(full_alg_name);
        if (IS_ERR(crypt_stat->tfm)) {
                rc = PTR_ERR(crypt_stat->tfm);
                crypt_stat->tfm = NULL;
                ecryptfs_printk(KERN_ERR, "cryptfs: init_crypt_ctx(): "
                                "Error initializing cipher [%s]\n",
-                               crypt_stat->cipher);
-               goto out_unlock;
+                               full_alg_name);
+               goto out_free;
        }
        crypto_ablkcipher_set_flags(crypt_stat->tfm, CRYPTO_TFM_REQ_WEAK_KEY);
        rc = 0;
+out_free:
+       kfree(full_alg_name);
 out_unlock:
        mutex_unlock(&crypt_stat->cs_tfm_mutex);
-out:
        return rc;
 }
 
index 293f867..473e09d 100644 (file)
@@ -740,6 +740,7 @@ static void ep_free(struct eventpoll *ep)
                epi = rb_entry(rbp, struct epitem, rbn);
 
                ep_unregister_pollwait(ep, epi);
+               cond_resched();
        }
 
        /*
@@ -754,6 +755,7 @@ static void ep_free(struct eventpoll *ep)
        while ((rbp = rb_first(&ep->rbr)) != NULL) {
                epi = rb_entry(rbp, struct epitem, rbn);
                ep_remove(ep, epi);
+               cond_resched();
        }
        mutex_unlock(&ep->mtx);
 
index fd774c7..8875dd1 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -74,6 +74,8 @@ static DEFINE_RWLOCK(binfmt_lock);
 void __register_binfmt(struct linux_binfmt * fmt, int insert)
 {
        BUG_ON(!fmt);
+       if (WARN_ON(!fmt->load_binary))
+               return;
        write_lock(&binfmt_lock);
        insert ? list_add(&fmt->lh, &formats) :
                 list_add_tail(&fmt->lh, &formats);
@@ -266,7 +268,7 @@ static int __bprm_mm_init(struct linux_binprm *bprm)
        BUILD_BUG_ON(VM_STACK_FLAGS & VM_STACK_INCOMPLETE_SETUP);
        vma->vm_end = STACK_TOP_MAX;
        vma->vm_start = vma->vm_end - PAGE_SIZE;
-       vma->vm_flags = VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP;
+       vma->vm_flags = VM_SOFTDIRTY | VM_STACK_FLAGS | VM_STACK_INCOMPLETE_SETUP;
        vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
        INIT_LIST_HEAD(&vma->anon_vma_chain);
 
@@ -1365,18 +1367,18 @@ out:
 }
 EXPORT_SYMBOL(remove_arg_zero);
 
+#define printable(c) (((c)=='\t') || ((c)=='\n') || (0x20<=(c) && (c)<=0x7e))
 /*
  * cycle the list of binary formats handler, until one recognizes the image
  */
 int search_binary_handler(struct linux_binprm *bprm)
 {
-       unsigned int depth = bprm->recursion_depth;
-       int try,retval;
+       bool need_retry = IS_ENABLED(CONFIG_MODULES);
        struct linux_binfmt *fmt;
-       pid_t old_pid, old_vpid;
+       int retval;
 
        /* This allows 4 levels of binfmt rewrites before failing hard. */
-       if (depth > 5)
+       if (bprm->recursion_depth > 5)
                return -ELOOP;
 
        retval = security_bprm_check(bprm);
@@ -1387,71 +1389,67 @@ int search_binary_handler(struct linux_binprm *bprm)
        if (retval)
                return retval;
 
+       retval = -ENOENT;
+ retry:
+       read_lock(&binfmt_lock);
+       list_for_each_entry(fmt, &formats, lh) {
+               if (!try_module_get(fmt->module))
+                       continue;
+               read_unlock(&binfmt_lock);
+               bprm->recursion_depth++;
+               retval = fmt->load_binary(bprm);
+               bprm->recursion_depth--;
+               if (retval >= 0 || retval != -ENOEXEC ||
+                   bprm->mm == NULL || bprm->file == NULL) {
+                       put_binfmt(fmt);
+                       return retval;
+               }
+               read_lock(&binfmt_lock);
+               put_binfmt(fmt);
+       }
+       read_unlock(&binfmt_lock);
+
+       if (need_retry && retval == -ENOEXEC) {
+               if (printable(bprm->buf[0]) && printable(bprm->buf[1]) &&
+                   printable(bprm->buf[2]) && printable(bprm->buf[3]))
+                       return retval;
+               if (request_module("binfmt-%04x", *(ushort *)(bprm->buf + 2)) < 0)
+                       return retval;
+               need_retry = false;
+               goto retry;
+       }
+
+       return retval;
+}
+EXPORT_SYMBOL(search_binary_handler);
+
+static int exec_binprm(struct linux_binprm *bprm)
+{
+       pid_t old_pid, old_vpid;
+       int ret;
+
        /* Need to fetch pid before load_binary changes it */
        old_pid = current->pid;
        rcu_read_lock();
        old_vpid = task_pid_nr_ns(current, task_active_pid_ns(current->parent));
        rcu_read_unlock();
 
-       retval = -ENOENT;
-       for (try=0; try<2; try++) {
-               read_lock(&binfmt_lock);
-               list_for_each_entry(fmt, &formats, lh) {
-                       int (*fn)(struct linux_binprm *) = fmt->load_binary;
-                       if (!fn)
-                               continue;
-                       if (!try_module_get(fmt->module))
-                               continue;
-                       read_unlock(&binfmt_lock);
-                       bprm->recursion_depth = depth + 1;
-                       retval = fn(bprm);
-                       bprm->recursion_depth = depth;
-                       if (retval >= 0) {
-                               if (depth == 0) {
-                                       trace_sched_process_exec(current, old_pid, bprm);
-                                       ptrace_event(PTRACE_EVENT_EXEC, old_vpid);
-                               }
-                               put_binfmt(fmt);
-                               allow_write_access(bprm->file);
-                               if (bprm->file)
-                                       fput(bprm->file);
-                               bprm->file = NULL;
-                               current->did_exec = 1;
-                               proc_exec_connector(current);
-                               return retval;
-                       }
-                       read_lock(&binfmt_lock);
-                       put_binfmt(fmt);
-                       if (retval != -ENOEXEC || bprm->mm == NULL)
-                               break;
-                       if (!bprm->file) {
-                               read_unlock(&binfmt_lock);
-                               return retval;
-                       }
+       ret = search_binary_handler(bprm);
+       if (ret >= 0) {
+               trace_sched_process_exec(current, old_pid, bprm);
+               ptrace_event(PTRACE_EVENT_EXEC, old_vpid);
+               current->did_exec = 1;
+               proc_exec_connector(current);
+
+               if (bprm->file) {
+                       allow_write_access(bprm->file);
+                       fput(bprm->file);
+                       bprm->file = NULL; /* to catch use-after-free */
                }
-               read_unlock(&binfmt_lock);
-#ifdef CONFIG_MODULES
-               if (retval != -ENOEXEC || bprm->mm == NULL) {
-                       break;
-               } else {
-#define printable(c) (((c)=='\t') || ((c)=='\n') || (0x20<=(c) && (c)<=0x7e))
-                       if (printable(bprm->buf[0]) &&
-                           printable(bprm->buf[1]) &&
-                           printable(bprm->buf[2]) &&
-                           printable(bprm->buf[3]))
-                               break; /* -ENOEXEC */
-                       if (try)
-                               break; /* -ENOEXEC */
-                       request_module("binfmt-%04x", *(unsigned short *)(&bprm->buf[2]));
-               }
-#else
-               break;
-#endif
        }
-       return retval;
-}
 
-EXPORT_SYMBOL(search_binary_handler);
+       return ret;
+}
 
 /*
  * sys_execve() executes a new program.
@@ -1541,7 +1539,7 @@ static int do_execve_common(const char *filename,
        if (retval < 0)
                goto out;
 
-       retval = search_binary_handler(bprm);
+       retval = exec_binprm(bprm);
        if (retval < 0)
                goto out;
 
index 2ec8eb1..a52a5d2 100644 (file)
@@ -861,7 +861,7 @@ static int exofs_writepage(struct page *page, struct writeback_control *wbc)
 static void _write_failed(struct inode *inode, loff_t to)
 {
        if (to > inode->i_size)
-               truncate_pagecache(inode, to, inode->i_size);
+               truncate_pagecache(inode, inode->i_size);
 }
 
 int exofs_write_begin(struct file *file, struct address_space *mapping,
index 0a87bb1..c260de6 100644 (file)
@@ -58,7 +58,7 @@ static void ext2_write_failed(struct address_space *mapping, loff_t to)
        struct inode *inode = mapping->host;
 
        if (to > inode->i_size) {
-               truncate_pagecache(inode, to, inode->i_size);
+               truncate_pagecache(inode, inode->i_size);
                ext2_truncate_blocks(inode, inode->i_size);
        }
 }
index 2d1bdbe..3981ff7 100644 (file)
@@ -931,13 +931,15 @@ static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
        struct ext4_inode_info *ei;
        struct list_head *cur, *tmp;
        LIST_HEAD(skipped);
-       int ret, nr_shrunk = 0;
+       int nr_shrunk = 0;
        int retried = 0, skip_precached = 1, nr_skipped = 0;
 
        spin_lock(&sbi->s_es_lru_lock);
 
 retry:
        list_for_each_safe(cur, tmp, &sbi->s_es_lru) {
+               int shrunk;
+
                /*
                 * If we have already reclaimed all extents from extent
                 * status tree, just stop the loop immediately.
@@ -964,13 +966,13 @@ retry:
                        continue;
 
                write_lock(&ei->i_es_lock);
-               ret = __es_try_to_reclaim_extents(ei, nr_to_scan);
+               shrunk = __es_try_to_reclaim_extents(ei, nr_to_scan);
                if (ei->i_es_lru_nr == 0)
                        list_del_init(&ei->i_es_lru);
                write_unlock(&ei->i_es_lock);
 
-               nr_shrunk += ret;
-               nr_to_scan -= ret;
+               nr_shrunk += shrunk;
+               nr_to_scan -= shrunk;
                if (nr_to_scan == 0)
                        break;
        }
@@ -1007,7 +1009,20 @@ retry:
        return nr_shrunk;
 }
 
-static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc)
+static unsigned long ext4_es_count(struct shrinker *shrink,
+                                  struct shrink_control *sc)
+{
+       unsigned long nr;
+       struct ext4_sb_info *sbi;
+
+       sbi = container_of(shrink, struct ext4_sb_info, s_es_shrinker);
+       nr = percpu_counter_read_positive(&sbi->s_extent_cache_cnt);
+       trace_ext4_es_shrink_enter(sbi->s_sb, sc->nr_to_scan, nr);
+       return nr;
+}
+
+static unsigned long ext4_es_scan(struct shrinker *shrink,
+                                 struct shrink_control *sc)
 {
        struct ext4_sb_info *sbi = container_of(shrink,
                                        struct ext4_sb_info, s_es_shrinker);
@@ -1022,9 +1037,8 @@ static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc)
 
        nr_shrunk = __ext4_es_shrink(sbi, nr_to_scan, NULL);
 
-       ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt);
        trace_ext4_es_shrink_exit(sbi->s_sb, nr_shrunk, ret);
-       return ret;
+       return nr_shrunk;
 }
 
 void ext4_es_register_shrinker(struct ext4_sb_info *sbi)
@@ -1032,7 +1046,8 @@ void ext4_es_register_shrinker(struct ext4_sb_info *sbi)
        INIT_LIST_HEAD(&sbi->s_es_lru);
        spin_lock_init(&sbi->s_es_lru_lock);
        sbi->s_es_last_sorted = 0;
-       sbi->s_es_shrinker.shrink = ext4_es_shrink;
+       sbi->s_es_shrinker.scan_objects = ext4_es_scan;
+       sbi->s_es_shrinker.count_objects = ext4_es_count;
        sbi->s_es_shrinker.seeks = DEFAULT_SEEKS;
        register_shrinker(&sbi->s_es_shrinker);
 }
@@ -1076,7 +1091,7 @@ static int __es_try_to_reclaim_extents(struct ext4_inode_info *ei,
        struct ext4_es_tree *tree = &ei->i_es_tree;
        struct rb_node *node;
        struct extent_status *es;
-       int nr_shrunk = 0;
+       unsigned long nr_shrunk = 0;
        static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
                                      DEFAULT_RATELIMIT_BURST);
 
index c79fd7d..0d424d7 100644 (file)
@@ -4587,7 +4587,6 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
 
        if (attr->ia_valid & ATTR_SIZE && attr->ia_size != inode->i_size) {
                handle_t *handle;
-               loff_t oldsize = inode->i_size;
 
                if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
                        struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
@@ -4650,7 +4649,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
                 * Truncate pagecache after we've waited for commit
                 * in data=journal mode to make pages freeable.
                 */
-               truncate_pagecache(inode, oldsize, inode->i_size);
+               truncate_pagecache(inode, inode->i_size);
        }
        /*
         * We want to call ext4_truncate() even if attr->ia_size ==
index 11b51bb..0062da2 100644 (file)
@@ -147,7 +147,7 @@ static void fat_write_failed(struct address_space *mapping, loff_t to)
        struct inode *inode = mapping->host;
 
        if (to > inode->i_size) {
-               truncate_pagecache(inode, to, inode->i_size);
+               truncate_pagecache(inode, inode->i_size);
                fat_truncate_blocks(inode, inode->i_size);
        }
 }
index 322cd37..abdd15a 100644 (file)
@@ -311,8 +311,7 @@ void fput(struct file *file)
                                return;
                        /*
                         * After this task has run exit_task_work(),
-                        * task_work_add() will fail.  free_ipc_ns()->
-                        * shm_destroy() can do this.  Fall through to delayed
+                        * task_work_add() will fail.  Fall through to delayed
                         * fput to avoid leaking *file.
                         */
                }
index 68851ff..9f4935b 100644 (file)
@@ -69,7 +69,7 @@ static inline struct backing_dev_info *inode_to_bdi(struct inode *inode)
 {
        struct super_block *sb = inode->i_sb;
 
-       if (strcmp(sb->s_type->name, "bdev") == 0)
+       if (sb_is_blkdev_sb(sb))
                return inode->i_mapping->backing_dev_info;
 
        return sb->s_bdi;
@@ -251,11 +251,13 @@ static int move_expired_inodes(struct list_head *delaying_queue,
                if (work->older_than_this &&
                    inode_dirtied_after(inode, *work->older_than_this))
                        break;
+               list_move(&inode->i_wb_list, &tmp);
+               moved++;
+               if (sb_is_blkdev_sb(inode->i_sb))
+                       continue;
                if (sb && sb != inode->i_sb)
                        do_sb_sort = 1;
                sb = inode->i_sb;
-               list_move(&inode->i_wb_list, &tmp);
-               moved++;
        }
 
        /* just one sb in list, splice to dispatch_queue and we're done */
@@ -723,7 +725,7 @@ static long __writeback_inodes_wb(struct bdi_writeback *wb,
        return wrote;
 }
 
-long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages,
+static long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages,
                                enum wb_reason reason)
 {
        struct wb_writeback_work work = {
@@ -1049,10 +1051,8 @@ void wakeup_flusher_threads(long nr_pages, enum wb_reason reason)
 {
        struct backing_dev_info *bdi;
 
-       if (!nr_pages) {
-               nr_pages = global_page_state(NR_FILE_DIRTY) +
-                               global_page_state(NR_UNSTABLE_NFS);
-       }
+       if (!nr_pages)
+               nr_pages = get_nr_dirty_pages();
 
        rcu_read_lock();
        list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
@@ -1173,6 +1173,8 @@ void __mark_inode_dirty(struct inode *inode, int flags)
                        bool wakeup_bdi = false;
                        bdi = inode_to_bdi(inode);
 
+                       spin_unlock(&inode->i_lock);
+                       spin_lock(&bdi->wb.list_lock);
                        if (bdi_cap_writeback_dirty(bdi)) {
                                WARN(!test_bit(BDI_registered, &bdi->state),
                                     "bdi-%s not registered\n", bdi->name);
@@ -1187,8 +1189,6 @@ void __mark_inode_dirty(struct inode *inode, int flags)
                                        wakeup_bdi = true;
                        }
 
-                       spin_unlock(&inode->i_lock);
-                       spin_lock(&bdi->wb.list_lock);
                        inode->dirtied_when = jiffies;
                        list_move(&inode->i_wb_list, &bdi->wb.b_dirty);
                        spin_unlock(&bdi->wb.list_lock);
index 8702b73..73899c1 100644 (file)
@@ -913,7 +913,7 @@ int __fscache_write_page(struct fscache_cookie *cookie,
                (1 << FSCACHE_OP_WAITING) |
                (1 << FSCACHE_OP_UNUSE_COOKIE);
 
-       ret = radix_tree_preload(gfp & ~__GFP_HIGHMEM);
+       ret = radix_tree_maybe_preload(gfp & ~__GFP_HIGHMEM);
        if (ret < 0)
                goto nomem_free;
 
index 3ac9108..62b43b5 100644 (file)
@@ -1678,7 +1678,7 @@ int fuse_do_setattr(struct inode *inode, struct iattr *attr,
         * FUSE_NOWRITE, otherwise fuse_launder_page() would deadlock.
         */
        if (S_ISREG(inode->i_mode) && oldsize != outarg.attr.size) {
-               truncate_pagecache(inode, oldsize, outarg.attr.size);
+               truncate_pagecache(inode, outarg.attr.size);
                invalidate_inode_pages2(inode->i_mapping);
        }
 
index e0fe703..a8ce6da 100644 (file)
@@ -218,7 +218,7 @@ void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
                bool inval = false;
 
                if (oldsize != attr->size) {
-                       truncate_pagecache(inode, oldsize, attr->size);
+                       truncate_pagecache(inode, attr->size);
                        inval = true;
                } else if (fc->auto_inval_data) {
                        struct timespec new_mtime = {
@@ -930,7 +930,7 @@ static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb)
        fc->bdi.name = "fuse";
        fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
        /* fuse does it's own writeback accounting */
-       fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB;
+       fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB | BDI_CAP_STRICTLIMIT;
 
        err = bdi_init(&fc->bdi);
        if (err)
index 5e2f56f..62a65fc 100644 (file)
@@ -1016,7 +1016,7 @@ static int gfs2_journaled_truncate(struct inode *inode, u64 oldsize, u64 newsize
                chunk = oldsize - newsize;
                if (chunk > max_chunk)
                        chunk = max_chunk;
-               truncate_pagecache(inode, oldsize, oldsize - chunk);
+               truncate_pagecache(inode, oldsize - chunk);
                oldsize -= chunk;
                gfs2_trans_end(sdp);
                error = gfs2_trans_begin(sdp, RES_DINODE, GFS2_JTRUNC_REVOKES);
@@ -1067,7 +1067,7 @@ static int trunc_start(struct inode *inode, u64 oldsize, u64 newsize)
        if (journaled)
                error = gfs2_journaled_truncate(inode, oldsize, newsize);
        else
-               truncate_pagecache(inode, oldsize, newsize);
+               truncate_pagecache(inode, newsize);
 
        if (error) {
                brelse(dibh);
index 722329c..c2f41b4 100644 (file)
@@ -1427,21 +1427,22 @@ __acquires(&lru_lock)
  * gfs2_dispose_glock_lru() above.
  */
 
-static void gfs2_scan_glock_lru(int nr)
+static long gfs2_scan_glock_lru(int nr)
 {
        struct gfs2_glock *gl;
        LIST_HEAD(skipped);
        LIST_HEAD(dispose);
+       long freed = 0;
 
        spin_lock(&lru_lock);
-       while(nr && !list_empty(&lru_list)) {
+       while ((nr-- >= 0) && !list_empty(&lru_list)) {
                gl = list_entry(lru_list.next, struct gfs2_glock, gl_lru);
 
                /* Test for being demotable */
                if (!test_and_set_bit(GLF_LOCK, &gl->gl_flags)) {
                        list_move(&gl->gl_lru, &dispose);
                        atomic_dec(&lru_count);
-                       nr--;
+                       freed++;
                        continue;
                }
 
@@ -1451,23 +1452,28 @@ static void gfs2_scan_glock_lru(int nr)
        if (!list_empty(&dispose))
                gfs2_dispose_glock_lru(&dispose);
        spin_unlock(&lru_lock);
+
+       return freed;
 }
 
-static int gfs2_shrink_glock_memory(struct shrinker *shrink,
-                                   struct shrink_control *sc)
+static unsigned long gfs2_glock_shrink_scan(struct shrinker *shrink,
+                                           struct shrink_control *sc)
 {
-       if (sc->nr_to_scan) {
-               if (!(sc->gfp_mask & __GFP_FS))
-                       return -1;
-               gfs2_scan_glock_lru(sc->nr_to_scan);
-       }
+       if (!(sc->gfp_mask & __GFP_FS))
+               return SHRINK_STOP;
+       return gfs2_scan_glock_lru(sc->nr_to_scan);
+}
 
-       return (atomic_read(&lru_count) / 100) * sysctl_vfs_cache_pressure;
+static unsigned long gfs2_glock_shrink_count(struct shrinker *shrink,
+                                            struct shrink_control *sc)
+{
+       return vfs_pressure_ratio(atomic_read(&lru_count));
 }
 
 static struct shrinker glock_shrinker = {
-       .shrink = gfs2_shrink_glock_memory,
        .seeks = DEFAULT_SEEKS,
+       .count_objects = gfs2_glock_shrink_count,
+       .scan_objects = gfs2_glock_shrink_scan,
 };
 
 /**
index 7b0f504..351586e 100644 (file)
@@ -32,7 +32,8 @@
 struct workqueue_struct *gfs2_control_wq;
 
 static struct shrinker qd_shrinker = {
-       .shrink = gfs2_shrink_qd_memory,
+       .count_objects = gfs2_qd_shrink_count,
+       .scan_objects = gfs2_qd_shrink_scan,
        .seeks = DEFAULT_SEEKS,
 };
 
index 3768c2f..db44135 100644 (file)
@@ -75,17 +75,16 @@ static LIST_HEAD(qd_lru_list);
 static atomic_t qd_lru_count = ATOMIC_INIT(0);
 static DEFINE_SPINLOCK(qd_lru_lock);
 
-int gfs2_shrink_qd_memory(struct shrinker *shrink, struct shrink_control *sc)
+unsigned long gfs2_qd_shrink_scan(struct shrinker *shrink,
+                                 struct shrink_control *sc)
 {
        struct gfs2_quota_data *qd;
        struct gfs2_sbd *sdp;
        int nr_to_scan = sc->nr_to_scan;
-
-       if (nr_to_scan == 0)
-               goto out;
+       long freed = 0;
 
        if (!(sc->gfp_mask & __GFP_FS))
-               return -1;
+               return SHRINK_STOP;
 
        spin_lock(&qd_lru_lock);
        while (nr_to_scan && !list_empty(&qd_lru_list)) {
@@ -110,11 +109,16 @@ int gfs2_shrink_qd_memory(struct shrinker *shrink, struct shrink_control *sc)
                kmem_cache_free(gfs2_quotad_cachep, qd);
                spin_lock(&qd_lru_lock);
                nr_to_scan--;
+               freed++;
        }
        spin_unlock(&qd_lru_lock);
+       return freed;
+}
 
-out:
-       return (atomic_read(&qd_lru_count) * sysctl_vfs_cache_pressure) / 100;
+unsigned long gfs2_qd_shrink_count(struct shrinker *shrink,
+                                  struct shrink_control *sc)
+{
+       return vfs_pressure_ratio(atomic_read(&qd_lru_count));
 }
 
 static u64 qd2index(struct gfs2_quota_data *qd)
index 4f5e6e4..0f64d9d 100644 (file)
@@ -53,8 +53,10 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip)
        return ret;
 }
 
-extern int gfs2_shrink_qd_memory(struct shrinker *shrink,
-                                struct shrink_control *sc);
+extern unsigned long gfs2_qd_shrink_count(struct shrinker *shrink,
+                                         struct shrink_control *sc);
+extern unsigned long gfs2_qd_shrink_scan(struct shrinker *shrink,
+                                        struct shrink_control *sc);
 extern const struct quotactl_ops gfs2_quotactl_ops;
 
 #endif /* __QUOTA_DOT_H__ */
index f9299d8..380ab31 100644 (file)
@@ -41,7 +41,7 @@ static void hfs_write_failed(struct address_space *mapping, loff_t to)
        struct inode *inode = mapping->host;
 
        if (to > inode->i_size) {
-               truncate_pagecache(inode, to, inode->i_size);
+               truncate_pagecache(inode, inode->i_size);
                hfs_file_truncate(inode);
        }
 }
index a633718..24bc20f 100644 (file)
@@ -11,3 +11,21 @@ config HFSPLUS_FS
          MacOS 8. It includes all Mac specific filesystem data such as
          data forks and creator codes, but it also has several UNIX
          style features such as file ownership and permissions.
+
+config HFSPLUS_FS_POSIX_ACL
+       bool "HFS+ POSIX Access Control Lists"
+       depends on HFSPLUS_FS
+       select FS_POSIX_ACL
+       help
+         POSIX Access Control Lists (ACLs) support permissions for users and
+         groups beyond the owner/group/world scheme.
+
+         To learn more about Access Control Lists, visit the POSIX ACLs for
+         Linux website <http://acl.bestbits.at/>.
+
+         Note that POSIX ACLs are honored only under Linux; they do not
+         correspond to anything under Mac OS X. Mac OS X, beginning with
+         version 10.4 ("Tiger"), supports NFSv4 ACLs, which are part of
+         the NFSv4 standard.
+
+         If you don't know what Access Control Lists are, say N.
index 09d278b..683fca2 100644 (file)
@@ -7,3 +7,5 @@ obj-$(CONFIG_HFSPLUS_FS) += hfsplus.o
 hfsplus-objs := super.o options.o inode.o ioctl.o extents.o catalog.o dir.o btree.o \
                bnode.o brec.o bfind.o tables.o unicode.o wrapper.o bitmap.o part_tbl.o \
                attributes.o xattr.o xattr_user.o xattr_security.o xattr_trusted.o
+
+hfsplus-$(CONFIG_HFSPLUS_FS_POSIX_ACL) += posix_acl.o
diff --git a/fs/hfsplus/acl.h b/fs/hfsplus/acl.h
new file mode 100644 (file)
index 0000000..07c0d49
--- /dev/null
@@ -0,0 +1,30 @@
+/*
+ * linux/fs/hfsplus/acl.h
+ *
+ * Vyacheslav Dubeyko <slava@dubeyko.com>
+ *
+ * Handler for Posix Access Control Lists (ACLs) support.
+ */
+
+#include <linux/posix_acl_xattr.h>
+
+#ifdef CONFIG_HFSPLUS_FS_POSIX_ACL
+
+/* posix_acl.c */
+struct posix_acl *hfsplus_get_posix_acl(struct inode *inode, int type);
+extern int hfsplus_posix_acl_chmod(struct inode *);
+extern int hfsplus_init_posix_acl(struct inode *, struct inode *);
+
+#else  /* CONFIG_HFSPLUS_FS_POSIX_ACL */
+#define hfsplus_get_posix_acl NULL
+
+static inline int hfsplus_posix_acl_chmod(struct inode *inode)
+{
+       return 0;
+}
+
+static inline int hfsplus_init_posix_acl(struct inode *inode, struct inode *dir)
+{
+       return 0;
+}
+#endif  /* CONFIG_HFSPLUS_FS_POSIX_ACL */
index d8ce4bd..4a4fea0 100644 (file)
@@ -16,6 +16,7 @@
 #include "hfsplus_fs.h"
 #include "hfsplus_raw.h"
 #include "xattr.h"
+#include "acl.h"
 
 static inline void hfsplus_instantiate(struct dentry *dentry,
                                       struct inode *inode, u32 cnid)
@@ -529,6 +530,9 @@ const struct inode_operations hfsplus_dir_inode_operations = {
        .getxattr               = generic_getxattr,
        .listxattr              = hfsplus_listxattr,
        .removexattr            = hfsplus_removexattr,
+#ifdef CONFIG_HFSPLUS_FS_POSIX_ACL
+       .get_acl                = hfsplus_get_posix_acl,
+#endif
 };
 
 const struct file_operations hfsplus_dir_operations = {
index ede7931..2b9cd01 100644 (file)
@@ -30,6 +30,7 @@
 #define DBG_EXTENT     0x00000020
 #define DBG_BITMAP     0x00000040
 #define DBG_ATTR_MOD   0x00000080
+#define DBG_ACL_MOD    0x00000100
 
 #if 0
 #define DBG_MASK       (DBG_EXTENT|DBG_INODE|DBG_BNODE_MOD)
index f833d35..37213d0 100644 (file)
@@ -19,6 +19,7 @@
 #include "hfsplus_fs.h"
 #include "hfsplus_raw.h"
 #include "xattr.h"
+#include "acl.h"
 
 static int hfsplus_readpage(struct file *file, struct page *page)
 {
@@ -35,7 +36,7 @@ static void hfsplus_write_failed(struct address_space *mapping, loff_t to)
        struct inode *inode = mapping->host;
 
        if (to > inode->i_size) {
-               truncate_pagecache(inode, to, inode->i_size);
+               truncate_pagecache(inode, inode->i_size);
                hfsplus_file_truncate(inode);
        }
 }
@@ -316,6 +317,13 @@ static int hfsplus_setattr(struct dentry *dentry, struct iattr *attr)
 
        setattr_copy(inode, attr);
        mark_inode_dirty(inode);
+
+       if (attr->ia_valid & ATTR_MODE) {
+               error = hfsplus_posix_acl_chmod(inode);
+               if (unlikely(error))
+                       return error;
+       }
+
        return 0;
 }
 
@@ -383,6 +391,9 @@ static const struct inode_operations hfsplus_file_inode_operations = {
        .getxattr       = generic_getxattr,
        .listxattr      = hfsplus_listxattr,
        .removexattr    = hfsplus_removexattr,
+#ifdef CONFIG_HFSPLUS_FS_POSIX_ACL
+       .get_acl        = hfsplus_get_posix_acl,
+#endif
 };
 
 static const struct file_operations hfsplus_file_operations = {
diff --git a/fs/hfsplus/posix_acl.c b/fs/hfsplus/posix_acl.c
new file mode 100644 (file)
index 0000000..b609cc1
--- /dev/null
@@ -0,0 +1,274 @@
+/*
+ * linux/fs/hfsplus/posix_acl.c
+ *
+ * Vyacheslav Dubeyko <slava@dubeyko.com>
+ *
+ * Handler for Posix Access Control Lists (ACLs) support.
+ */
+
+#include "hfsplus_fs.h"
+#include "xattr.h"
+#include "acl.h"
+
+struct posix_acl *hfsplus_get_posix_acl(struct inode *inode, int type)
+{
+       struct posix_acl *acl;
+       char *xattr_name;
+       char *value = NULL;
+       ssize_t size;
+
+       acl = get_cached_acl(inode, type);
+       if (acl != ACL_NOT_CACHED)
+               return acl;
+
+       switch (type) {
+       case ACL_TYPE_ACCESS:
+               xattr_name = POSIX_ACL_XATTR_ACCESS;
+               break;
+       case ACL_TYPE_DEFAULT:
+               xattr_name = POSIX_ACL_XATTR_DEFAULT;
+               break;
+       default:
+               return ERR_PTR(-EINVAL);
+       }
+
+       size = __hfsplus_getxattr(inode, xattr_name, NULL, 0);
+
+       if (size > 0) {
+               value = (char *)hfsplus_alloc_attr_entry();
+               if (unlikely(!value))
+                       return ERR_PTR(-ENOMEM);
+               size = __hfsplus_getxattr(inode, xattr_name, value, size);
+       }
+
+       if (size > 0)
+               acl = posix_acl_from_xattr(&init_user_ns, value, size);
+       else if (size == -ENODATA)
+               acl = NULL;
+       else
+               acl = ERR_PTR(size);
+
+       hfsplus_destroy_attr_entry((hfsplus_attr_entry *)value);
+
+       if (!IS_ERR(acl))
+               set_cached_acl(inode, type, acl);
+
+       return acl;
+}
+
+static int hfsplus_set_posix_acl(struct inode *inode,
+                                       int type,
+                                       struct posix_acl *acl)
+{
+       int err;
+       char *xattr_name;
+       size_t size = 0;
+       char *value = NULL;
+
+       if (S_ISLNK(inode->i_mode))
+               return -EOPNOTSUPP;
+
+       switch (type) {
+       case ACL_TYPE_ACCESS:
+               xattr_name = POSIX_ACL_XATTR_ACCESS;
+               if (acl) {
+                       err = posix_acl_equiv_mode(acl, &inode->i_mode);
+                       if (err < 0)
+                               return err;
+               }
+               err = 0;
+               break;
+
+       case ACL_TYPE_DEFAULT:
+               xattr_name = POSIX_ACL_XATTR_DEFAULT;
+               if (!S_ISDIR(inode->i_mode))
+                       return acl ? -EACCES : 0;
+               break;
+
+       default:
+               return -EINVAL;
+       }
+
+       if (acl) {
+               size = posix_acl_xattr_size(acl->a_count);
+               if (unlikely(size > HFSPLUS_MAX_INLINE_DATA_SIZE))
+                       return -ENOMEM;
+               value = (char *)hfsplus_alloc_attr_entry();
+               if (unlikely(!value))
+                       return -ENOMEM;
+               err = posix_acl_to_xattr(&init_user_ns, acl, value, size);
+               if (unlikely(err < 0))
+                       goto end_set_acl;
+       }
+
+       err = __hfsplus_setxattr(inode, xattr_name, value, size, 0);
+
+end_set_acl:
+       hfsplus_destroy_attr_entry((hfsplus_attr_entry *)value);
+
+       if (!err)
+               set_cached_acl(inode, type, acl);
+
+       return err;
+}
+
+int hfsplus_init_posix_acl(struct inode *inode, struct inode *dir)
+{
+       int err = 0;
+       struct posix_acl *acl = NULL;
+
+       hfs_dbg(ACL_MOD,
+               "[%s]: ino %lu, dir->ino %lu\n",
+               __func__, inode->i_ino, dir->i_ino);
+
+       if (S_ISLNK(inode->i_mode))
+               return 0;
+
+       acl = hfsplus_get_posix_acl(dir, ACL_TYPE_DEFAULT);
+       if (IS_ERR(acl))
+               return PTR_ERR(acl);
+
+       if (acl) {
+               if (S_ISDIR(inode->i_mode)) {
+                       err = hfsplus_set_posix_acl(inode,
+                                                       ACL_TYPE_DEFAULT,
+                                                       acl);
+                       if (unlikely(err))
+                               goto init_acl_cleanup;
+               }
+
+               err = posix_acl_create(&acl, GFP_NOFS, &inode->i_mode);
+               if (unlikely(err < 0))
+                       return err;
+
+               if (err > 0)
+                       err = hfsplus_set_posix_acl(inode,
+                                                       ACL_TYPE_ACCESS,
+                                                       acl);
+       } else
+               inode->i_mode &= ~current_umask();
+
+init_acl_cleanup:
+       posix_acl_release(acl);
+       return err;
+}
+
+int hfsplus_posix_acl_chmod(struct inode *inode)
+{
+       int err;
+       struct posix_acl *acl;
+
+       hfs_dbg(ACL_MOD, "[%s]: ino %lu\n", __func__, inode->i_ino);
+
+       if (S_ISLNK(inode->i_mode))
+               return -EOPNOTSUPP;
+
+       acl = hfsplus_get_posix_acl(inode, ACL_TYPE_ACCESS);
+       if (IS_ERR(acl) || !acl)
+               return PTR_ERR(acl);
+
+       err = posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode);
+       if (unlikely(err))
+               return err;
+
+       err = hfsplus_set_posix_acl(inode, ACL_TYPE_ACCESS, acl);
+       posix_acl_release(acl);
+       return err;
+}
+
+static int hfsplus_xattr_get_posix_acl(struct dentry *dentry,
+                                       const char *name,
+                                       void *buffer,
+                                       size_t size,
+                                       int type)
+{
+       int err = 0;
+       struct posix_acl *acl;
+
+       hfs_dbg(ACL_MOD,
+               "[%s]: ino %lu, buffer %p, size %zu, type %#x\n",
+               __func__, dentry->d_inode->i_ino, buffer, size, type);
+
+       if (strcmp(name, "") != 0)
+               return -EINVAL;
+
+       acl = hfsplus_get_posix_acl(dentry->d_inode, type);
+       if (IS_ERR(acl))
+               return PTR_ERR(acl);
+       if (acl == NULL)
+               return -ENODATA;
+
+       err = posix_acl_to_xattr(&init_user_ns, acl, buffer, size);
+       posix_acl_release(acl);
+
+       return err;
+}
+
+static int hfsplus_xattr_set_posix_acl(struct dentry *dentry,
+                                       const char *name,
+                                       const void *value,
+                                       size_t size,
+                                       int flags,
+                                       int type)
+{
+       int err = 0;
+       struct inode *inode = dentry->d_inode;
+       struct posix_acl *acl = NULL;
+
+       hfs_dbg(ACL_MOD,
+               "[%s]: ino %lu, value %p, size %zu, flags %#x, type %#x\n",
+               __func__, inode->i_ino, value, size, flags, type);
+
+       if (strcmp(name, "") != 0)
+               return -EINVAL;
+
+       if (!inode_owner_or_capable(inode))
+               return -EPERM;
+
+       if (value) {
+               acl = posix_acl_from_xattr(&init_user_ns, value, size);
+               if (IS_ERR(acl))
+                       return PTR_ERR(acl);
+               else if (acl) {
+                       err = posix_acl_valid(acl);
+                       if (err)
+                               goto end_xattr_set_acl;
+               }
+       }
+
+       err = hfsplus_set_posix_acl(inode, type, acl);
+
+end_xattr_set_acl:
+       posix_acl_release(acl);
+       return err;
+}
+
+static size_t hfsplus_xattr_list_posix_acl(struct dentry *dentry,
+                                               char *list,
+                                               size_t list_size,
+                                               const char *name,
+                                               size_t name_len,
+                                               int type)
+{
+       /*
+        * This method is not used.
+        * hfsplus_listxattr() is used instead of generic_listxattr().
+        */
+       return -EOPNOTSUPP;
+}
+
+const struct xattr_handler hfsplus_xattr_acl_access_handler = {
+       .prefix = POSIX_ACL_XATTR_ACCESS,
+       .flags  = ACL_TYPE_ACCESS,
+       .list   = hfsplus_xattr_list_posix_acl,
+       .get    = hfsplus_xattr_get_posix_acl,
+       .set    = hfsplus_xattr_set_posix_acl,
+};
+
+const struct xattr_handler hfsplus_xattr_acl_default_handler = {
+       .prefix = POSIX_ACL_XATTR_DEFAULT,
+       .flags  = ACL_TYPE_DEFAULT,
+       .list   = hfsplus_xattr_list_posix_acl,
+       .get    = hfsplus_xattr_get_posix_acl,
+       .set    = hfsplus_xattr_set_posix_acl,
+};
index f663461..bd8471f 100644 (file)
@@ -8,11 +8,16 @@
 
 #include "hfsplus_fs.h"
 #include "xattr.h"
+#include "acl.h"
 
 const struct xattr_handler *hfsplus_xattr_handlers[] = {
        &hfsplus_xattr_osx_handler,
        &hfsplus_xattr_user_handler,
        &hfsplus_xattr_trusted_handler,
+#ifdef CONFIG_HFSPLUS_FS_POSIX_ACL
+       &hfsplus_xattr_acl_access_handler,
+       &hfsplus_xattr_acl_default_handler,
+#endif
        &hfsplus_xattr_security_handler,
        NULL
 };
@@ -46,11 +51,58 @@ static inline int is_known_namespace(const char *name)
        return true;
 }
 
+static int can_set_system_xattr(struct inode *inode, const char *name,
+                               const void *value, size_t size)
+{
+#ifdef CONFIG_HFSPLUS_FS_POSIX_ACL
+       struct posix_acl *acl;
+       int err;
+
+       if (!inode_owner_or_capable(inode))
+               return -EPERM;
+
+       /*
+        * POSIX_ACL_XATTR_ACCESS is tied to i_mode
+        */
+       if (strcmp(name, POSIX_ACL_XATTR_ACCESS) == 0) {
+               acl = posix_acl_from_xattr(&init_user_ns, value, size);
+               if (IS_ERR(acl))
+                       return PTR_ERR(acl);
+               if (acl) {
+                       err = posix_acl_equiv_mode(acl, &inode->i_mode);
+                       posix_acl_release(acl);
+                       if (err < 0)
+                               return err;
+                       mark_inode_dirty(inode);
+               }
+               /*
+                * We're changing the ACL.  Get rid of the cached one
+                */
+               forget_cached_acl(inode, ACL_TYPE_ACCESS);
+
+               return 0;
+       } else if (strcmp(name, POSIX_ACL_XATTR_DEFAULT) == 0) {
+               acl = posix_acl_from_xattr(&init_user_ns, value, size);
+               if (IS_ERR(acl))
+                       return PTR_ERR(acl);
+               posix_acl_release(acl);
+
+               /*
+                * We're changing the default ACL.  Get rid of the cached one
+                */
+               forget_cached_acl(inode, ACL_TYPE_DEFAULT);
+
+               return 0;
+       }
+#endif /* CONFIG_HFSPLUS_FS_POSIX_ACL */
+       return -EOPNOTSUPP;
+}
+
 static int can_set_xattr(struct inode *inode, const char *name,
                                const void *value, size_t value_len)
 {
        if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
-               return -EOPNOTSUPP; /* TODO: implement ACL support */
+               return can_set_system_xattr(inode, name, value, value_len);
 
        if (!strncmp(name, XATTR_MAC_OSX_PREFIX, XATTR_MAC_OSX_PREFIX_LEN)) {
                /*
@@ -253,11 +305,10 @@ static int copy_name(char *buffer, const char *xattr_name, int name_len)
        return len;
 }
 
-static ssize_t hfsplus_getxattr_finder_info(struct dentry *dentry,
+static ssize_t hfsplus_getxattr_finder_info(struct inode *inode,
                                                void *value, size_t size)
 {
        ssize_t res = 0;
-       struct inode *inode = dentry->d_inode;
        struct hfs_find_data fd;
        u16 entry_type;
        u16 folder_rec_len = sizeof(struct DInfo) + sizeof(struct DXInfo);
@@ -304,10 +355,9 @@ end_getxattr_finder_info:
        return res;
 }
 
-ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name,
+ssize_t __hfsplus_getxattr(struct inode *inode, const char *name,
                         void *value, size_t size)
 {
-       struct inode *inode = dentry->d_inode;
        struct hfs_find_data fd;
        hfsplus_attr_entry *entry;
        __be32 xattr_record_type;
@@ -333,7 +383,7 @@ ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name,
        }
 
        if (!strcmp_xattr_finder_info(name))
-               return hfsplus_getxattr_finder_info(dentry, value, size);
+               return hfsplus_getxattr_finder_info(inode, value, size);
 
        if (!HFSPLUS_SB(inode->i_sb)->attr_tree)
                return -EOPNOTSUPP;
index 847b695..841b569 100644 (file)
@@ -14,8 +14,8 @@
 extern const struct xattr_handler hfsplus_xattr_osx_handler;
 extern const struct xattr_handler hfsplus_xattr_user_handler;
 extern const struct xattr_handler hfsplus_xattr_trusted_handler;
-/*extern const struct xattr_handler hfsplus_xattr_acl_access_handler;*/
-/*extern const struct xattr_handler hfsplus_xattr_acl_default_handler;*/
+extern const struct xattr_handler hfsplus_xattr_acl_access_handler;
+extern const struct xattr_handler hfsplus_xattr_acl_default_handler;
 extern const struct xattr_handler hfsplus_xattr_security_handler;
 
 extern const struct xattr_handler *hfsplus_xattr_handlers[];
@@ -29,9 +29,17 @@ static inline int hfsplus_setxattr(struct dentry *dentry, const char *name,
        return __hfsplus_setxattr(dentry->d_inode, name, value, size, flags);
 }
 
-ssize_t hfsplus_getxattr(struct dentry *dentry, const char *name,
+ssize_t __hfsplus_getxattr(struct inode *inode, const char *name,
                        void *value, size_t size);
 
+static inline ssize_t hfsplus_getxattr(struct dentry *dentry,
+                                       const char *name,
+                                       void *value,
+                                       size_t size)
+{
+       return __hfsplus_getxattr(dentry->d_inode, name, value, size);
+}
+
 ssize_t hfsplus_listxattr(struct dentry *dentry, char *buffer, size_t size);
 
 int hfsplus_removexattr(struct dentry *dentry, const char *name);
@@ -39,22 +47,7 @@ int hfsplus_removexattr(struct dentry *dentry, const char *name);
 int hfsplus_init_security(struct inode *inode, struct inode *dir,
                                const struct qstr *qstr);
 
-static inline int hfsplus_init_acl(struct inode *inode, struct inode *dir)
-{
-       /*TODO: implement*/
-       return 0;
-}
-
-static inline int hfsplus_init_inode_security(struct inode *inode,
-                                               struct inode *dir,
-                                               const struct qstr *qstr)
-{
-       int err;
-
-       err = hfsplus_init_acl(inode, dir);
-       if (!err)
-               err = hfsplus_init_security(inode, dir, qstr);
-       return err;
-}
+int hfsplus_init_inode_security(struct inode *inode, struct inode *dir,
+                               const struct qstr *qstr);
 
 #endif
index 83b842f..0072276 100644 (file)
@@ -9,6 +9,7 @@
 #include <linux/security.h>
 #include "hfsplus_fs.h"
 #include "xattr.h"
+#include "acl.h"
 
 static int hfsplus_security_getxattr(struct dentry *dentry, const char *name,
                                        void *buffer, size_t size, int type)
@@ -96,6 +97,18 @@ int hfsplus_init_security(struct inode *inode, struct inode *dir,
                                        &hfsplus_initxattrs, NULL);
 }
 
+int hfsplus_init_inode_security(struct inode *inode,
+                                               struct inode *dir,
+                                               const struct qstr *qstr)
+{
+       int err;
+
+       err = hfsplus_init_posix_acl(inode, dir);
+       if (!err)
+               err = hfsplus_init_security(inode, dir, qstr);
+       return err;
+}
+
 const struct xattr_handler hfsplus_xattr_security_handler = {
        .prefix = XATTR_SECURITY_PREFIX,
        .list   = hfsplus_security_listxattr,
index 4e9dabc..67c1a61 100644 (file)
@@ -138,7 +138,7 @@ static void hpfs_write_failed(struct address_space *mapping, loff_t to)
        hpfs_lock(inode->i_sb);
 
        if (to > inode->i_size) {
-               truncate_pagecache(inode, to, inode->i_size);
+               truncate_pagecache(inode, inode->i_size);
                hpfs_truncate(inode);
        }
 
index 93a0625..b33ba8e 100644 (file)
@@ -17,6 +17,7 @@
 #include <linux/prefetch.h>
 #include <linux/buffer_head.h> /* for inode_has_buffers */
 #include <linux/ratelimit.h>
+#include <linux/list_lru.h>
 #include "internal.h"
 
 /*
@@ -24,7 +25,7 @@
  *
  * inode->i_lock protects:
  *   inode->i_state, inode->i_hash, __iget()
- * inode->i_sb->s_inode_lru_lock protects:
+ * Inode LRU list locks protect:
  *   inode->i_sb->s_inode_lru, inode->i_lru
  * inode_sb_list_lock protects:
  *   sb->s_inodes, inode->i_sb_list
@@ -37,7 +38,7 @@
  *
  * inode_sb_list_lock
  *   inode->i_lock
- *     inode->i_sb->s_inode_lru_lock
+ *     Inode LRU list locks
  *
  * bdi->wb.list_lock
  *   inode->i_lock
@@ -70,33 +71,33 @@ EXPORT_SYMBOL(empty_aops);
  */
 struct inodes_stat_t inodes_stat;
 
-static DEFINE_PER_CPU(unsigned int, nr_inodes);
-static DEFINE_PER_CPU(unsigned int, nr_unused);
+static DEFINE_PER_CPU(unsigned long, nr_inodes);
+static DEFINE_PER_CPU(unsigned long, nr_unused);
 
 static struct kmem_cache *inode_cachep __read_mostly;
 
-static int get_nr_inodes(void)
+static long get_nr_inodes(void)
 {
        int i;
-       int sum = 0;
+       long sum = 0;
        for_each_possible_cpu(i)
                sum += per_cpu(nr_inodes, i);
        return sum < 0 ? 0 : sum;
 }
 
-static inline int get_nr_inodes_unused(void)
+static inline long get_nr_inodes_unused(void)
 {
        int i;
-       int sum = 0;
+       long sum = 0;
        for_each_possible_cpu(i)
                sum += per_cpu(nr_unused, i);
        return sum < 0 ? 0 : sum;
 }
 
-int get_nr_dirty_inodes(void)
+long get_nr_dirty_inodes(void)
 {
        /* not actually dirty inodes, but a wild approximation */
-       int nr_dirty = get_nr_inodes() - get_nr_inodes_unused();
+       long nr_dirty = get_nr_inodes() - get_nr_inodes_unused();
        return nr_dirty > 0 ? nr_dirty : 0;
 }
 
@@ -109,7 +110,7 @@ int proc_nr_inodes(ctl_table *table, int write,
 {
        inodes_stat.nr_inodes = get_nr_inodes();
        inodes_stat.nr_unused = get_nr_inodes_unused();
-       return proc_dointvec(table, write, buffer, lenp, ppos);
+       return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
 }
 #endif
 
@@ -401,13 +402,8 @@ EXPORT_SYMBOL(ihold);
 
 static void inode_lru_list_add(struct inode *inode)
 {
-       spin_lock(&inode->i_sb->s_inode_lru_lock);
-       if (list_empty(&inode->i_lru)) {
-               list_add(&inode->i_lru, &inode->i_sb->s_inode_lru);
-               inode->i_sb->s_nr_inodes_unused++;
+       if (list_lru_add(&inode->i_sb->s_inode_lru, &inode->i_lru))
                this_cpu_inc(nr_unused);
-       }
-       spin_unlock(&inode->i_sb->s_inode_lru_lock);
 }
 
 /*
@@ -425,13 +421,9 @@ void inode_add_lru(struct inode *inode)
 
 static void inode_lru_list_del(struct inode *inode)
 {
-       spin_lock(&inode->i_sb->s_inode_lru_lock);
-       if (!list_empty(&inode->i_lru)) {
-               list_del_init(&inode->i_lru);
-               inode->i_sb->s_nr_inodes_unused--;
+
+       if (list_lru_del(&inode->i_sb->s_inode_lru, &inode->i_lru))
                this_cpu_dec(nr_unused);
-       }
-       spin_unlock(&inode->i_sb->s_inode_lru_lock);
 }
 
 /**
@@ -675,24 +667,8 @@ int invalidate_inodes(struct super_block *sb, bool kill_dirty)
        return busy;
 }
 
-static int can_unuse(struct inode *inode)
-{
-       if (inode->i_state & ~I_REFERENCED)
-               return 0;
-       if (inode_has_buffers(inode))
-               return 0;
-       if (atomic_read(&inode->i_count))
-               return 0;
-       if (inode->i_data.nrpages)
-               return 0;
-       return 1;
-}
-
 /*
- * Walk the superblock inode LRU for freeable inodes and attempt to free them.
- * This is called from the superblock shrinker function with a number of inodes
- * to trim from the LRU. Inodes to be freed are moved to a temporary list and
- * then are freed outside inode_lock by dispose_list().
+ * Isolate the inode from the LRU in preparation for freeing it.
  *
  * Any inodes which are pinned purely because of attached pagecache have their
  * pagecache removed.  If the inode has metadata buffers attached to
@@ -706,89 +682,82 @@ static int can_unuse(struct inode *inode)
  * LRU does not have strict ordering. Hence we don't want to reclaim inodes
  * with this flag set because they are the inodes that are out of order.
  */
-void prune_icache_sb(struct super_block *sb, int nr_to_scan)
+static enum lru_status
+inode_lru_isolate(struct list_head *item, spinlock_t *lru_lock, void *arg)
 {
-       LIST_HEAD(freeable);
-       int nr_scanned;
-       unsigned long reap = 0;
+       struct list_head *freeable = arg;
+       struct inode    *inode = container_of(item, struct inode, i_lru);
 
-       spin_lock(&sb->s_inode_lru_lock);
-       for (nr_scanned = nr_to_scan; nr_scanned >= 0; nr_scanned--) {
-               struct inode *inode;
+       /*
+        * we are inverting the lru lock/inode->i_lock here, so use a trylock.
+        * If we fail to get the lock, just skip it.
+        */
+       if (!spin_trylock(&inode->i_lock))
+               return LRU_SKIP;
 
-               if (list_empty(&sb->s_inode_lru))
-                       break;
+       /*
+        * Referenced or dirty inodes are still in use. Give them another pass
+        * through the LRU as we cannot reclaim them now.
+        */
+       if (atomic_read(&inode->i_count) ||
+           (inode->i_state & ~I_REFERENCED)) {
+               list_del_init(&inode->i_lru);
+               spin_unlock(&inode->i_lock);
+               this_cpu_dec(nr_unused);
+               return LRU_REMOVED;
+       }
 
-               inode = list_entry(sb->s_inode_lru.prev, struct inode, i_lru);
+       /* recently referenced inodes get one more pass */
+       if (inode->i_state & I_REFERENCED) {
+               inode->i_state &= ~I_REFERENCED;
+               spin_unlock(&inode->i_lock);
+               return LRU_ROTATE;
+       }
 
-               /*
-                * we are inverting the sb->s_inode_lru_lock/inode->i_lock here,
-                * so use a trylock. If we fail to get the lock, just move the
-                * inode to the back of the list so we don't spin on it.
-                */
-               if (!spin_trylock(&inode->i_lock)) {
-                       list_move(&inode->i_lru, &sb->s_inode_lru);
-                       continue;
+       if (inode_has_buffers(inode) || inode->i_data.nrpages) {
+               __iget(inode);
+               spin_unlock(&inode->i_lock);
+               spin_unlock(lru_lock);
+               if (remove_inode_buffers(inode)) {
+                       unsigned long reap;
+                       reap = invalidate_mapping_pages(&inode->i_data, 0, -1);
+                       if (current_is_kswapd())
+                               __count_vm_events(KSWAPD_INODESTEAL, reap);
+                       else
+                               __count_vm_events(PGINODESTEAL, reap);
+                       if (current->reclaim_state)
+                               current->reclaim_state->reclaimed_slab += reap;
                }
+               iput(inode);
+               spin_lock(lru_lock);
+               return LRU_RETRY;
+       }
 
-               /*
-                * Referenced or dirty inodes are still in use. Give them
-                * another pass through the LRU as we canot reclaim them now.
-                */
-               if (atomic_read(&inode->i_count) ||
-                   (inode->i_state & ~I_REFERENCED)) {
-                       list_del_init(&inode->i_lru);
-                       spin_unlock(&inode->i_lock);
-                       sb->s_nr_inodes_unused--;
-                       this_cpu_dec(nr_unused);
-                       continue;
-               }
+       WARN_ON(inode->i_state & I_NEW);
+       inode->i_state |= I_FREEING;
+       list_move(&inode->i_lru, freeable);
+       spin_unlock(&inode->i_lock);
 
-               /* recently referenced inodes get one more pass */
-               if (inode->i_state & I_REFERENCED) {
-                       inode->i_state &= ~I_REFERENCED;
-                       list_move(&inode->i_lru, &sb->s_inode_lru);
-                       spin_unlock(&inode->i_lock);
-                       continue;
-               }
-               if (inode_has_buffers(inode) || inode->i_data.nrpages) {
-                       __iget(inode);
-                       spin_unlock(&inode->i_lock);
-                       spin_unlock(&sb->s_inode_lru_lock);
-                       if (remove_inode_buffers(inode))
-                               reap += invalidate_mapping_pages(&inode->i_data,
-                                                               0, -1);
-                       iput(inode);
-                       spin_lock(&sb->s_inode_lru_lock);
-
-                       if (inode != list_entry(sb->s_inode_lru.next,
-                                               struct inode, i_lru))
-                               continue;       /* wrong inode or list_empty */
-                       /* avoid lock inversions with trylock */
-                       if (!spin_trylock(&inode->i_lock))
-                               continue;
-                       if (!can_unuse(inode)) {
-                               spin_unlock(&inode->i_lock);
-                               continue;
-                       }
-               }
-               WARN_ON(inode->i_state & I_NEW);
-               inode->i_state |= I_FREEING;
-               spin_unlock(&inode->i_lock);
+       this_cpu_dec(nr_unused);
+       return LRU_REMOVED;
+}
 
-               list_move(&inode->i_lru, &freeable);
-               sb->s_nr_inodes_unused--;
-               this_cpu_dec(nr_unused);
-       }
-       if (current_is_kswapd())
-               __count_vm_events(KSWAPD_INODESTEAL, reap);
-       else
-               __count_vm_events(PGINODESTEAL, reap);
-       spin_unlock(&sb->s_inode_lru_lock);
-       if (current->reclaim_state)
-               current->reclaim_state->reclaimed_slab += reap;
+/*
+ * Walk the superblock inode LRU for freeable inodes and attempt to free them.
+ * This is called from the superblock shrinker function with a number of inodes
+ * to trim from the LRU. Inodes to be freed are moved to a temporary list and
+ * then are freed outside inode_lock by dispose_list().
+ */
+long prune_icache_sb(struct super_block *sb, unsigned long nr_to_scan,
+                    int nid)
+{
+       LIST_HEAD(freeable);
+       long freed;
 
+       freed = list_lru_walk_node(&sb->s_inode_lru, nid, inode_lru_isolate,
+                                      &freeable, &nr_to_scan);
        dispose_list(&freeable);
+       return freed;
 }
 
 static void __wait_on_freeing_inode(struct inode *inode);
index 2be46ea..513e0d8 100644 (file)
@@ -114,6 +114,8 @@ extern int open_check_o_direct(struct file *f);
  * inode.c
  */
 extern spinlock_t inode_sb_list_lock;
+extern long prune_icache_sb(struct super_block *sb, unsigned long nr_to_scan,
+                           int nid);
 extern void inode_add_lru(struct inode *inode);
 
 /*
@@ -121,7 +123,7 @@ extern void inode_add_lru(struct inode *inode);
  */
 extern void inode_wb_list_del(struct inode *inode);
 
-extern int get_nr_dirty_inodes(void);
+extern long get_nr_dirty_inodes(void);
 extern void evict_inodes(struct super_block *);
 extern int invalidate_inodes(struct super_block *, bool);
 
@@ -130,6 +132,8 @@ extern int invalidate_inodes(struct super_block *, bool);
  */
 extern struct dentry *__d_alloc(struct super_block *, const struct qstr *);
 extern int d_set_mounted(struct dentry *dentry);
+extern long prune_dcache_sb(struct super_block *sb, unsigned long nr_to_scan,
+                           int nid);
 
 /*
  * read_write.c
index 730f24e..f4aab71 100644 (file)
@@ -306,7 +306,7 @@ static void jfs_write_failed(struct address_space *mapping, loff_t to)
        struct inode *inode = mapping->host;
 
        if (to > inode->i_size) {
-               truncate_pagecache(inode, to, inode->i_size);
+               truncate_pagecache(inode, inode->i_size);
                jfs_truncate(inode);
        }
 }
index 8c32ef3..e519e45 100644 (file)
@@ -86,18 +86,6 @@ static LIST_HEAD(mb_cache_list);
 static LIST_HEAD(mb_cache_lru_list);
 static DEFINE_SPINLOCK(mb_cache_spinlock);
 
-/*
- * What the mbcache registers as to get shrunk dynamically.
- */
-
-static int mb_cache_shrink_fn(struct shrinker *shrink,
-                             struct shrink_control *sc);
-
-static struct shrinker mb_cache_shrinker = {
-       .shrink = mb_cache_shrink_fn,
-       .seeks = DEFAULT_SEEKS,
-};
-
 static inline int
 __mb_cache_entry_is_hashed(struct mb_cache_entry *ce)
 {
@@ -151,7 +139,7 @@ forget:
 
 
 /*
- * mb_cache_shrink_fn()  memory pressure callback
+ * mb_cache_shrink_scan()  memory pressure callback
  *
  * This function is called by the kernel memory management when memory
  * gets low.
@@ -159,17 +147,16 @@ forget:
  * @shrink: (ignored)
  * @sc: shrink_control passed from reclaim
  *
- * Returns the number of objects which are present in the cache.
+ * Returns the number of objects freed.
  */
-static int
-mb_cache_shrink_fn(struct shrinker *shrink, struct shrink_control *sc)
+static unsigned long
+mb_cache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
 {
        LIST_HEAD(free_list);
-       struct mb_cache *cache;
        struct mb_cache_entry *entry, *tmp;
-       int count = 0;
        int nr_to_scan = sc->nr_to_scan;
        gfp_t gfp_mask = sc->gfp_mask;
+       unsigned long freed = 0;
 
        mb_debug("trying to free %d entries", nr_to_scan);
        spin_lock(&mb_cache_spinlock);
@@ -179,19 +166,37 @@ mb_cache_shrink_fn(struct shrinker *shrink, struct shrink_control *sc)
                                   struct mb_cache_entry, e_lru_list);
                list_move_tail(&ce->e_lru_list, &free_list);
                __mb_cache_entry_unhash(ce);
+               freed++;
+       }
+       spin_unlock(&mb_cache_spinlock);
+       list_for_each_entry_safe(entry, tmp, &free_list, e_lru_list) {
+               __mb_cache_entry_forget(entry, gfp_mask);
        }
+       return freed;
+}
+
+static unsigned long
+mb_cache_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
+{
+       struct mb_cache *cache;
+       unsigned long count = 0;
+
+       spin_lock(&mb_cache_spinlock);
        list_for_each_entry(cache, &mb_cache_list, c_cache_list) {
                mb_debug("cache %s (%d)", cache->c_name,
                          atomic_read(&cache->c_entry_count));
                count += atomic_read(&cache->c_entry_count);
        }
        spin_unlock(&mb_cache_spinlock);
-       list_for_each_entry_safe(entry, tmp, &free_list, e_lru_list) {
-               __mb_cache_entry_forget(entry, gfp_mask);
-       }
-       return (count / 100) * sysctl_vfs_cache_pressure;
+
+       return vfs_pressure_ratio(count);
 }
 
+static struct shrinker mb_cache_shrinker = {
+       .count_objects = mb_cache_shrink_count,
+       .scan_objects = mb_cache_shrink_scan,
+       .seeks = DEFAULT_SEEKS,
+};
 
 /*
  * mb_cache_create()  create a new cache
index df12249..0332109 100644 (file)
@@ -400,7 +400,7 @@ static void minix_write_failed(struct address_space *mapping, loff_t to)
        struct inode *inode = mapping->host;
 
        if (to > inode->i_size) {
-               truncate_pagecache(inode, to, inode->i_size);
+               truncate_pagecache(inode, inode->i_size);
                minix_truncate(inode);
        }
 }
index 409a441..0dc4cbf 100644 (file)
@@ -660,29 +660,6 @@ static __always_inline void set_root_rcu(struct nameidata *nd)
        }
 }
 
-static __always_inline int __vfs_follow_link(struct nameidata *nd, const char *link)
-{
-       int ret;
-
-       if (IS_ERR(link))
-               goto fail;
-
-       if (*link == '/') {
-               set_root(nd);
-               path_put(&nd->path);
-               nd->path = nd->root;
-               path_get(&nd->root);
-               nd->flags |= LOOKUP_JUMPED;
-       }
-       nd->inode = nd->path.dentry->d_inode;
-
-       ret = link_path_walk(link, nd);
-       return ret;
-fail:
-       path_put(&nd->path);
-       return PTR_ERR(link);
-}
-
 static void path_put_conditional(struct path *path, struct nameidata *nd)
 {
        dput(path->dentry);
@@ -874,7 +851,20 @@ follow_link(struct path *link, struct nameidata *nd, void **p)
        error = 0;
        s = nd_get_link(nd);
        if (s) {
-               error = __vfs_follow_link(nd, s);
+               if (unlikely(IS_ERR(s))) {
+                       path_put(&nd->path);
+                       put_link(nd, link, *p);
+                       return PTR_ERR(s);
+               }
+               if (*s == '/') {
+                       set_root(nd);
+                       path_put(&nd->path);
+                       nd->path = nd->root;
+                       path_get(&nd->root);
+                       nd->flags |= LOOKUP_JUMPED;
+               }
+               nd->inode = nd->path.dentry->d_inode;
+               error = link_path_walk(s, nd);
                if (unlikely(error))
                        put_link(nd, link, *p);
        }
@@ -2271,12 +2261,15 @@ mountpoint_last(struct nameidata *nd, struct path *path)
                dentry = d_alloc(dir, &nd->last);
                if (!dentry) {
                        error = -ENOMEM;
+                       mutex_unlock(&dir->d_inode->i_mutex);
                        goto out;
                }
                dentry = lookup_real(dir->d_inode, dentry, nd->flags);
                error = PTR_ERR(dentry);
-               if (IS_ERR(dentry))
+               if (IS_ERR(dentry)) {
+                       mutex_unlock(&dir->d_inode->i_mutex);
                        goto out;
+               }
        }
        mutex_unlock(&dir->d_inode->i_mutex);
 
@@ -4236,11 +4229,6 @@ int generic_readlink(struct dentry *dentry, char __user *buffer, int buflen)
        return res;
 }
 
-int vfs_follow_link(struct nameidata *nd, const char *link)
-{
-       return __vfs_follow_link(nd, link);
-}
-
 /* get the link contents into pagecache */
 static char *page_getlink(struct dentry * dentry, struct page **ppage)
 {
@@ -4352,7 +4340,6 @@ EXPORT_SYMBOL(vfs_path_lookup);
 EXPORT_SYMBOL(inode_permission);
 EXPORT_SYMBOL(unlock_rename);
 EXPORT_SYMBOL(vfs_create);
-EXPORT_SYMBOL(vfs_follow_link);
 EXPORT_SYMBOL(vfs_link);
 EXPORT_SYMBOL(vfs_mkdir);
 EXPORT_SYMBOL(vfs_mknod);
index 25845d1..da5c494 100644 (file)
@@ -17,7 +17,7 @@
 #include <linux/security.h>
 #include <linux/idr.h>
 #include <linux/acct.h>                /* acct_auto_close_mnt */
-#include <linux/ramfs.h>       /* init_rootfs */
+#include <linux/init.h>                /* init_rootfs */
 #include <linux/fs_struct.h>   /* get_fs_root et.al. */
 #include <linux/fsnotify.h>    /* fsnotify_vfsmount_delete */
 #include <linux/uaccess.h>
index e79bc6c..de434f3 100644 (file)
@@ -2006,17 +2006,18 @@ static void nfs_access_free_list(struct list_head *head)
        }
 }
 
-int nfs_access_cache_shrinker(struct shrinker *shrink,
-                             struct shrink_control *sc)
+unsigned long
+nfs_access_cache_scan(struct shrinker *shrink, struct shrink_control *sc)
 {
        LIST_HEAD(head);
        struct nfs_inode *nfsi, *next;
        struct nfs_access_entry *cache;
        int nr_to_scan = sc->nr_to_scan;
        gfp_t gfp_mask = sc->gfp_mask;
+       long freed = 0;
 
        if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL)
-               return (nr_to_scan == 0) ? 0 : -1;
+               return SHRINK_STOP;
 
        spin_lock(&nfs_access_lru_lock);
        list_for_each_entry_safe(nfsi, next, &nfs_access_lru_list, access_cache_inode_lru) {
@@ -2032,6 +2033,7 @@ int nfs_access_cache_shrinker(struct shrinker *shrink,
                                struct nfs_access_entry, lru);
                list_move(&cache->lru, &head);
                rb_erase(&cache->rb_node, &nfsi->access_cache);
+               freed++;
                if (!list_empty(&nfsi->access_cache_entry_lru))
                        list_move_tail(&nfsi->access_cache_inode_lru,
                                        &nfs_access_lru_list);
@@ -2046,7 +2048,13 @@ remove_lru_entry:
        }
        spin_unlock(&nfs_access_lru_lock);
        nfs_access_free_list(&head);
-       return (atomic_long_read(&nfs_access_nr_entries) / 100) * sysctl_vfs_cache_pressure;
+       return freed;
+}
+
+unsigned long
+nfs_access_cache_count(struct shrinker *shrink, struct shrink_control *sc)
+{
+       return vfs_pressure_ratio(atomic_long_read(&nfs_access_nr_entries));
 }
 
 static void __nfs_access_zap_cache(struct nfs_inode *nfsi, struct list_head *head)
index 0bd7a55..91ff089 100644 (file)
@@ -130,7 +130,6 @@ ssize_t nfs_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_
 
        return -EINVAL;
 #else
-       VM_BUG_ON(iocb->ki_left != PAGE_SIZE);
        VM_BUG_ON(iocb->ki_nbytes != PAGE_SIZE);
 
        if (rw == READ || rw == KERNEL_READ)
index 87e7976..eda8879 100644 (file)
@@ -541,7 +541,6 @@ EXPORT_SYMBOL_GPL(nfs_setattr);
  */
 static int nfs_vmtruncate(struct inode * inode, loff_t offset)
 {
-       loff_t oldsize;
        int err;
 
        err = inode_newsize_ok(inode, offset);
@@ -549,11 +548,10 @@ static int nfs_vmtruncate(struct inode * inode, loff_t offset)
                goto out;
 
        spin_lock(&inode->i_lock);
-       oldsize = inode->i_size;
        i_size_write(inode, offset);
        spin_unlock(&inode->i_lock);
 
-       truncate_pagecache(inode, oldsize, offset);
+       truncate_pagecache(inode, offset);
 out:
        return err;
 }
index d388302..38da8c2 100644 (file)
@@ -273,8 +273,10 @@ extern struct nfs_client *nfs_init_client(struct nfs_client *clp,
                           const char *ip_addr);
 
 /* dir.c */
-extern int nfs_access_cache_shrinker(struct shrinker *shrink,
-                                       struct shrink_control *sc);
+extern unsigned long nfs_access_cache_count(struct shrinker *shrink,
+                                           struct shrink_control *sc);
+extern unsigned long nfs_access_cache_scan(struct shrinker *shrink,
+                                          struct shrink_control *sc);
 struct dentry *nfs_lookup(struct inode *, struct dentry *, unsigned int);
 int nfs_create(struct inode *, struct dentry *, umode_t, bool);
 int nfs_mkdir(struct inode *, struct dentry *, umode_t);
index f520a11..28842ab 100644 (file)
@@ -279,15 +279,15 @@ _nfs4_state_protect(struct nfs_client *clp, unsigned long sp4_mode,
        if (test_bit(sp4_mode, &clp->cl_sp4_flags)) {
                spin_lock(&clp->cl_lock);
                if (clp->cl_machine_cred != NULL)
-                       newcred = get_rpccred(clp->cl_machine_cred);
+                       /* don't call get_rpccred on the machine cred -
+                        * a reference will be held for life of clp */
+                       newcred = clp->cl_machine_cred;
                spin_unlock(&clp->cl_lock);
-               if (msg->rpc_cred)
-                       put_rpccred(msg->rpc_cred);
                msg->rpc_cred = newcred;
 
                flavor = clp->cl_rpcclient->cl_auth->au_flavor;
-               WARN_ON(flavor != RPC_AUTH_GSS_KRB5I &&
-                       flavor != RPC_AUTH_GSS_KRB5P);
+               WARN_ON_ONCE(flavor != RPC_AUTH_GSS_KRB5I &&
+                            flavor != RPC_AUTH_GSS_KRB5P);
                *clntp = clp->cl_rpcclient;
 
                return true;
index 39b6cf2..989bb9d 100644 (file)
@@ -6001,10 +6001,12 @@ static int _nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct
                .rpc_resp = &res,
        };
        struct rpc_clnt *clnt = NFS_SERVER(dir)->client;
+       struct rpc_cred *cred = NULL;
 
        if (use_integrity) {
                clnt = NFS_SERVER(dir)->nfs_client->cl_rpcclient;
-               msg.rpc_cred = nfs4_get_clid_cred(NFS_SERVER(dir)->nfs_client);
+               cred = nfs4_get_clid_cred(NFS_SERVER(dir)->nfs_client);
+               msg.rpc_cred = cred;
        }
 
        dprintk("NFS call  secinfo %s\n", name->name);
@@ -6016,8 +6018,8 @@ static int _nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct
                                &res.seq_res, 0);
        dprintk("NFS reply  secinfo: %d\n", status);
 
-       if (msg.rpc_cred)
-               put_rpccred(msg.rpc_cred);
+       if (cred)
+               put_rpccred(cred);
 
        return status;
 }
@@ -6151,11 +6153,13 @@ static const struct nfs41_state_protection nfs4_sp4_mach_cred_request = {
        },
        .allow.u.words = {
                [0] = 1 << (OP_CLOSE) |
-                     1 << (OP_LOCKU),
+                     1 << (OP_LOCKU) |
+                     1 << (OP_COMMIT),
                [1] = 1 << (OP_SECINFO - 32) |
                      1 << (OP_SECINFO_NO_NAME - 32) |
                      1 << (OP_TEST_STATEID - 32) |
-                     1 << (OP_FREE_STATEID - 32)
+                     1 << (OP_FREE_STATEID - 32) |
+                     1 << (OP_WRITE - 32)
        }
 };
 
@@ -7496,11 +7500,13 @@ _nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle,
                .rpc_resp = &res,
        };
        struct rpc_clnt *clnt = server->client;
+       struct rpc_cred *cred = NULL;
        int status;
 
        if (use_integrity) {
                clnt = server->nfs_client->cl_rpcclient;
-               msg.rpc_cred = nfs4_get_clid_cred(server->nfs_client);
+               cred = nfs4_get_clid_cred(server->nfs_client);
+               msg.rpc_cred = cred;
        }
 
        dprintk("--> %s\n", __func__);
@@ -7508,8 +7514,8 @@ _nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle,
                                &res.seq_res, 0);
        dprintk("<-- %s status=%d\n", __func__, status);
 
-       if (msg.rpc_cred)
-               put_rpccred(msg.rpc_cred);
+       if (cred)
+               put_rpccred(cred);
 
        return status;
 }
index fbdad9e..79210d2 100644 (file)
@@ -414,7 +414,7 @@ static int nfs4_stat_to_errno(int);
 #define decode_test_stateid_maxsz      (op_decode_hdr_maxsz + 2 + 1)
 #define encode_free_stateid_maxsz      (op_encode_hdr_maxsz + 1 + \
                                         XDR_QUADLEN(NFS4_STATEID_SIZE))
-#define decode_free_stateid_maxsz      (op_decode_hdr_maxsz + 1)
+#define decode_free_stateid_maxsz      (op_decode_hdr_maxsz)
 #else /* CONFIG_NFS_V4_1 */
 #define encode_sequence_maxsz  0
 #define decode_sequence_maxsz  0
@@ -5966,21 +5966,8 @@ out:
 static int decode_free_stateid(struct xdr_stream *xdr,
                               struct nfs41_free_stateid_res *res)
 {
-       __be32 *p;
-       int status;
-
-       status = decode_op_hdr(xdr, OP_FREE_STATEID);
-       if (status)
-               return status;
-
-       p = xdr_inline_decode(xdr, 4);
-       if (unlikely(!p))
-               goto out_overflow;
-       res->status = be32_to_cpup(p++);
+       res->status = decode_op_hdr(xdr, OP_FREE_STATEID);
        return res->status;
-out_overflow:
-       print_overflow_msg(__func__, xdr);
-       return -EIO;
 }
 #endif /* CONFIG_NFS_V4_1 */
 
index 5793f24..a03b9c6 100644 (file)
@@ -360,7 +360,8 @@ static void unregister_nfs4_fs(void)
 #endif
 
 static struct shrinker acl_shrinker = {
-       .shrink         = nfs_access_cache_shrinker,
+       .count_objects  = nfs_access_cache_count,
+       .scan_objects   = nfs_access_cache_scan,
        .seeks          = DEFAULT_SEEKS,
 };
 
index e76244e..9186c7c 100644 (file)
@@ -59,11 +59,14 @@ static unsigned int         longest_chain_cachesize;
 
 static int     nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec);
 static void    cache_cleaner_func(struct work_struct *unused);
-static int     nfsd_reply_cache_shrink(struct shrinker *shrink,
-                                       struct shrink_control *sc);
+static unsigned long nfsd_reply_cache_count(struct shrinker *shrink,
+                                           struct shrink_control *sc);
+static unsigned long nfsd_reply_cache_scan(struct shrinker *shrink,
+                                          struct shrink_control *sc);
 
 static struct shrinker nfsd_reply_cache_shrinker = {
-       .shrink = nfsd_reply_cache_shrink,
+       .scan_objects = nfsd_reply_cache_scan,
+       .count_objects = nfsd_reply_cache_count,
        .seeks  = 1,
 };
 
@@ -232,16 +235,18 @@ nfsd_cache_entry_expired(struct svc_cacherep *rp)
  * Walk the LRU list and prune off entries that are older than RC_EXPIRE.
  * Also prune the oldest ones when the total exceeds the max number of entries.
  */
-static void
+static long
 prune_cache_entries(void)
 {
        struct svc_cacherep *rp, *tmp;
+       long freed = 0;
 
        list_for_each_entry_safe(rp, tmp, &lru_head, c_lru) {
                if (!nfsd_cache_entry_expired(rp) &&
                    num_drc_entries <= max_drc_entries)
                        break;
                nfsd_reply_cache_free_locked(rp);
+               freed++;
        }
 
        /*
@@ -254,6 +259,7 @@ prune_cache_entries(void)
                cancel_delayed_work(&cache_cleaner);
        else
                mod_delayed_work(system_wq, &cache_cleaner, RC_EXPIRE);
+       return freed;
 }
 
 static void
@@ -264,20 +270,28 @@ cache_cleaner_func(struct work_struct *unused)
        spin_unlock(&cache_lock);
 }
 
-static int
-nfsd_reply_cache_shrink(struct shrinker *shrink, struct shrink_control *sc)
+static unsigned long
+nfsd_reply_cache_count(struct shrinker *shrink, struct shrink_control *sc)
 {
-       unsigned int num;
+       unsigned long num;
 
        spin_lock(&cache_lock);
-       if (sc->nr_to_scan)
-               prune_cache_entries();
        num = num_drc_entries;
        spin_unlock(&cache_lock);
 
        return num;
 }
 
+static unsigned long
+nfsd_reply_cache_scan(struct shrinker *shrink, struct shrink_control *sc)
+{
+       unsigned long freed;
+
+       spin_lock(&cache_lock);
+       freed = prune_cache_entries();
+       spin_unlock(&cache_lock);
+       return freed;
+}
 /*
  * Walk an xdr_buf and get a CRC for at most the first RC_CSUMLEN bytes
  */
index b1a5277..7e350c5 100644 (file)
@@ -254,7 +254,7 @@ void nilfs_write_failed(struct address_space *mapping, loff_t to)
        struct inode *inode = mapping->host;
 
        if (to > inode->i_size) {
-               truncate_pagecache(inode, to, inode->i_size);
+               truncate_pagecache(inode, inode->i_size);
                nilfs_truncate(inode);
        }
 }
index c5670b8..ea4ba9d 100644 (file)
@@ -1768,7 +1768,7 @@ static void ntfs_write_failed(struct address_space *mapping, loff_t to)
        struct inode *inode = mapping->host;
 
        if (to > inode->i_size) {
-               truncate_pagecache(inode, to, inode->i_size);
+               truncate_pagecache(inode, inode->i_size);
                ntfs_truncate_vfs(inode);
        }
 }
index 8a40457..b4f788e 100644 (file)
@@ -51,10 +51,6 @@ static struct posix_acl *ocfs2_acl_from_xattr(const void *value, size_t size)
                return ERR_PTR(-EINVAL);
 
        count = size / sizeof(struct posix_acl_entry);
-       if (count < 0)
-               return ERR_PTR(-EINVAL);
-       if (count == 0)
-               return NULL;
 
        acl = posix_acl_alloc(count, GFP_NOFS);
        if (!acl)
index 94417a8..f37d3c0 100644 (file)
@@ -2044,7 +2044,7 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
 
 out_write_size:
        pos += copied;
-       if (pos > inode->i_size) {
+       if (pos > i_size_read(inode)) {
                i_size_write(inode, pos);
                mark_inode_dirty(inode);
        }
index 5c1c864..363f0dc 100644 (file)
@@ -628,11 +628,9 @@ static void o2hb_fire_callbacks(struct o2hb_callback *hbcall,
                                struct o2nm_node *node,
                                int idx)
 {
-       struct list_head *iter;
        struct o2hb_callback_func *f;
 
-       list_for_each(iter, &hbcall->list) {
-               f = list_entry(iter, struct o2hb_callback_func, hc_item);
+       list_for_each_entry(f, &hbcall->list, hc_item) {
                mlog(ML_HEARTBEAT, "calling funcs %p\n", f);
                (f->hc_func)(node, idx, f->hc_data);
        }
@@ -641,16 +639,9 @@ static void o2hb_fire_callbacks(struct o2hb_callback *hbcall,
 /* Will run the list in order until we process the passed event */
 static void o2hb_run_event_list(struct o2hb_node_event *queued_event)
 {
-       int empty;
        struct o2hb_callback *hbcall;
        struct o2hb_node_event *event;
 
-       spin_lock(&o2hb_live_lock);
-       empty = list_empty(&queued_event->hn_item);
-       spin_unlock(&o2hb_live_lock);
-       if (empty)
-               return;
-
        /* Holding callback sem assures we don't alter the callback
         * lists when doing this, and serializes ourselves with other
         * processes wanting callbacks. */
@@ -709,6 +700,7 @@ static void o2hb_shutdown_slot(struct o2hb_disk_slot *slot)
        struct o2hb_node_event event =
                { .hn_item = LIST_HEAD_INIT(event.hn_item), };
        struct o2nm_node *node;
+       int queued = 0;
 
        node = o2nm_get_node_by_num(slot->ds_node_num);
        if (!node)
@@ -726,11 +718,13 @@ static void o2hb_shutdown_slot(struct o2hb_disk_slot *slot)
 
                        o2hb_queue_node_event(&event, O2HB_NODE_DOWN_CB, node,
                                              slot->ds_node_num);
+                       queued = 1;
                }
        }
        spin_unlock(&o2hb_live_lock);
 
-       o2hb_run_event_list(&event);
+       if (queued)
+               o2hb_run_event_list(&event);
 
        o2nm_node_put(node);
 }
@@ -790,6 +784,7 @@ static int o2hb_check_slot(struct o2hb_region *reg,
        unsigned int dead_ms = o2hb_dead_threshold * O2HB_REGION_TIMEOUT_MS;
        unsigned int slot_dead_ms;
        int tmp;
+       int queued = 0;
 
        memcpy(hb_block, slot->ds_raw_block, reg->hr_block_bytes);
 
@@ -883,6 +878,7 @@ fire_callbacks:
                                              slot->ds_node_num);
 
                        changed = 1;
+                       queued = 1;
                }
 
                list_add_tail(&slot->ds_live_item,
@@ -934,6 +930,7 @@ fire_callbacks:
                                              node, slot->ds_node_num);
 
                        changed = 1;
+                       queued = 1;
                }
 
                /* We don't clear this because the node is still
@@ -949,7 +946,8 @@ fire_callbacks:
 out:
        spin_unlock(&o2hb_live_lock);
 
-       o2hb_run_event_list(&event);
+       if (queued)
+               o2hb_run_event_list(&event);
 
        if (node)
                o2nm_node_put(node);
@@ -2516,8 +2514,7 @@ unlock:
 int o2hb_register_callback(const char *region_uuid,
                           struct o2hb_callback_func *hc)
 {
-       struct o2hb_callback_func *tmp;
-       struct list_head *iter;
+       struct o2hb_callback_func *f;
        struct o2hb_callback *hbcall;
        int ret;
 
@@ -2540,10 +2537,9 @@ int o2hb_register_callback(const char *region_uuid,
 
        down_write(&o2hb_callback_sem);
 
-       list_for_each(iter, &hbcall->list) {
-               tmp = list_entry(iter, struct o2hb_callback_func, hc_item);
-               if (hc->hc_priority < tmp->hc_priority) {
-                       list_add_tail(&hc->hc_item, iter);
+       list_for_each_entry(f, &hbcall->list, hc_item) {
+               if (hc->hc_priority < f->hc_priority) {
+                       list_add_tail(&hc->hc_item, &f->hc_item);
                        break;
                }
        }
index d644dc6..2cd2406 100644 (file)
@@ -543,8 +543,9 @@ static void o2net_set_nn_state(struct o2net_node *nn,
        }
 
        if (was_valid && !valid) {
-               printk(KERN_NOTICE "o2net: No longer connected to "
-                      SC_NODEF_FMT "\n", SC_NODEF_ARGS(old_sc));
+               if (old_sc)
+                       printk(KERN_NOTICE "o2net: No longer connected to "
+                               SC_NODEF_FMT "\n", SC_NODEF_ARGS(old_sc));
                o2net_complete_nodes_nsw(nn);
        }
 
@@ -765,32 +766,32 @@ static struct o2net_msg_handler *
 o2net_handler_tree_lookup(u32 msg_type, u32 key, struct rb_node ***ret_p,
                          struct rb_node **ret_parent)
 {
-        struct rb_node **p = &o2net_handler_tree.rb_node;
-        struct rb_node *parent = NULL;
+       struct rb_node **p = &o2net_handler_tree.rb_node;
+       struct rb_node *parent = NULL;
        struct o2net_msg_handler *nmh, *ret = NULL;
        int cmp;
 
-        while (*p) {
-                parent = *p;
-                nmh = rb_entry(parent, struct o2net_msg_handler, nh_node);
+       while (*p) {
+               parent = *p;
+               nmh = rb_entry(parent, struct o2net_msg_handler, nh_node);
                cmp = o2net_handler_cmp(nmh, msg_type, key);
 
-                if (cmp < 0)
-                        p = &(*p)->rb_left;
-                else if (cmp > 0)
-                        p = &(*p)->rb_right;
-                else {
+               if (cmp < 0)
+                       p = &(*p)->rb_left;
+               else if (cmp > 0)
+                       p = &(*p)->rb_right;
+               else {
                        ret = nmh;
-                        break;
+                       break;
                }
-        }
+       }
 
-        if (ret_p != NULL)
-                *ret_p = p;
-        if (ret_parent != NULL)
-                *ret_parent = parent;
+       if (ret_p != NULL)
+               *ret_p = p;
+       if (ret_parent != NULL)
+               *ret_parent = parent;
 
-        return ret;
+       return ret;
 }
 
 static void o2net_handler_kref_release(struct kref *kref)
@@ -1695,13 +1696,12 @@ static void o2net_start_connect(struct work_struct *work)
                ret = 0;
 
 out:
-       if (ret) {
+       if (ret && sc) {
                printk(KERN_NOTICE "o2net: Connect attempt to " SC_NODEF_FMT
                       " failed with errno %d\n", SC_NODEF_ARGS(sc), ret);
                /* 0 err so that another will be queued and attempted
                 * from set_nn_state */
-               if (sc)
-                       o2net_ensure_shutdown(nn, sc, 0);
+               o2net_ensure_shutdown(nn, sc, 0);
        }
        if (sc)
                sc_put(sc);
@@ -1873,12 +1873,16 @@ static int o2net_accept_one(struct socket *sock)
 
        if (o2nm_this_node() >= node->nd_num) {
                local_node = o2nm_get_node_by_num(o2nm_this_node());
-               printk(KERN_NOTICE "o2net: Unexpected connect attempt seen "
-                      "at node '%s' (%u, %pI4:%d) from node '%s' (%u, "
-                      "%pI4:%d)\n", local_node->nd_name, local_node->nd_num,
-                      &(local_node->nd_ipv4_address),
-                      ntohs(local_node->nd_ipv4_port), node->nd_name,
-                      node->nd_num, &sin.sin_addr.s_addr, ntohs(sin.sin_port));
+               if (local_node)
+                       printk(KERN_NOTICE "o2net: Unexpected connect attempt "
+                                       "seen at node '%s' (%u, %pI4:%d) from "
+                                       "node '%s' (%u, %pI4:%d)\n",
+                                       local_node->nd_name, local_node->nd_num,
+                                       &(local_node->nd_ipv4_address),
+                                       ntohs(local_node->nd_ipv4_port),
+                                       node->nd_name,
+                                       node->nd_num, &sin.sin_addr.s_addr,
+                                       ntohs(sin.sin_port));
                ret = -EINVAL;
                goto out;
        }
index fbec0be..b46278f 100644 (file)
@@ -292,7 +292,7 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data,
        struct dlm_lock *lock = NULL;
        struct dlm_proxy_ast *past = (struct dlm_proxy_ast *) msg->buf;
        char *name;
-       struct list_head *iter, *head=NULL;
+       struct list_head *head = NULL;
        __be64 cookie;
        u32 flags;
        u8 node;
@@ -373,8 +373,7 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data,
        /* try convert queue for both ast/bast */
        head = &res->converting;
        lock = NULL;
-       list_for_each(iter, head) {
-               lock = list_entry (iter, struct dlm_lock, list);
+       list_for_each_entry(lock, head, list) {
                if (lock->ml.cookie == cookie)
                        goto do_ast;
        }
@@ -385,8 +384,7 @@ int dlm_proxy_ast_handler(struct o2net_msg *msg, u32 len, void *data,
        else
                head = &res->granted;
 
-       list_for_each(iter, head) {
-               lock = list_entry (iter, struct dlm_lock, list);
+       list_for_each_entry(lock, head, list) {
                if (lock->ml.cookie == cookie)
                        goto do_ast;
        }
index de854cc..e051776 100644 (file)
@@ -1079,11 +1079,9 @@ static inline int dlm_lock_compatible(int existing, int request)
 static inline int dlm_lock_on_list(struct list_head *head,
                                   struct dlm_lock *lock)
 {
-       struct list_head *iter;
        struct dlm_lock *tmplock;
 
-       list_for_each(iter, head) {
-               tmplock = list_entry(iter, struct dlm_lock, list);
+       list_for_each_entry(tmplock, head, list) {
                if (tmplock == lock)
                        return 1;
        }
index 29a886d..e36d63f 100644 (file)
@@ -123,7 +123,6 @@ static enum dlm_status __dlmconvert_master(struct dlm_ctxt *dlm,
                                           int *kick_thread)
 {
        enum dlm_status status = DLM_NORMAL;
-       struct list_head *iter;
        struct dlm_lock *tmplock=NULL;
 
        assert_spin_locked(&res->spinlock);
@@ -185,16 +184,14 @@ static enum dlm_status __dlmconvert_master(struct dlm_ctxt *dlm,
 
        /* upconvert from here on */
        status = DLM_NORMAL;
-       list_for_each(iter, &res->granted) {
-               tmplock = list_entry(iter, struct dlm_lock, list);
+       list_for_each_entry(tmplock, &res->granted, list) {
                if (tmplock == lock)
                        continue;
                if (!dlm_lock_compatible(tmplock->ml.type, type))
                        goto switch_queues;
        }
 
-       list_for_each(iter, &res->converting) {
-               tmplock = list_entry(iter, struct dlm_lock, list);
+       list_for_each_entry(tmplock, &res->converting, list) {
                if (!dlm_lock_compatible(tmplock->ml.type, type))
                        goto switch_queues;
                /* existing conversion requests take precedence */
@@ -424,8 +421,8 @@ int dlm_convert_lock_handler(struct o2net_msg *msg, u32 len, void *data,
        struct dlm_ctxt *dlm = data;
        struct dlm_convert_lock *cnv = (struct dlm_convert_lock *)msg->buf;
        struct dlm_lock_resource *res = NULL;
-       struct list_head *iter;
        struct dlm_lock *lock = NULL;
+       struct dlm_lock *tmp_lock;
        struct dlm_lockstatus *lksb;
        enum dlm_status status = DLM_NORMAL;
        u32 flags;
@@ -471,14 +468,13 @@ int dlm_convert_lock_handler(struct o2net_msg *msg, u32 len, void *data,
                dlm_error(status);
                goto leave;
        }
-       list_for_each(iter, &res->granted) {
-               lock = list_entry(iter, struct dlm_lock, list);
-               if (lock->ml.cookie == cnv->cookie &&
-                   lock->ml.node == cnv->node_idx) {
+       list_for_each_entry(tmp_lock, &res->granted, list) {
+               if (tmp_lock->ml.cookie == cnv->cookie &&
+                   tmp_lock->ml.node == cnv->node_idx) {
+                       lock = tmp_lock;
                        dlm_lock_get(lock);
                        break;
                }
-               lock = NULL;
        }
        spin_unlock(&res->spinlock);
        if (!lock) {
index 0e28e24..e33cd7a 100644 (file)
@@ -96,7 +96,6 @@ static void __dlm_print_lock(struct dlm_lock *lock)
 
 void __dlm_print_one_lock_resource(struct dlm_lock_resource *res)
 {
-       struct list_head *iter2;
        struct dlm_lock *lock;
        char buf[DLM_LOCKID_NAME_MAX];
 
@@ -118,18 +117,15 @@ void __dlm_print_one_lock_resource(struct dlm_lock_resource *res)
               res->inflight_locks, atomic_read(&res->asts_reserved));
        dlm_print_lockres_refmap(res);
        printk("  granted queue:\n");
-       list_for_each(iter2, &res->granted) {
-               lock = list_entry(iter2, struct dlm_lock, list);
+       list_for_each_entry(lock, &res->granted, list) {
                __dlm_print_lock(lock);
        }
        printk("  converting queue:\n");
-       list_for_each(iter2, &res->converting) {
-               lock = list_entry(iter2, struct dlm_lock, list);
+       list_for_each_entry(lock, &res->converting, list) {
                __dlm_print_lock(lock);
        }
        printk("  blocked queue:\n");
-       list_for_each(iter2, &res->blocked) {
-               lock = list_entry(iter2, struct dlm_lock, list);
+       list_for_each_entry(lock, &res->blocked, list) {
                __dlm_print_lock(lock);
        }
 }
@@ -446,7 +442,6 @@ static int debug_mle_print(struct dlm_ctxt *dlm, char *buf, int len)
 {
        struct dlm_master_list_entry *mle;
        struct hlist_head *bucket;
-       struct hlist_node *list;
        int i, out = 0;
        unsigned long total = 0, longest = 0, bucket_count = 0;
 
@@ -456,9 +451,7 @@ static int debug_mle_print(struct dlm_ctxt *dlm, char *buf, int len)
        spin_lock(&dlm->master_lock);
        for (i = 0; i < DLM_HASH_BUCKETS; i++) {
                bucket = dlm_master_hash(dlm, i);
-               hlist_for_each(list, bucket) {
-                       mle = hlist_entry(list, struct dlm_master_list_entry,
-                                         master_hash_node);
+               hlist_for_each_entry(mle, bucket, master_hash_node) {
                        ++total;
                        ++bucket_count;
                        if (len - out < 200)
index dbb17c0..8b3382a 100644 (file)
@@ -193,7 +193,7 @@ struct dlm_lock_resource * __dlm_lookup_lockres_full(struct dlm_ctxt *dlm,
                                                     unsigned int hash)
 {
        struct hlist_head *bucket;
-       struct hlist_node *list;
+       struct dlm_lock_resource *res;
 
        mlog(0, "%.*s\n", len, name);
 
@@ -201,9 +201,7 @@ struct dlm_lock_resource * __dlm_lookup_lockres_full(struct dlm_ctxt *dlm,
 
        bucket = dlm_lockres_hash(dlm, hash);
 
-       hlist_for_each(list, bucket) {
-               struct dlm_lock_resource *res = hlist_entry(list,
-                       struct dlm_lock_resource, hash_node);
+       hlist_for_each_entry(res, bucket, hash_node) {
                if (res->lockname.name[0] != name[0])
                        continue;
                if (unlikely(res->lockname.len != len))
@@ -262,22 +260,19 @@ struct dlm_lock_resource * dlm_lookup_lockres(struct dlm_ctxt *dlm,
 
 static struct dlm_ctxt * __dlm_lookup_domain_full(const char *domain, int len)
 {
-       struct dlm_ctxt *tmp = NULL;
-       struct list_head *iter;
+       struct dlm_ctxt *tmp;
 
        assert_spin_locked(&dlm_domain_lock);
 
        /* tmp->name here is always NULL terminated,
         * but domain may not be! */
-       list_for_each(iter, &dlm_domains) {
-               tmp = list_entry (iter, struct dlm_ctxt, list);
+       list_for_each_entry(tmp, &dlm_domains, list) {
                if (strlen(tmp->name) == len &&
                    memcmp(tmp->name, domain, len)==0)
-                       break;
-               tmp = NULL;
+                       return tmp;
        }
 
-       return tmp;
+       return NULL;
 }
 
 /* For null terminated domain strings ONLY */
@@ -366,25 +361,22 @@ static void __dlm_get(struct dlm_ctxt *dlm)
  * you shouldn't trust your pointer. */
 struct dlm_ctxt *dlm_grab(struct dlm_ctxt *dlm)
 {
-       struct list_head *iter;
-       struct dlm_ctxt *target = NULL;
+       struct dlm_ctxt *target;
+       struct dlm_ctxt *ret = NULL;
 
        spin_lock(&dlm_domain_lock);
 
-       list_for_each(iter, &dlm_domains) {
-               target = list_entry (iter, struct dlm_ctxt, list);
-
+       list_for_each_entry(target, &dlm_domains, list) {
                if (target == dlm) {
                        __dlm_get(target);
+                       ret = target;
                        break;
                }
-
-               target = NULL;
        }
 
        spin_unlock(&dlm_domain_lock);
 
-       return target;
+       return ret;
 }
 
 int dlm_domain_fully_joined(struct dlm_ctxt *dlm)
@@ -2296,13 +2288,10 @@ static DECLARE_RWSEM(dlm_callback_sem);
 void dlm_fire_domain_eviction_callbacks(struct dlm_ctxt *dlm,
                                        int node_num)
 {
-       struct list_head *iter;
        struct dlm_eviction_cb *cb;
 
        down_read(&dlm_callback_sem);
-       list_for_each(iter, &dlm->dlm_eviction_callbacks) {
-               cb = list_entry(iter, struct dlm_eviction_cb, ec_item);
-
+       list_for_each_entry(cb, &dlm->dlm_eviction_callbacks, ec_item) {
                cb->ec_func(node_num, cb->ec_data);
        }
        up_read(&dlm_callback_sem);
index 47e67c2..5d32f75 100644 (file)
@@ -91,19 +91,14 @@ void dlm_destroy_lock_cache(void)
 static int dlm_can_grant_new_lock(struct dlm_lock_resource *res,
                                  struct dlm_lock *lock)
 {
-       struct list_head *iter;
        struct dlm_lock *tmplock;
 
-       list_for_each(iter, &res->granted) {
-               tmplock = list_entry(iter, struct dlm_lock, list);
-
+       list_for_each_entry(tmplock, &res->granted, list) {
                if (!dlm_lock_compatible(tmplock->ml.type, lock->ml.type))
                        return 0;
        }
 
-       list_for_each(iter, &res->converting) {
-               tmplock = list_entry(iter, struct dlm_lock, list);
-
+       list_for_each_entry(tmplock, &res->converting, list) {
                if (!dlm_lock_compatible(tmplock->ml.type, lock->ml.type))
                        return 0;
                if (!dlm_lock_compatible(tmplock->ml.convert_type,
index 33ecbe0..cf0f103 100644 (file)
@@ -342,16 +342,13 @@ static int dlm_find_mle(struct dlm_ctxt *dlm,
 {
        struct dlm_master_list_entry *tmpmle;
        struct hlist_head *bucket;
-       struct hlist_node *list;
        unsigned int hash;
 
        assert_spin_locked(&dlm->master_lock);
 
        hash = dlm_lockid_hash(name, namelen);
        bucket = dlm_master_hash(dlm, hash);
-       hlist_for_each(list, bucket) {
-               tmpmle = hlist_entry(list, struct dlm_master_list_entry,
-                                    master_hash_node);
+       hlist_for_each_entry(tmpmle, bucket, master_hash_node) {
                if (!dlm_mle_equal(dlm, tmpmle, name, namelen))
                        continue;
                dlm_get_mle(tmpmle);
@@ -3183,7 +3180,7 @@ void dlm_clean_master_list(struct dlm_ctxt *dlm, u8 dead_node)
        struct dlm_master_list_entry *mle;
        struct dlm_lock_resource *res;
        struct hlist_head *bucket;
-       struct hlist_node *list;
+       struct hlist_node *tmp;
        unsigned int i;
 
        mlog(0, "dlm=%s, dead node=%u\n", dlm->name, dead_node);
@@ -3194,10 +3191,7 @@ top:
        spin_lock(&dlm->master_lock);
        for (i = 0; i < DLM_HASH_BUCKETS; i++) {
                bucket = dlm_master_hash(dlm, i);
-               hlist_for_each(list, bucket) {
-                       mle = hlist_entry(list, struct dlm_master_list_entry,
-                                         master_hash_node);
-
+               hlist_for_each_entry_safe(mle, tmp, bucket, master_hash_node) {
                        BUG_ON(mle->type != DLM_MLE_BLOCK &&
                               mle->type != DLM_MLE_MASTER &&
                               mle->type != DLM_MLE_MIGRATION);
@@ -3378,7 +3372,7 @@ void dlm_force_free_mles(struct dlm_ctxt *dlm)
        int i;
        struct hlist_head *bucket;
        struct dlm_master_list_entry *mle;
-       struct hlist_node *tmp, *list;
+       struct hlist_node *tmp;
 
        /*
         * We notified all other nodes that we are exiting the domain and
@@ -3394,9 +3388,7 @@ void dlm_force_free_mles(struct dlm_ctxt *dlm)
 
        for (i = 0; i < DLM_HASH_BUCKETS; i++) {
                bucket = dlm_master_hash(dlm, i);
-               hlist_for_each_safe(list, tmp, bucket) {
-                       mle = hlist_entry(list, struct dlm_master_list_entry,
-                                         master_hash_node);
+               hlist_for_each_entry_safe(mle, tmp, bucket, master_hash_node) {
                        if (mle->type != DLM_MLE_BLOCK) {
                                mlog(ML_ERROR, "bad mle: %p\n", mle);
                                dlm_print_one_mle(mle);
index 773bd32..0b5adca 100644 (file)
@@ -787,6 +787,7 @@ static int dlm_request_all_locks(struct dlm_ctxt *dlm, u8 request_from,
 {
        struct dlm_lock_request lr;
        int ret;
+       int status;
 
        mlog(0, "\n");
 
@@ -800,13 +801,15 @@ static int dlm_request_all_locks(struct dlm_ctxt *dlm, u8 request_from,
 
        // send message
        ret = o2net_send_message(DLM_LOCK_REQUEST_MSG, dlm->key,
-                                &lr, sizeof(lr), request_from, NULL);
+                                &lr, sizeof(lr), request_from, &status);
 
        /* negative status is handled by caller */
        if (ret < 0)
                mlog(ML_ERROR, "%s: Error %d send LOCK_REQUEST to node %u "
                     "to recover dead node %u\n", dlm->name, ret,
                     request_from, dead_node);
+       else
+               ret = status;
        // return from here, then
        // sleep until all received or error
        return ret;
@@ -2328,6 +2331,14 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node)
                        } else if (res->owner == dlm->node_num) {
                                dlm_free_dead_locks(dlm, res, dead_node);
                                __dlm_lockres_calc_usage(dlm, res);
+                       } else if (res->owner == DLM_LOCK_RES_OWNER_UNKNOWN) {
+                               if (test_bit(dead_node, res->refmap)) {
+                                       mlog(0, "%s:%.*s: dead node %u had a ref, but had "
+                                               "no locks and had not purged before dying\n",
+                                               dlm->name, res->lockname.len,
+                                               res->lockname.name, dead_node);
+                                       dlm_lockres_clear_refmap_bit(dlm, res, dead_node);
+                               }
                        }
                        spin_unlock(&res->spinlock);
                }
index e73c833..9db869d 100644 (file)
@@ -286,8 +286,6 @@ static void dlm_shuffle_lists(struct dlm_ctxt *dlm,
                              struct dlm_lock_resource *res)
 {
        struct dlm_lock *lock, *target;
-       struct list_head *iter;
-       struct list_head *head;
        int can_grant = 1;
 
        /*
@@ -314,9 +312,7 @@ converting:
                     dlm->name, res->lockname.len, res->lockname.name);
                BUG();
        }
-       head = &res->granted;
-       list_for_each(iter, head) {
-               lock = list_entry(iter, struct dlm_lock, list);
+       list_for_each_entry(lock, &res->granted, list) {
                if (lock==target)
                        continue;
                if (!dlm_lock_compatible(lock->ml.type,
@@ -333,9 +329,8 @@ converting:
                                        target->ml.convert_type;
                }
        }
-       head = &res->converting;
-       list_for_each(iter, head) {
-               lock = list_entry(iter, struct dlm_lock, list);
+
+       list_for_each_entry(lock, &res->converting, list) {
                if (lock==target)
                        continue;
                if (!dlm_lock_compatible(lock->ml.type,
@@ -384,9 +379,7 @@ blocked:
                goto leave;
        target = list_entry(res->blocked.next, struct dlm_lock, list);
 
-       head = &res->granted;
-       list_for_each(iter, head) {
-               lock = list_entry(iter, struct dlm_lock, list);
+       list_for_each_entry(lock, &res->granted, list) {
                if (lock==target)
                        continue;
                if (!dlm_lock_compatible(lock->ml.type, target->ml.type)) {
@@ -400,9 +393,7 @@ blocked:
                }
        }
 
-       head = &res->converting;
-       list_for_each(iter, head) {
-               lock = list_entry(iter, struct dlm_lock, list);
+       list_for_each_entry(lock, &res->converting, list) {
                if (lock==target)
                        continue;
                if (!dlm_lock_compatible(lock->ml.type, target->ml.type)) {
index 850aa7e..5698b52 100644 (file)
@@ -388,7 +388,6 @@ int dlm_unlock_lock_handler(struct o2net_msg *msg, u32 len, void *data,
        struct dlm_ctxt *dlm = data;
        struct dlm_unlock_lock *unlock = (struct dlm_unlock_lock *)msg->buf;
        struct dlm_lock_resource *res = NULL;
-       struct list_head *iter;
        struct dlm_lock *lock = NULL;
        enum dlm_status status = DLM_NORMAL;
        int found = 0, i;
@@ -458,8 +457,7 @@ int dlm_unlock_lock_handler(struct o2net_msg *msg, u32 len, void *data,
        }
 
        for (i=0; i<3; i++) {
-               list_for_each(iter, queue) {
-                       lock = list_entry(iter, struct dlm_lock, list);
+               list_for_each_entry(lock, queue, list) {
                        if (lock->ml.cookie == unlock->cookie &&
                            lock->ml.node == unlock->node_idx) {
                                dlm_lock_get(lock);
index 12bafb7..efa2b3d 100644 (file)
@@ -401,11 +401,8 @@ static struct inode *dlmfs_get_root_inode(struct super_block *sb)
 {
        struct inode *inode = new_inode(sb);
        umode_t mode = S_IFDIR | 0755;
-       struct dlmfs_inode_private *ip;
 
        if (inode) {
-               ip = DLMFS_I(inode);
-
                inode->i_ino = get_next_ino();
                inode_init_owner(inode, NULL, mode);
                inode->i_mapping->backing_dev_info = &dlmfs_backing_dev_info;
index 2487116..767370b 100644 (file)
@@ -781,7 +781,6 @@ int ocfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
        cpos = map_start >> osb->s_clustersize_bits;
        mapping_end = ocfs2_clusters_for_bytes(inode->i_sb,
                                               map_start + map_len);
-       mapping_end -= cpos;
        is_last = 0;
        while (cpos < mapping_end && !is_last) {
                u32 fe_flags;
@@ -852,20 +851,20 @@ int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int whence)
 
        down_read(&OCFS2_I(inode)->ip_alloc_sem);
 
-       if (*offset >= inode->i_size) {
+       if (*offset >= i_size_read(inode)) {
                ret = -ENXIO;
                goto out_unlock;
        }
 
        if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
                if (whence == SEEK_HOLE)
-                       *offset = inode->i_size;
+                       *offset = i_size_read(inode);
                goto out_unlock;
        }
 
        clen = 0;
        cpos = *offset >> cs_bits;
-       cend = ocfs2_clusters_for_bytes(inode->i_sb, inode->i_size);
+       cend = ocfs2_clusters_for_bytes(inode->i_sb, i_size_read(inode));
 
        while (cpos < cend && !is_last) {
                ret = ocfs2_get_clusters_nocache(inode, di_bh, cpos, &hole_size,
@@ -904,8 +903,8 @@ int ocfs2_seek_data_hole_offset(struct file *file, loff_t *offset, int whence)
                extlen = clen;
                extlen <<=  cs_bits;
 
-               if ((extoff + extlen) > inode->i_size)
-                       extlen = inode->i_size - extoff;
+               if ((extoff + extlen) > i_size_read(inode))
+                       extlen = i_size_read(inode) - extoff;
                extoff += extlen;
                if (extoff > *offset)
                        *offset = extoff;
index 3261d71..d71903c 100644 (file)
@@ -671,11 +671,7 @@ restarted_transaction:
                } else {
                        BUG_ON(why != RESTART_TRANS);
 
-                       /* TODO: This can be more intelligent. */
-                       credits = ocfs2_calc_extend_credits(osb->sb,
-                                                           &fe->id2.i_list,
-                                                           clusters_to_add);
-                       status = ocfs2_extend_trans(handle, credits);
+                       status = ocfs2_allocate_extend_trans(handle, 1);
                        if (status < 0) {
                                /* handle still has to be committed at
                                 * this point. */
@@ -1800,6 +1796,7 @@ static int ocfs2_remove_inode_range(struct inode *inode,
        ocfs2_truncate_cluster_pages(inode, byte_start, byte_len);
 
 out:
+       ocfs2_free_path(path);
        ocfs2_schedule_truncate_log_flush(osb, 1);
        ocfs2_run_deallocs(osb, &dealloc);
 
@@ -2245,7 +2242,7 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
                file->f_path.dentry->d_name.name,
                (unsigned int)nr_segs);
 
-       if (iocb->ki_left == 0)
+       if (iocb->ki_nbytes == 0)
                return 0;
 
        appending = file->f_flags & O_APPEND ? 1 : 0;
@@ -2296,7 +2293,7 @@ relock:
 
        can_do_direct = direct_io;
        ret = ocfs2_prepare_inode_for_write(file, ppos,
-                                           iocb->ki_left, appending,
+                                           iocb->ki_nbytes, appending,
                                            &can_do_direct, &has_refcount);
        if (ret < 0) {
                mlog_errno(ret);
@@ -2304,7 +2301,7 @@ relock:
        }
 
        if (direct_io && !is_sync_kiocb(iocb))
-               unaligned_dio = ocfs2_is_io_unaligned(inode, iocb->ki_left,
+               unaligned_dio = ocfs2_is_io_unaligned(inode, iocb->ki_nbytes,
                                                      *ppos);
 
        /*
index 0c60ef2..fa32ce9 100644 (file)
@@ -303,7 +303,7 @@ int ocfs2_info_handle_journal_size(struct inode *inode,
        if (o2info_from_user(oij, req))
                goto bail;
 
-       oij.ij_journal_size = osb->journal->j_inode->i_size;
+       oij.ij_journal_size = i_size_read(osb->journal->j_inode);
 
        o2info_set_request_filled(&oij.ij_req);
 
index 242170d..44fc3e5 100644 (file)
@@ -455,6 +455,41 @@ bail:
        return status;
 }
 
+/*
+ * If we have fewer than thresh credits, extend by OCFS2_MAX_TRANS_DATA.
+ * If that fails, restart the transaction & regain write access for the
+ * buffer head which is used for metadata modifications.
+ * Taken from Ext4: extend_or_restart_transaction()
+ */
+int ocfs2_allocate_extend_trans(handle_t *handle, int thresh)
+{
+       int status, old_nblks;
+
+       BUG_ON(!handle);
+
+       old_nblks = handle->h_buffer_credits;
+       trace_ocfs2_allocate_extend_trans(old_nblks, thresh);
+
+       if (old_nblks < thresh)
+               return 0;
+
+       status = jbd2_journal_extend(handle, OCFS2_MAX_TRANS_DATA);
+       if (status < 0) {
+               mlog_errno(status);
+               goto bail;
+       }
+
+       if (status > 0) {
+               status = jbd2_journal_restart(handle, OCFS2_MAX_TRANS_DATA);
+               if (status < 0)
+                       mlog_errno(status);
+       }
+
+bail:
+       return status;
+}
+
+
 struct ocfs2_triggers {
        struct jbd2_buffer_trigger_type ot_triggers;
        int                             ot_offset;
@@ -801,14 +836,14 @@ int ocfs2_journal_init(struct ocfs2_journal *journal, int *dirty)
        inode_lock = 1;
        di = (struct ocfs2_dinode *)bh->b_data;
 
-       if (inode->i_size <  OCFS2_MIN_JOURNAL_SIZE) {
+       if (i_size_read(inode) <  OCFS2_MIN_JOURNAL_SIZE) {
                mlog(ML_ERROR, "Journal file size (%lld) is too small!\n",
-                    inode->i_size);
+                    i_size_read(inode));
                status = -EINVAL;
                goto done;
        }
 
-       trace_ocfs2_journal_init(inode->i_size,
+       trace_ocfs2_journal_init(i_size_read(inode),
                                 (unsigned long long)inode->i_blocks,
                                 OCFS2_I(inode)->ip_clusters);
 
@@ -1096,7 +1131,7 @@ static int ocfs2_force_read_journal(struct inode *inode)
 
        memset(bhs, 0, sizeof(struct buffer_head *) * CONCURRENT_JOURNAL_FILL);
 
-       num_blocks = ocfs2_blocks_for_bytes(inode->i_sb, inode->i_size);
+       num_blocks = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
        v_blkno = 0;
        while (v_blkno < num_blocks) {
                status = ocfs2_extent_map_get_blocks(inode, v_blkno,
index 0a99273..0b479ba 100644 (file)
@@ -258,6 +258,17 @@ handle_t               *ocfs2_start_trans(struct ocfs2_super *osb,
 int                         ocfs2_commit_trans(struct ocfs2_super *osb,
                                                handle_t *handle);
 int                         ocfs2_extend_trans(handle_t *handle, int nblocks);
+int                         ocfs2_allocate_extend_trans(handle_t *handle,
+                                               int thresh);
+
+/*
+ * Define an arbitrary limit for the amount of data we will anticipate
+ * writing to any given transaction.  For unbounded transactions such as
+ * fallocate(2) we can write more than this, but we always
+ * start off at the maximum transaction size and grow the transaction
+ * optimistically as we go.
+ */
+#define OCFS2_MAX_TRANS_DATA   64U
 
 /*
  * Create access is for when we get a newly created buffer and we're
index aebeacd..cd5496b 100644 (file)
@@ -1082,7 +1082,7 @@ static int ocfs2_local_alloc_reserve_for_window(struct ocfs2_super *osb,
        }
 
 retry_enospc:
-       (*ac)->ac_bits_wanted = osb->local_alloc_default_bits;
+       (*ac)->ac_bits_wanted = osb->local_alloc_bits;
        status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac);
        if (status == -ENOSPC) {
                if (ocfs2_recalc_la_window(osb, OCFS2_LA_EVENT_ENOSPC) ==
@@ -1154,7 +1154,7 @@ retry_enospc:
                    OCFS2_LA_DISABLED)
                        goto bail;
 
-               ac->ac_bits_wanted = osb->local_alloc_default_bits;
+               ac->ac_bits_wanted = osb->local_alloc_bits;
                status = ocfs2_claim_clusters(handle, ac,
                                              osb->local_alloc_bits,
                                              &cluster_off,
index 452068b..3d3f3c8 100644 (file)
@@ -152,6 +152,7 @@ static int __ocfs2_move_extent(handle_t *handle,
        }
 
 out:
+       ocfs2_free_path(path);
        return ret;
 }
 
@@ -845,7 +846,7 @@ static int __ocfs2_move_extents_range(struct buffer_head *di_bh,
        struct ocfs2_move_extents *range = context->range;
        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
 
-       if ((inode->i_size == 0) || (range->me_len == 0))
+       if ((i_size_read(inode) == 0) || (range->me_len == 0))
                return 0;
 
        if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
index 3b481f4..1b60c62 100644 (file)
@@ -2579,6 +2579,8 @@ DEFINE_OCFS2_INT_INT_EVENT(ocfs2_extend_trans);
 
 DEFINE_OCFS2_INT_EVENT(ocfs2_extend_trans_restart);
 
+DEFINE_OCFS2_INT_INT_EVENT(ocfs2_allocate_extend_trans);
+
 DEFINE_OCFS2_ULL_ULL_UINT_UINT_EVENT(ocfs2_journal_access);
 
 DEFINE_OCFS2_ULL_EVENT(ocfs2_journal_dirty);
index 332a281..aaa5061 100644 (file)
@@ -234,7 +234,7 @@ ssize_t ocfs2_quota_write(struct super_block *sb, int type,
                len = sb->s_blocksize - OCFS2_QBLK_RESERVED_SPACE - offset;
        }
 
-       if (gqinode->i_size < off + len) {
+       if (i_size_read(gqinode) < off + len) {
                loff_t rounded_end =
                                ocfs2_align_bytes_to_blocks(sb, off + len);
 
@@ -778,8 +778,8 @@ static int ocfs2_acquire_dquot(struct dquot *dquot)
                 */
                WARN_ON(journal_current_handle());
                status = ocfs2_extend_no_holes(gqinode, NULL,
-                       gqinode->i_size + (need_alloc << sb->s_blocksize_bits),
-                       gqinode->i_size);
+                       i_size_read(gqinode) + (need_alloc << sb->s_blocksize_bits),
+                       i_size_read(gqinode));
                if (status < 0)
                        goto out_dq;
        }
index 27fe7ee..2e4344b 100644 (file)
@@ -982,14 +982,14 @@ static struct ocfs2_quota_chunk *ocfs2_local_quota_add_chunk(
 
        /* We are protected by dqio_sem so no locking needed */
        status = ocfs2_extend_no_holes(lqinode, NULL,
-                                      lqinode->i_size + 2 * sb->s_blocksize,
-                                      lqinode->i_size);
+                                      i_size_read(lqinode) + 2 * sb->s_blocksize,
+                                      i_size_read(lqinode));
        if (status < 0) {
                mlog_errno(status);
                goto out;
        }
        status = ocfs2_simple_size_update(lqinode, oinfo->dqi_lqi_bh,
-                                         lqinode->i_size + 2 * sb->s_blocksize);
+                                         i_size_read(lqinode) + 2 * sb->s_blocksize);
        if (status < 0) {
                mlog_errno(status);
                goto out;
@@ -1125,14 +1125,14 @@ static struct ocfs2_quota_chunk *ocfs2_extend_local_quota_file(
 
        /* We are protected by dqio_sem so no locking needed */
        status = ocfs2_extend_no_holes(lqinode, NULL,
-                                      lqinode->i_size + sb->s_blocksize,
-                                      lqinode->i_size);
+                                      i_size_read(lqinode) + sb->s_blocksize,
+                                      i_size_read(lqinode));
        if (status < 0) {
                mlog_errno(status);
                goto out;
        }
        status = ocfs2_simple_size_update(lqinode, oinfo->dqi_lqi_bh,
-                                         lqinode->i_size + sb->s_blocksize);
+                                         i_size_read(lqinode) + sb->s_blocksize);
        if (status < 0) {
                mlog_errno(status);
                goto out;
index a70d604..bf4dfc1 100644 (file)
@@ -3854,7 +3854,10 @@ static int ocfs2_attach_refcount_tree(struct inode *inode,
        while (cpos < clusters) {
                ret = ocfs2_get_clusters(inode, cpos, &p_cluster,
                                         &num_clusters, &ext_flags);
-
+               if (ret) {
+                       mlog_errno(ret);
+                       goto unlock;
+               }
                if (p_cluster && !(ext_flags & OCFS2_EXT_REFCOUNTED)) {
                        ret = ocfs2_add_refcount_flag(inode, &di_et,
                                                      &ref_tree->rf_ci,
@@ -4025,7 +4028,10 @@ static int ocfs2_duplicate_extent_list(struct inode *s_inode,
        while (cpos < clusters) {
                ret = ocfs2_get_clusters(s_inode, cpos, &p_cluster,
                                         &num_clusters, &ext_flags);
-
+               if (ret) {
+                       mlog_errno(ret);
+                       goto out;
+               }
                if (p_cluster) {
                        ret = ocfs2_add_refcounted_extent(t_inode, &et,
                                                          ref_ci, ref_root_bh,
index 317ef0a..6ce0686 100644 (file)
@@ -3505,7 +3505,7 @@ int ocfs2_xattr_set(struct inode *inode,
        int ret, credits, ref_meta = 0, ref_credits = 0;
        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
        struct inode *tl_inode = osb->osb_tl_inode;
-       struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, };
+       struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, NULL, };
        struct ocfs2_refcount_tree *ref_tree = NULL;
 
        struct ocfs2_xattr_info xi = {
@@ -3609,13 +3609,14 @@ int ocfs2_xattr_set(struct inode *inode,
        if (IS_ERR(ctxt.handle)) {
                ret = PTR_ERR(ctxt.handle);
                mlog_errno(ret);
-               goto cleanup;
+               goto out_free_ac;
        }
 
        ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt);
 
        ocfs2_commit_trans(osb, ctxt.handle);
 
+out_free_ac:
        if (ctxt.data_ac)
                ocfs2_free_alloc_context(ctxt.data_ac);
        if (ctxt.meta_ac)
@@ -5881,6 +5882,10 @@ static int ocfs2_xattr_value_attach_refcount(struct inode *inode,
        while (cpos < clusters) {
                ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
                                               &num_clusters, el, &ext_flags);
+               if (ret) {
+                       mlog_errno(ret);
+                       break;
+               }
 
                cpos += num_clusters;
                if ((ext_flags & OCFS2_EXT_REFCOUNTED))
@@ -6797,7 +6802,7 @@ out:
        if (ret) {
                if (*meta_ac) {
                        ocfs2_free_alloc_context(*meta_ac);
-                       meta_ac = NULL;
+                       *meta_ac = NULL;
                }
        }
 
index e0d9b3e..54d57d6 100644 (file)
@@ -311,7 +311,7 @@ static void omfs_write_failed(struct address_space *mapping, loff_t to)
        struct inode *inode = mapping->host;
 
        if (to > inode->i_size) {
-               truncate_pagecache(inode, to, inode->i_size);
+               truncate_pagecache(inode, inode->i_size);
                omfs_truncate(inode);
        }
 }
index 0ff80f9..985ea88 100644 (file)
@@ -286,7 +286,7 @@ int proc_fd_permission(struct inode *inode, int mask)
        int rv = generic_permission(inode, mask);
        if (rv == 0)
                return 0;
-       if (task_pid(current) == proc_pid(inode))
+       if (task_tgid(current) == proc_pid(inode))
                rv = 0;
        return rv;
 }
index 5aa847a..59d85d6 100644 (file)
@@ -132,13 +132,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
                K(i.freeswap),
                K(global_page_state(NR_FILE_DIRTY)),
                K(global_page_state(NR_WRITEBACK)),
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-               K(global_page_state(NR_ANON_PAGES)
-                 + global_page_state(NR_ANON_TRANSPARENT_HUGEPAGES) *
-                 HPAGE_PMD_NR),
-#else
                K(global_page_state(NR_ANON_PAGES)),
-#endif
                K(global_page_state(NR_FILE_MAPPED)),
                K(global_page_state(NR_SHMEM)),
                K(global_page_state(NR_SLAB_RECLAIMABLE) +
index 107d026..7366e9d 100644 (file)
@@ -740,6 +740,9 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma,
                ptent = pte_file_clear_soft_dirty(ptent);
        }
 
+       if (vma->vm_flags & VM_SOFTDIRTY)
+               vma->vm_flags &= ~VM_SOFTDIRTY;
+
        set_pte_at(vma->vm_mm, addr, pte, ptent);
 #endif
 }
@@ -949,13 +952,15 @@ static void pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm,
                if (is_migration_entry(entry))
                        page = migration_entry_to_page(entry);
        } else {
-               *pme = make_pme(PM_NOT_PRESENT(pm->v2));
+               if (vma->vm_flags & VM_SOFTDIRTY)
+                       flags2 |= __PM_SOFT_DIRTY;
+               *pme = make_pme(PM_NOT_PRESENT(pm->v2) | PM_STATUS2(pm->v2, flags2));
                return;
        }
 
        if (page && !PageAnon(page))
                flags |= PM_FILE;
-       if (pte_soft_dirty(pte))
+       if ((vma->vm_flags & VM_SOFTDIRTY) || pte_soft_dirty(pte))
                flags2 |= __PM_SOFT_DIRTY;
 
        *pme = make_pme(PM_PFRAME(frame) | PM_STATUS2(pm->v2, flags2) | flags);
@@ -974,7 +979,7 @@ static void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *p
                *pme = make_pme(PM_PFRAME(pmd_pfn(pmd) + offset)
                                | PM_STATUS2(pm->v2, pmd_flags2) | PM_PRESENT);
        else
-               *pme = make_pme(PM_NOT_PRESENT(pm->v2));
+               *pme = make_pme(PM_NOT_PRESENT(pm->v2) | PM_STATUS2(pm->v2, pmd_flags2));
 }
 #else
 static inline void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm,
@@ -997,7 +1002,11 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
        if (vma && pmd_trans_huge_lock(pmd, vma) == 1) {
                int pmd_flags2;
 
-               pmd_flags2 = (pmd_soft_dirty(*pmd) ? __PM_SOFT_DIRTY : 0);
+               if ((vma->vm_flags & VM_SOFTDIRTY) || pmd_soft_dirty(*pmd))
+                       pmd_flags2 = __PM_SOFT_DIRTY;
+               else
+                       pmd_flags2 = 0;
+
                for (; addr != end; addr += PAGE_SIZE) {
                        unsigned long offset;
 
@@ -1015,12 +1024,17 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
        if (pmd_trans_unstable(pmd))
                return 0;
        for (; addr != end; addr += PAGE_SIZE) {
+               int flags2;
 
                /* check to see if we've left 'vma' behind
                 * and need a new, higher one */
                if (vma && (addr >= vma->vm_end)) {
                        vma = find_vma(walk->mm, addr);
-                       pme = make_pme(PM_NOT_PRESENT(pm->v2));
+                       if (vma && (vma->vm_flags & VM_SOFTDIRTY))
+                               flags2 = __PM_SOFT_DIRTY;
+                       else
+                               flags2 = 0;
+                       pme = make_pme(PM_NOT_PRESENT(pm->v2) | PM_STATUS2(pm->v2, flags2));
                }
 
                /* check that 'vma' actually covers this address,
@@ -1044,13 +1058,15 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
 
 #ifdef CONFIG_HUGETLB_PAGE
 static void huge_pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm,
-                                       pte_t pte, int offset)
+                                       pte_t pte, int offset, int flags2)
 {
        if (pte_present(pte))
-               *pme = make_pme(PM_PFRAME(pte_pfn(pte) + offset)
-                               | PM_STATUS2(pm->v2, 0) | PM_PRESENT);
+               *pme = make_pme(PM_PFRAME(pte_pfn(pte) + offset)        |
+                               PM_STATUS2(pm->v2, flags2)              |
+                               PM_PRESENT);
        else
-               *pme = make_pme(PM_NOT_PRESENT(pm->v2));
+               *pme = make_pme(PM_NOT_PRESENT(pm->v2)                  |
+                               PM_STATUS2(pm->v2, flags2));
 }
 
 /* This function walks within one hugetlb entry in the single call */
@@ -1059,12 +1075,22 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask,
                                 struct mm_walk *walk)
 {
        struct pagemapread *pm = walk->private;
+       struct vm_area_struct *vma;
        int err = 0;
+       int flags2;
        pagemap_entry_t pme;
 
+       vma = find_vma(walk->mm, addr);
+       WARN_ON_ONCE(!vma);
+
+       if (vma && (vma->vm_flags & VM_SOFTDIRTY))
+               flags2 = __PM_SOFT_DIRTY;
+       else
+               flags2 = 0;
+
        for (; addr != end; addr += PAGE_SIZE) {
                int offset = (addr & ~hmask) >> PAGE_SHIFT;
-               huge_pte_to_pagemap_entry(&pme, pm, *pte, offset);
+               huge_pte_to_pagemap_entry(&pme, pm, *pte, offset, flags2);
                err = add_to_pagemap(addr, &pme, pm);
                if (err)
                        return err;
@@ -1376,8 +1402,10 @@ static int show_numa_map(struct seq_file *m, void *v, int is_pid)
        walk.mm = mm;
 
        pol = get_vma_policy(task, vma, vma->vm_start);
-       mpol_to_str(buffer, sizeof(buffer), pol);
+       n = mpol_to_str(buffer, sizeof(buffer), pol);
        mpol_cond_put(pol);
+       if (n < 0)
+               return n;
 
        seq_printf(m, "%08lx %s", vma->vm_start, buffer);
 
index a1a16eb..9100d69 100644 (file)
@@ -21,6 +21,7 @@
 #include <linux/crash_dump.h>
 #include <linux/list.h>
 #include <linux/vmalloc.h>
+#include <linux/pagemap.h>
 #include <asm/uaccess.h>
 #include <asm/io.h>
 #include "internal.h"
@@ -123,11 +124,65 @@ static ssize_t read_from_oldmem(char *buf, size_t count,
        return read;
 }
 
+/*
+ * Architectures may override this function to allocate ELF header in 2nd kernel
+ */
+int __weak elfcorehdr_alloc(unsigned long long *addr, unsigned long long *size)
+{
+       return 0;
+}
+
+/*
+ * Architectures may override this function to free header
+ */
+void __weak elfcorehdr_free(unsigned long long addr)
+{}
+
+/*
+ * Architectures may override this function to read from ELF header
+ */
+ssize_t __weak elfcorehdr_read(char *buf, size_t count, u64 *ppos)
+{
+       return read_from_oldmem(buf, count, ppos, 0);
+}
+
+/*
+ * Architectures may override this function to read from notes sections
+ */
+ssize_t __weak elfcorehdr_read_notes(char *buf, size_t count, u64 *ppos)
+{
+       return read_from_oldmem(buf, count, ppos, 0);
+}
+
+/*
+ * Architectures may override this function to map oldmem
+ */
+int __weak remap_oldmem_pfn_range(struct vm_area_struct *vma,
+                                 unsigned long from, unsigned long pfn,
+                                 unsigned long size, pgprot_t prot)
+{
+       return remap_pfn_range(vma, from, pfn, size, prot);
+}
+
+/*
+ * Copy to either kernel or user space
+ */
+static int copy_to(void *target, void *src, size_t size, int userbuf)
+{
+       if (userbuf) {
+               if (copy_to_user((char __user *) target, src, size))
+                       return -EFAULT;
+       } else {
+               memcpy(target, src, size);
+       }
+       return 0;
+}
+
 /* Read from the ELF header and then the crash dump. On error, negative value is
  * returned otherwise number of bytes read are returned.
  */
-static ssize_t read_vmcore(struct file *file, char __user *buffer,
-                               size_t buflen, loff_t *fpos)
+static ssize_t __read_vmcore(char *buffer, size_t buflen, loff_t *fpos,
+                            int userbuf)
 {
        ssize_t acc = 0, tmp;
        size_t tsz;
@@ -144,7 +199,7 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer,
        /* Read ELF core header */
        if (*fpos < elfcorebuf_sz) {
                tsz = min(elfcorebuf_sz - (size_t)*fpos, buflen);
-               if (copy_to_user(buffer, elfcorebuf + *fpos, tsz))
+               if (copy_to(buffer, elfcorebuf + *fpos, tsz, userbuf))
                        return -EFAULT;
                buflen -= tsz;
                *fpos += tsz;
@@ -162,7 +217,7 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer,
 
                tsz = min(elfcorebuf_sz + elfnotes_sz - (size_t)*fpos, buflen);
                kaddr = elfnotes_buf + *fpos - elfcorebuf_sz;
-               if (copy_to_user(buffer, kaddr, tsz))
+               if (copy_to(buffer, kaddr, tsz, userbuf))
                        return -EFAULT;
                buflen -= tsz;
                *fpos += tsz;
@@ -178,7 +233,7 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer,
                if (*fpos < m->offset + m->size) {
                        tsz = min_t(size_t, m->offset + m->size - *fpos, buflen);
                        start = m->paddr + *fpos - m->offset;
-                       tmp = read_from_oldmem(buffer, tsz, &start, 1);
+                       tmp = read_from_oldmem(buffer, tsz, &start, userbuf);
                        if (tmp < 0)
                                return tmp;
                        buflen -= tsz;
@@ -195,6 +250,55 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer,
        return acc;
 }
 
+static ssize_t read_vmcore(struct file *file, char __user *buffer,
+                          size_t buflen, loff_t *fpos)
+{
+       return __read_vmcore((__force char *) buffer, buflen, fpos, 1);
+}
+
+/*
+ * The vmcore fault handler uses the page cache and fills data using the
+ * standard __vmcore_read() function.
+ *
+ * On s390 the fault handler is used for memory regions that can't be mapped
+ * directly with remap_pfn_range().
+ */
+static int mmap_vmcore_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+#ifdef CONFIG_S390
+       struct address_space *mapping = vma->vm_file->f_mapping;
+       pgoff_t index = vmf->pgoff;
+       struct page *page;
+       loff_t offset;
+       char *buf;
+       int rc;
+
+       page = find_or_create_page(mapping, index, GFP_KERNEL);
+       if (!page)
+               return VM_FAULT_OOM;
+       if (!PageUptodate(page)) {
+               offset = (loff_t) index << PAGE_CACHE_SHIFT;
+               buf = __va((page_to_pfn(page) << PAGE_SHIFT));
+               rc = __read_vmcore(buf, PAGE_SIZE, &offset, 0);
+               if (rc < 0) {
+                       unlock_page(page);
+                       page_cache_release(page);
+                       return (rc == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS;
+               }
+               SetPageUptodate(page);
+       }
+       unlock_page(page);
+       vmf->page = page;
+       return 0;
+#else
+       return VM_FAULT_SIGBUS;
+#endif
+}
+
+static const struct vm_operations_struct vmcore_mmap_ops = {
+       .fault = mmap_vmcore_fault,
+};
+
 /**
  * alloc_elfnotes_buf - allocate buffer for ELF note segment in
  *                      vmalloc memory
@@ -223,7 +327,7 @@ static inline char *alloc_elfnotes_buf(size_t notes_sz)
  * regions in the 1st kernel pointed to by PT_LOAD entries) into
  * virtually contiguous user-space in ELF layout.
  */
-#if defined(CONFIG_MMU) && !defined(CONFIG_S390)
+#ifdef CONFIG_MMU
 static int mmap_vmcore(struct file *file, struct vm_area_struct *vma)
 {
        size_t size = vma->vm_end - vma->vm_start;
@@ -241,6 +345,7 @@ static int mmap_vmcore(struct file *file, struct vm_area_struct *vma)
 
        vma->vm_flags &= ~(VM_MAYWRITE | VM_MAYEXEC);
        vma->vm_flags |= VM_MIXEDMAP;
+       vma->vm_ops = &vmcore_mmap_ops;
 
        len = 0;
 
@@ -282,9 +387,9 @@ static int mmap_vmcore(struct file *file, struct vm_area_struct *vma)
 
                        tsz = min_t(size_t, m->offset + m->size - start, size);
                        paddr = m->paddr + start - m->offset;
-                       if (remap_pfn_range(vma, vma->vm_start + len,
-                                           paddr >> PAGE_SHIFT, tsz,
-                                           vma->vm_page_prot))
+                       if (remap_oldmem_pfn_range(vma, vma->vm_start + len,
+                                                  paddr >> PAGE_SHIFT, tsz,
+                                                  vma->vm_page_prot))
                                goto fail;
                        size -= tsz;
                        start += tsz;
@@ -357,7 +462,7 @@ static int __init update_note_header_size_elf64(const Elf64_Ehdr *ehdr_ptr)
                notes_section = kmalloc(max_sz, GFP_KERNEL);
                if (!notes_section)
                        return -ENOMEM;
-               rc = read_from_oldmem(notes_section, max_sz, &offset, 0);
+               rc = elfcorehdr_read_notes(notes_section, max_sz, &offset);
                if (rc < 0) {
                        kfree(notes_section);
                        return rc;
@@ -444,7 +549,8 @@ static int __init copy_notes_elf64(const Elf64_Ehdr *ehdr_ptr, char *notes_buf)
                if (phdr_ptr->p_type != PT_NOTE)
                        continue;
                offset = phdr_ptr->p_offset;
-               rc = read_from_oldmem(notes_buf, phdr_ptr->p_memsz, &offset, 0);
+               rc = elfcorehdr_read_notes(notes_buf, phdr_ptr->p_memsz,
+                                          &offset);
                if (rc < 0)
                        return rc;
                notes_buf += phdr_ptr->p_memsz;
@@ -536,7 +642,7 @@ static int __init update_note_header_size_elf32(const Elf32_Ehdr *ehdr_ptr)
                notes_section = kmalloc(max_sz, GFP_KERNEL);
                if (!notes_section)
                        return -ENOMEM;
-               rc = read_from_oldmem(notes_section, max_sz, &offset, 0);
+               rc = elfcorehdr_read_notes(notes_section, max_sz, &offset);
                if (rc < 0) {
                        kfree(notes_section);
                        return rc;
@@ -623,7 +729,8 @@ static int __init copy_notes_elf32(const Elf32_Ehdr *ehdr_ptr, char *notes_buf)
                if (phdr_ptr->p_type != PT_NOTE)
                        continue;
                offset = phdr_ptr->p_offset;
-               rc = read_from_oldmem(notes_buf, phdr_ptr->p_memsz, &offset, 0);
+               rc = elfcorehdr_read_notes(notes_buf, phdr_ptr->p_memsz,
+                                          &offset);
                if (rc < 0)
                        return rc;
                notes_buf += phdr_ptr->p_memsz;
@@ -810,7 +917,7 @@ static int __init parse_crash_elf64_headers(void)
        addr = elfcorehdr_addr;
 
        /* Read Elf header */
-       rc = read_from_oldmem((char*)&ehdr, sizeof(Elf64_Ehdr), &addr, 0);
+       rc = elfcorehdr_read((char *)&ehdr, sizeof(Elf64_Ehdr), &addr);
        if (rc < 0)
                return rc;
 
@@ -837,7 +944,7 @@ static int __init parse_crash_elf64_headers(void)
        if (!elfcorebuf)
                return -ENOMEM;
        addr = elfcorehdr_addr;
-       rc = read_from_oldmem(elfcorebuf, elfcorebuf_sz_orig, &addr, 0);
+       rc = elfcorehdr_read(elfcorebuf, elfcorebuf_sz_orig, &addr);
        if (rc < 0)
                goto fail;
 
@@ -866,7 +973,7 @@ static int __init parse_crash_elf32_headers(void)
        addr = elfcorehdr_addr;
 
        /* Read Elf header */
-       rc = read_from_oldmem((char*)&ehdr, sizeof(Elf32_Ehdr), &addr, 0);
+       rc = elfcorehdr_read((char *)&ehdr, sizeof(Elf32_Ehdr), &addr);
        if (rc < 0)
                return rc;
 
@@ -892,7 +999,7 @@ static int __init parse_crash_elf32_headers(void)
        if (!elfcorebuf)
                return -ENOMEM;
        addr = elfcorehdr_addr;
-       rc = read_from_oldmem(elfcorebuf, elfcorebuf_sz_orig, &addr, 0);
+       rc = elfcorehdr_read(elfcorebuf, elfcorebuf_sz_orig, &addr);
        if (rc < 0)
                goto fail;
 
@@ -919,7 +1026,7 @@ static int __init parse_crash_elf_headers(void)
        int rc=0;
 
        addr = elfcorehdr_addr;
-       rc = read_from_oldmem(e_ident, EI_NIDENT, &addr, 0);
+       rc = elfcorehdr_read(e_ident, EI_NIDENT, &addr);
        if (rc < 0)
                return rc;
        if (memcmp(e_ident, ELFMAG, SELFMAG) != 0) {
@@ -952,7 +1059,14 @@ static int __init vmcore_init(void)
 {
        int rc = 0;
 
-       /* If elfcorehdr= has been passed in cmdline, then capture the dump.*/
+       /* Allow architectures to allocate ELF header in 2nd kernel */
+       rc = elfcorehdr_alloc(&elfcorehdr_addr, &elfcorehdr_size);
+       if (rc)
+               return rc;
+       /*
+        * If elfcorehdr= has been passed in cmdline or created in 2nd kernel,
+        * then capture the dump.
+        */
        if (!(is_vmcore_usable()))
                return rc;
        rc = parse_crash_elf_headers();
@@ -960,6 +1074,8 @@ static int __init vmcore_init(void)
                pr_warn("Kdump: vmcore not initialized\n");
                return rc;
        }
+       elfcorehdr_free(elfcorehdr_addr);
+       elfcorehdr_addr = ELFCORE_ADDR_ERR;
 
        proc_vmcore = proc_create("vmcore", S_IRUSR, NULL, &proc_vmcore_operations);
        if (proc_vmcore)
index 9a702e1..831d49a 100644 (file)
@@ -687,45 +687,37 @@ int dquot_quota_sync(struct super_block *sb, int type)
 }
 EXPORT_SYMBOL(dquot_quota_sync);
 
-/* Free unused dquots from cache */
-static void prune_dqcache(int count)
+static unsigned long
+dqcache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
 {
        struct list_head *head;
        struct dquot *dquot;
+       unsigned long freed = 0;
 
        head = free_dquots.prev;
-       while (head != &free_dquots && count) {
+       while (head != &free_dquots && sc->nr_to_scan) {
                dquot = list_entry(head, struct dquot, dq_free);
                remove_dquot_hash(dquot);
                remove_free_dquot(dquot);
                remove_inuse(dquot);
                do_destroy_dquot(dquot);
-               count--;
+               sc->nr_to_scan--;
+               freed++;
                head = free_dquots.prev;
        }
+       return freed;
 }
 
-/*
- * This is called from kswapd when we think we need some
- * more memory
- */
-static int shrink_dqcache_memory(struct shrinker *shrink,
-                                struct shrink_control *sc)
+static unsigned long
+dqcache_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
 {
-       int nr = sc->nr_to_scan;
-
-       if (nr) {
-               spin_lock(&dq_list_lock);
-               prune_dqcache(nr);
-               spin_unlock(&dq_list_lock);
-       }
-       return ((unsigned)
-               percpu_counter_read_positive(&dqstats.counter[DQST_FREE_DQUOTS])
-               /100) * sysctl_vfs_cache_pressure;
+       return vfs_pressure_ratio(
+       percpu_counter_read_positive(&dqstats.counter[DQST_FREE_DQUOTS]));
 }
 
 static struct shrinker dqcache_shrinker = {
-       .shrink = shrink_dqcache_memory,
+       .count_objects = dqcache_shrink_count,
+       .scan_objects = dqcache_shrink_scan,
        .seeks = DEFAULT_SEEKS,
 };
 
index c24f1e1..39d1465 100644 (file)
@@ -244,12 +244,6 @@ struct dentry *ramfs_mount(struct file_system_type *fs_type,
        return mount_nodev(fs_type, flags, data, ramfs_fill_super);
 }
 
-static struct dentry *rootfs_mount(struct file_system_type *fs_type,
-       int flags, const char *dev_name, void *data)
-{
-       return mount_nodev(fs_type, flags|MS_NOUSER, data, ramfs_fill_super);
-}
-
 static void ramfs_kill_sb(struct super_block *sb)
 {
        kfree(sb->s_fs_info);
@@ -262,29 +256,23 @@ static struct file_system_type ramfs_fs_type = {
        .kill_sb        = ramfs_kill_sb,
        .fs_flags       = FS_USERNS_MOUNT,
 };
-static struct file_system_type rootfs_fs_type = {
-       .name           = "rootfs",
-       .mount          = rootfs_mount,
-       .kill_sb        = kill_litter_super,
-};
 
-static int __init init_ramfs_fs(void)
-{
-       return register_filesystem(&ramfs_fs_type);
-}
-module_init(init_ramfs_fs)
-
-int __init init_rootfs(void)
+int __init init_ramfs_fs(void)
 {
+       static unsigned long once;
        int err;
 
+       if (test_and_set_bit(0, &once))
+               return 0;
+
        err = bdi_init(&ramfs_backing_dev_info);
        if (err)
                return err;
 
-       err = register_filesystem(&rootfs_fs_type);
+       err = register_filesystem(&ramfs_fs_type);
        if (err)
                bdi_destroy(&ramfs_backing_dev_info);
 
        return err;
 }
+module_init(init_ramfs_fs)
index 122a384..e3cd280 100644 (file)
@@ -367,7 +367,6 @@ ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *pp
 
        init_sync_kiocb(&kiocb, filp);
        kiocb.ki_pos = *ppos;
-       kiocb.ki_left = len;
        kiocb.ki_nbytes = len;
 
        ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos);
@@ -417,7 +416,6 @@ ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, lof
 
        init_sync_kiocb(&kiocb, filp);
        kiocb.ki_pos = *ppos;
-       kiocb.ki_left = len;
        kiocb.ki_nbytes = len;
 
        ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos);
@@ -599,7 +597,6 @@ static ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov,
 
        init_sync_kiocb(&kiocb, filp);
        kiocb.ki_pos = *ppos;
-       kiocb.ki_left = len;
        kiocb.ki_nbytes = len;
 
        ret = fn(&kiocb, iov, nr_segs, kiocb.ki_pos);
index fb50652..41d108e 100644 (file)
@@ -167,17 +167,14 @@ int squashfs_read_data(struct super_block *sb, void **buffer, u64 index,
                /*
                 * Block is uncompressed.
                 */
-               int i, in, pg_offset = 0;
-
-               for (i = 0; i < b; i++) {
-                       wait_on_buffer(bh[i]);
-                       if (!buffer_uptodate(bh[i]))
-                               goto block_release;
-               }
+               int in, pg_offset = 0;
 
                for (bytes = length; k < b; k++) {
                        in = min(bytes, msblk->devblksize - offset);
                        bytes -= in;
+                       wait_on_buffer(bh[k]);
+                       if (!buffer_uptodate(bh[k]))
+                               goto block_release;
                        while (in) {
                                if (pg_offset == PAGE_CACHE_SIZE) {
                                        page++;
index f7f527b..d8c2d74 100644 (file)
@@ -54,6 +54,7 @@ static int get_dir_index_using_offset(struct super_block *sb,
 {
        struct squashfs_sb_info *msblk = sb->s_fs_info;
        int err, i, index, length = 0;
+       unsigned int size;
        struct squashfs_dir_index dir_index;
 
        TRACE("Entered get_dir_index_using_offset, i_count %d, f_pos %lld\n",
@@ -81,8 +82,14 @@ static int get_dir_index_using_offset(struct super_block *sb,
                         */
                        break;
 
+               size = le32_to_cpu(dir_index.size) + 1;
+
+               /* size should never be larger than SQUASHFS_NAME_LEN */
+               if (size > SQUASHFS_NAME_LEN)
+                       break;
+
                err = squashfs_read_metadata(sb, NULL, &index_start,
-                               &index_offset, le32_to_cpu(dir_index.size) + 1);
+                               &index_offset, size);
                if (err < 0)
                        break;
 
@@ -105,9 +112,8 @@ static int squashfs_readdir(struct file *file, struct dir_context *ctx)
        struct inode *inode = file_inode(file);
        struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
        u64 block = squashfs_i(inode)->start + msblk->directory_table;
-       int offset = squashfs_i(inode)->offset, length, dir_count, size,
-                               type, err;
-       unsigned int inode_number;
+       int offset = squashfs_i(inode)->offset, length, err;
+       unsigned int inode_number, dir_count, size, type;
        struct squashfs_dir_header dirh;
        struct squashfs_dir_entry *dire;
 
@@ -200,6 +206,9 @@ static int squashfs_readdir(struct file *file, struct dir_context *ctx)
                                ((short) le16_to_cpu(dire->inode_number));
                        type = le16_to_cpu(dire->type);
 
+                       if (type > SQUASHFS_MAX_DIR_TYPE)
+                               goto failed_read;
+
                        if (!dir_emit(ctx, dire->name, size,
                                        inode_number,
                                        squashfs_filetype_table[type]))
index 7834a51..67cad77 100644 (file)
@@ -79,7 +79,8 @@ static int get_dir_index_using_name(struct super_block *sb,
                        int len)
 {
        struct squashfs_sb_info *msblk = sb->s_fs_info;
-       int i, size, length = 0, err;
+       int i, length = 0, err;
+       unsigned int size;
        struct squashfs_dir_index *index;
        char *str;
 
@@ -103,6 +104,8 @@ static int get_dir_index_using_name(struct super_block *sb,
 
 
                size = le32_to_cpu(index->size) + 1;
+               if (size > SQUASHFS_NAME_LEN)
+                       break;
 
                err = squashfs_read_metadata(sb, index->name, &index_start,
                                        &index_offset, size);
@@ -144,7 +147,8 @@ static struct dentry *squashfs_lookup(struct inode *dir, struct dentry *dentry,
        struct squashfs_dir_entry *dire;
        u64 block = squashfs_i(dir)->start + msblk->directory_table;
        int offset = squashfs_i(dir)->offset;
-       int err, length, dir_count, size;
+       int err, length;
+       unsigned int dir_count, size;
 
        TRACE("Entered squashfs_lookup [%llx:%x]\n", block, offset);
 
index 9e2349d..4b2beda 100644 (file)
@@ -87,7 +87,7 @@
 #define SQUASHFS_COMP_OPTS(flags)              SQUASHFS_BIT(flags, \
                                                SQUASHFS_COMP_OPT)
 
-/* Max number of types and file types */
+/* Inode types including extended types */
 #define SQUASHFS_DIR_TYPE              1
 #define SQUASHFS_REG_TYPE              2
 #define SQUASHFS_SYMLINK_TYPE          3
 #define SQUASHFS_LFIFO_TYPE            13
 #define SQUASHFS_LSOCKET_TYPE          14
 
+/* Max type value stored in directory entry */
+#define SQUASHFS_MAX_DIR_TYPE          7
+
 /* Xattr types */
 #define SQUASHFS_XATTR_USER             0
 #define SQUASHFS_XATTR_TRUSTED          1
index f6961ea..3a96c97 100644 (file)
@@ -53,11 +53,15 @@ static char *sb_writers_name[SB_FREEZE_LEVELS] = {
  * shrinker path and that leads to deadlock on the shrinker_rwsem. Hence we
  * take a passive reference to the superblock to avoid this from occurring.
  */
-static int prune_super(struct shrinker *shrink, struct shrink_control *sc)
+static unsigned long super_cache_scan(struct shrinker *shrink,
+                                     struct shrink_control *sc)
 {
        struct super_block *sb;
-       int     fs_objects = 0;
-       int     total_objects;
+       long    fs_objects = 0;
+       long    total_objects;
+       long    freed = 0;
+       long    dentries;
+       long    inodes;
 
        sb = container_of(shrink, struct super_block, s_shrink);
 
@@ -65,46 +69,62 @@ static int prune_super(struct shrinker *shrink, struct shrink_control *sc)
         * Deadlock avoidance.  We may hold various FS locks, and we don't want
         * to recurse into the FS that called us in clear_inode() and friends..
         */
-       if (sc->nr_to_scan && !(sc->gfp_mask & __GFP_FS))
-               return -1;
+       if (!(sc->gfp_mask & __GFP_FS))
+               return SHRINK_STOP;
 
        if (!grab_super_passive(sb))
-               return -1;
+               return SHRINK_STOP;
 
        if (sb->s_op->nr_cached_objects)
-               fs_objects = sb->s_op->nr_cached_objects(sb);
-
-       total_objects = sb->s_nr_dentry_unused +
-                       sb->s_nr_inodes_unused + fs_objects + 1;
-
-       if (sc->nr_to_scan) {
-               int     dentries;
-               int     inodes;
-
-               /* proportion the scan between the caches */
-               dentries = (sc->nr_to_scan * sb->s_nr_dentry_unused) /
-                                                       total_objects;
-               inodes = (sc->nr_to_scan * sb->s_nr_inodes_unused) /
-                                                       total_objects;
-               if (fs_objects)
-                       fs_objects = (sc->nr_to_scan * fs_objects) /
-                                                       total_objects;
-               /*
-                * prune the dcache first as the icache is pinned by it, then
-                * prune the icache, followed by the filesystem specific caches
-                */
-               prune_dcache_sb(sb, dentries);
-               prune_icache_sb(sb, inodes);
+               fs_objects = sb->s_op->nr_cached_objects(sb, sc->nid);
 
-               if (fs_objects && sb->s_op->free_cached_objects) {
-                       sb->s_op->free_cached_objects(sb, fs_objects);
-                       fs_objects = sb->s_op->nr_cached_objects(sb);
-               }
-               total_objects = sb->s_nr_dentry_unused +
-                               sb->s_nr_inodes_unused + fs_objects;
+       inodes = list_lru_count_node(&sb->s_inode_lru, sc->nid);
+       dentries = list_lru_count_node(&sb->s_dentry_lru, sc->nid);
+       total_objects = dentries + inodes + fs_objects + 1;
+
+       /* proportion the scan between the caches */
+       dentries = mult_frac(sc->nr_to_scan, dentries, total_objects);
+       inodes = mult_frac(sc->nr_to_scan, inodes, total_objects);
+
+       /*
+        * prune the dcache first as the icache is pinned by it, then
+        * prune the icache, followed by the filesystem specific caches
+        */
+       freed = prune_dcache_sb(sb, dentries, sc->nid);
+       freed += prune_icache_sb(sb, inodes, sc->nid);
+
+       if (fs_objects) {
+               fs_objects = mult_frac(sc->nr_to_scan, fs_objects,
+                                                               total_objects);
+               freed += sb->s_op->free_cached_objects(sb, fs_objects,
+                                                      sc->nid);
        }
 
-       total_objects = (total_objects / 100) * sysctl_vfs_cache_pressure;
+       drop_super(sb);
+       return freed;
+}
+
+static unsigned long super_cache_count(struct shrinker *shrink,
+                                      struct shrink_control *sc)
+{
+       struct super_block *sb;
+       long    total_objects = 0;
+
+       sb = container_of(shrink, struct super_block, s_shrink);
+
+       if (!grab_super_passive(sb))
+               return 0;
+
+       if (sb->s_op && sb->s_op->nr_cached_objects)
+               total_objects = sb->s_op->nr_cached_objects(sb,
+                                                sc->nid);
+
+       total_objects += list_lru_count_node(&sb->s_dentry_lru,
+                                                sc->nid);
+       total_objects += list_lru_count_node(&sb->s_inode_lru,
+                                                sc->nid);
+
+       total_objects = vfs_pressure_ratio(total_objects);
        drop_super(sb);
        return total_objects;
 }
@@ -175,9 +195,12 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
                INIT_HLIST_NODE(&s->s_instances);
                INIT_HLIST_BL_HEAD(&s->s_anon);
                INIT_LIST_HEAD(&s->s_inodes);
-               INIT_LIST_HEAD(&s->s_dentry_lru);
-               INIT_LIST_HEAD(&s->s_inode_lru);
-               spin_lock_init(&s->s_inode_lru_lock);
+
+               if (list_lru_init(&s->s_dentry_lru))
+                       goto err_out;
+               if (list_lru_init(&s->s_inode_lru))
+                       goto err_out_dentry_lru;
+
                INIT_LIST_HEAD(&s->s_mounts);
                init_rwsem(&s->s_umount);
                lockdep_set_class(&s->s_umount, &type->s_umount_key);
@@ -210,11 +233,16 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags)
                s->cleancache_poolid = -1;
 
                s->s_shrink.seeks = DEFAULT_SEEKS;
-               s->s_shrink.shrink = prune_super;
+               s->s_shrink.scan_objects = super_cache_scan;
+               s->s_shrink.count_objects = super_cache_count;
                s->s_shrink.batch = 1024;
+               s->s_shrink.flags = SHRINKER_NUMA_AWARE;
        }
 out:
        return s;
+
+err_out_dentry_lru:
+       list_lru_destroy(&s->s_dentry_lru);
 err_out:
        security_sb_free(s);
 #ifdef CONFIG_SMP
@@ -295,6 +323,9 @@ void deactivate_locked_super(struct super_block *s)
 
                /* caches are now gone, we can safely kill the shrinker now */
                unregister_shrinker(&s->s_shrink);
+               list_lru_destroy(&s->s_dentry_lru);
+               list_lru_destroy(&s->s_inode_lru);
+
                put_filesystem(fs);
                put_super(s);
        } else {
index c1a591a..66bc316 100644 (file)
@@ -469,7 +469,7 @@ static void sysv_write_failed(struct address_space *mapping, loff_t to)
        struct inode *inode = mapping->host;
 
        if (to > inode->i_size) {
-               truncate_pagecache(inode, to, inode->i_size);
+               truncate_pagecache(inode, inode->i_size);
                sysv_truncate(inode);
        }
 }
index 7f60e90..6e025e0 100644 (file)
@@ -2587,10 +2587,11 @@ int dbg_leb_write(struct ubifs_info *c, int lnum, const void *buf,
                return -EROFS;
 
        failing = power_cut_emulated(c, lnum, 1);
-       if (failing)
+       if (failing) {
                len = corrupt_data(c, buf, len);
-       ubifs_warn("actually write %d bytes to LEB %d:%d (the buffer was corrupted)",
-                  len, lnum, offs);
+               ubifs_warn("actually write %d bytes to LEB %d:%d (the buffer was corrupted)",
+                          len, lnum, offs);
+       }
        err = ubi_leb_write(c->ubi, lnum, buf, offs, len);
        if (err)
                return err;
index 9e1d056..f35135e 100644 (file)
@@ -277,18 +277,25 @@ static int kick_a_thread(void)
        return 0;
 }
 
-int ubifs_shrinker(struct shrinker *shrink, struct shrink_control *sc)
+unsigned long ubifs_shrink_count(struct shrinker *shrink,
+                                struct shrink_control *sc)
 {
-       int nr = sc->nr_to_scan;
-       int freed, contention = 0;
        long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt);
 
-       if (nr == 0)
-               /*
-                * Due to the way UBIFS updates the clean znode counter it may
-                * temporarily be negative.
-                */
-               return clean_zn_cnt >= 0 ? clean_zn_cnt : 1;
+       /*
+        * Due to the way UBIFS updates the clean znode counter it may
+        * temporarily be negative.
+        */
+       return clean_zn_cnt >= 0 ? clean_zn_cnt : 1;
+}
+
+unsigned long ubifs_shrink_scan(struct shrinker *shrink,
+                               struct shrink_control *sc)
+{
+       unsigned long nr = sc->nr_to_scan;
+       int contention = 0;
+       unsigned long freed;
+       long clean_zn_cnt = atomic_long_read(&ubifs_clean_zn_cnt);
 
        if (!clean_zn_cnt) {
                /*
@@ -316,10 +323,10 @@ int ubifs_shrinker(struct shrinker *shrink, struct shrink_control *sc)
 
        if (!freed && contention) {
                dbg_tnc("freed nothing, but contention");
-               return -1;
+               return SHRINK_STOP;
        }
 
 out:
-       dbg_tnc("%d znodes were freed, requested %d", freed, nr);
+       dbg_tnc("%lu znodes were freed, requested %lu", freed, nr);
        return freed;
 }
index 879b997..3e4aa72 100644 (file)
@@ -49,7 +49,8 @@ struct kmem_cache *ubifs_inode_slab;
 
 /* UBIFS TNC shrinker description */
 static struct shrinker ubifs_shrinker_info = {
-       .shrink = ubifs_shrinker,
+       .scan_objects = ubifs_shrink_scan,
+       .count_objects = ubifs_shrink_count,
        .seeks = DEFAULT_SEEKS,
 };
 
index b2babce..e8c8cfe 100644 (file)
@@ -1624,7 +1624,10 @@ int ubifs_tnc_start_commit(struct ubifs_info *c, struct ubifs_zbranch *zroot);
 int ubifs_tnc_end_commit(struct ubifs_info *c);
 
 /* shrinker.c */
-int ubifs_shrinker(struct shrinker *shrink, struct shrink_control *sc);
+unsigned long ubifs_shrink_scan(struct shrinker *shrink,
+                               struct shrink_control *sc);
+unsigned long ubifs_shrink_count(struct shrinker *shrink,
+                                struct shrink_control *sc);
 
 /* commit.c */
 int ubifs_bg_thread(void *info);
index 29569dd..c02a27a 100644 (file)
@@ -141,7 +141,7 @@ static ssize_t udf_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
        struct file *file = iocb->ki_filp;
        struct inode *inode = file_inode(file);
        int err, pos;
-       size_t count = iocb->ki_left;
+       size_t count = iocb->ki_nbytes;
        struct udf_inode_info *iinfo = UDF_I(inode);
 
        down_write(&iinfo->i_data_sem);
index b6d15d3..062b792 100644 (file)
@@ -172,7 +172,7 @@ static void udf_write_failed(struct address_space *mapping, loff_t to)
        loff_t isize = inode->i_size;
 
        if (to > isize) {
-               truncate_pagecache(inode, to, isize);
+               truncate_pagecache(inode, isize);
                if (iinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) {
                        down_write(&iinfo->i_data_sem);
                        udf_clear_extent_cache(inode);
index ff24e44..c8ca960 100644 (file)
@@ -531,7 +531,7 @@ static void ufs_write_failed(struct address_space *mapping, loff_t to)
        struct inode *inode = mapping->host;
 
        if (to > inode->i_size)
-               truncate_pagecache(inode, to, inode->i_size);
+               truncate_pagecache(inode, inode->i_size);
 }
 
 static int ufs_write_begin(struct file *file, struct address_space *mapping,
index 4a7286c..a02cfb9 100644 (file)
@@ -27,8 +27,6 @@
 
 /*
  * Greedy allocation.  May fail and may return vmalloced memory.
- *
- * Must be freed using kmem_free_large.
  */
 void *
 kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize)
@@ -36,7 +34,7 @@ kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize)
        void            *ptr;
        size_t          kmsize = maxsize;
 
-       while (!(ptr = kmem_zalloc_large(kmsize))) {
+       while (!(ptr = vzalloc(kmsize))) {
                if ((kmsize >>= 1) <= minsize)
                        kmsize = minsize;
        }
@@ -75,6 +73,17 @@ kmem_zalloc(size_t size, xfs_km_flags_t flags)
        return ptr;
 }
 
+void *
+kmem_zalloc_large(size_t size, xfs_km_flags_t flags)
+{
+       void    *ptr;
+
+       ptr = kmem_zalloc(size, flags | KM_MAYFAIL);
+       if (ptr)
+               return ptr;
+       return vzalloc(size);
+}
+
 void
 kmem_free(const void *ptr)
 {
index b2f2620..3a7371c 100644 (file)
@@ -57,17 +57,10 @@ kmem_flags_convert(xfs_km_flags_t flags)
 
 extern void *kmem_alloc(size_t, xfs_km_flags_t);
 extern void *kmem_zalloc(size_t, xfs_km_flags_t);
+extern void *kmem_zalloc_large(size_t size, xfs_km_flags_t);
 extern void *kmem_realloc(const void *, size_t, size_t, xfs_km_flags_t);
 extern void  kmem_free(const void *);
 
-static inline void *kmem_zalloc_large(size_t size)
-{
-       return vzalloc(size);
-}
-static inline void kmem_free_large(void *ptr)
-{
-       vfree(ptr);
-}
 
 extern void *kmem_zalloc_greedy(size_t *, size_t, size_t);
 
index 6951896..0e2f37e 100644 (file)
@@ -152,7 +152,7 @@ xfs_get_acl(struct inode *inode, int type)
         * go out to the disk.
         */
        len = XFS_ACL_MAX_SIZE(ip->i_mount);
-       xfs_acl = kzalloc(len, GFP_KERNEL);
+       xfs_acl = kmem_zalloc_large(len, KM_SLEEP);
        if (!xfs_acl)
                return ERR_PTR(-ENOMEM);
 
@@ -175,10 +175,10 @@ xfs_get_acl(struct inode *inode, int type)
        if (IS_ERR(acl))
                goto out;
 
- out_update_cache:
+out_update_cache:
        set_cached_acl(inode, type, acl);
- out:
-       kfree(xfs_acl);
+out:
+       kmem_free(xfs_acl);
        return acl;
 }
 
@@ -209,7 +209,7 @@ xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
                struct xfs_acl *xfs_acl;
                int len = XFS_ACL_MAX_SIZE(ip->i_mount);
 
-               xfs_acl = kzalloc(len, GFP_KERNEL);
+               xfs_acl = kmem_zalloc_large(len, KM_SLEEP);
                if (!xfs_acl)
                        return -ENOMEM;
 
@@ -222,7 +222,7 @@ xfs_set_acl(struct inode *inode, int type, struct posix_acl *acl)
                error = -xfs_attr_set(ip, ea_name, (unsigned char *)xfs_acl,
                                len, ATTR_ROOT);
 
-               kfree(xfs_acl);
+               kmem_free(xfs_acl);
        } else {
                /*
                 * A NULL ACL argument means we want to remove the ACL.
index 977da0e..e51e581 100644 (file)
@@ -1582,7 +1582,7 @@ xfs_vm_write_begin(
                unlock_page(page);
 
                if (pos + len > i_size_read(inode))
-                       truncate_pagecache(inode, pos + len, i_size_read(inode));
+                       truncate_pagecache(inode, i_size_read(inode));
 
                page_cache_release(page);
                page = NULL;
@@ -1618,7 +1618,7 @@ xfs_vm_write_end(
                loff_t          to = pos + len;
 
                if (to > isize) {
-                       truncate_pagecache(inode, to, isize);
+                       truncate_pagecache(inode, isize);
                        xfs_vm_kill_delalloc_range(inode, isize, to);
                }
        }
index 92b8309..f47e65c 100644 (file)
@@ -4450,7 +4450,7 @@ xfs_bmapi_write(
 {
        struct xfs_mount        *mp = ip->i_mount;
        struct xfs_ifork        *ifp;
-       struct xfs_bmalloca     bma = { 0 };    /* args for xfs_bmap_alloc */
+       struct xfs_bmalloca     bma = { NULL }; /* args for xfs_bmap_alloc */
        xfs_fileoff_t           end;            /* end of mapped file region */
        int                     eof;            /* after the end of extents */
        int                     error;          /* error return */
index cf3bc76..bb8de8e 100644 (file)
@@ -925,3 +925,47 @@ xfs_bmdr_maxrecs(
                return blocklen / sizeof(xfs_bmdr_rec_t);
        return blocklen / (sizeof(xfs_bmdr_key_t) + sizeof(xfs_bmdr_ptr_t));
 }
+
+/*
+ * Change the owner of a btree format fork fo the inode passed in. Change it to
+ * the owner of that is passed in so that we can change owners before or after
+ * we switch forks between inodes. The operation that the caller is doing will
+ * determine whether is needs to change owner before or after the switch.
+ *
+ * For demand paged transactional modification, the fork switch should be done
+ * after reading in all the blocks, modifying them and pinning them in the
+ * transaction. For modification when the buffers are already pinned in memory,
+ * the fork switch can be done before changing the owner as we won't need to
+ * validate the owner until the btree buffers are unpinned and writes can occur
+ * again.
+ *
+ * For recovery based ownership change, there is no transactional context and
+ * so a buffer list must be supplied so that we can record the buffers that we
+ * modified for the caller to issue IO on.
+ */
+int
+xfs_bmbt_change_owner(
+       struct xfs_trans        *tp,
+       struct xfs_inode        *ip,
+       int                     whichfork,
+       xfs_ino_t               new_owner,
+       struct list_head        *buffer_list)
+{
+       struct xfs_btree_cur    *cur;
+       int                     error;
+
+       ASSERT(tp || buffer_list);
+       ASSERT(!(tp && buffer_list));
+       if (whichfork == XFS_DATA_FORK)
+               ASSERT(ip->i_d.di_format == XFS_DINODE_FMT_BTREE);
+       else
+               ASSERT(ip->i_d.di_aformat == XFS_DINODE_FMT_BTREE);
+
+       cur = xfs_bmbt_init_cursor(ip->i_mount, tp, ip, whichfork);
+       if (!cur)
+               return ENOMEM;
+
+       error = xfs_btree_change_owner(cur, new_owner, buffer_list);
+       xfs_btree_del_cursor(cur, error ? XFS_BTREE_ERROR : XFS_BTREE_NOERROR);
+       return error;
+}
index 1b726d6..e367461 100644 (file)
@@ -236,6 +236,10 @@ extern int xfs_bmbt_get_maxrecs(struct xfs_btree_cur *, int level);
 extern int xfs_bmdr_maxrecs(struct xfs_mount *, int blocklen, int leaf);
 extern int xfs_bmbt_maxrecs(struct xfs_mount *, int blocklen, int leaf);
 
+extern int xfs_bmbt_change_owner(struct xfs_trans *tp, struct xfs_inode *ip,
+                                int whichfork, xfs_ino_t new_owner,
+                                struct list_head *buffer_list);
+
 extern struct xfs_btree_cur *xfs_bmbt_init_cursor(struct xfs_mount *,
                struct xfs_trans *, struct xfs_inode *, int);
 
index 541d59f..97f952c 100644 (file)
@@ -612,13 +612,9 @@ xfs_getbmap(
 
        if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx))
                return XFS_ERROR(ENOMEM);
-       out = kmem_zalloc(bmv->bmv_count * sizeof(struct getbmapx), KM_MAYFAIL);
-       if (!out) {
-               out = kmem_zalloc_large(bmv->bmv_count *
-                                       sizeof(struct getbmapx));
-               if (!out)
-                       return XFS_ERROR(ENOMEM);
-       }
+       out = kmem_zalloc_large(bmv->bmv_count * sizeof(struct getbmapx), 0);
+       if (!out)
+               return XFS_ERROR(ENOMEM);
 
        xfs_ilock(ip, XFS_IOLOCK_SHARED);
        if (whichfork == XFS_DATA_FORK && !(iflags & BMV_IF_DELALLOC)) {
@@ -754,10 +750,7 @@ xfs_getbmap(
                        break;
        }
 
-       if (is_vmalloc_addr(out))
-               kmem_free_large(out);
-       else
-               kmem_free(out);
+       kmem_free(out);
        return error;
 }
 
@@ -1789,14 +1782,6 @@ xfs_swap_extents(
        int             taforkblks = 0;
        __uint64_t      tmp;
 
-       /*
-        * We have no way of updating owner information in the BMBT blocks for
-        * each inode on CRC enabled filesystems, so to avoid corrupting the
-        * this metadata we simply don't allow extent swaps to occur.
-        */
-       if (xfs_sb_version_hascrc(&mp->m_sb))
-               return XFS_ERROR(EINVAL);
-
        tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL);
        if (!tempifp) {
                error = XFS_ERROR(ENOMEM);
@@ -1920,6 +1905,42 @@ xfs_swap_extents(
                        goto out_trans_cancel;
        }
 
+       xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
+       xfs_trans_ijoin(tp, tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
+
+       /*
+        * Before we've swapped the forks, lets set the owners of the forks
+        * appropriately. We have to do this as we are demand paging the btree
+        * buffers, and so the validation done on read will expect the owner
+        * field to be correctly set. Once we change the owners, we can swap the
+        * inode forks.
+        *
+        * Note the trickiness in setting the log flags - we set the owner log
+        * flag on the opposite inode (i.e. the inode we are setting the new
+        * owner to be) because once we swap the forks and log that, log
+        * recovery is going to see the fork as owned by the swapped inode,
+        * not the pre-swapped inodes.
+        */
+       src_log_flags = XFS_ILOG_CORE;
+       target_log_flags = XFS_ILOG_CORE;
+       if (ip->i_d.di_version == 3 &&
+           ip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
+               target_log_flags |= XFS_ILOG_DOWNER;
+               error = xfs_bmbt_change_owner(tp, ip, XFS_DATA_FORK,
+                                             tip->i_ino, NULL);
+               if (error)
+                       goto out_trans_cancel;
+       }
+
+       if (tip->i_d.di_version == 3 &&
+           tip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
+               src_log_flags |= XFS_ILOG_DOWNER;
+               error = xfs_bmbt_change_owner(tp, tip, XFS_DATA_FORK,
+                                             ip->i_ino, NULL);
+               if (error)
+                       goto out_trans_cancel;
+       }
+
        /*
         * Swap the data forks of the inodes
         */
@@ -1957,7 +1978,6 @@ xfs_swap_extents(
        tip->i_delayed_blks = ip->i_delayed_blks;
        ip->i_delayed_blks = 0;
 
-       src_log_flags = XFS_ILOG_CORE;
        switch (ip->i_d.di_format) {
        case XFS_DINODE_FMT_EXTENTS:
                /* If the extents fit in the inode, fix the
@@ -1971,11 +1991,12 @@ xfs_swap_extents(
                src_log_flags |= XFS_ILOG_DEXT;
                break;
        case XFS_DINODE_FMT_BTREE:
+               ASSERT(ip->i_d.di_version < 3 ||
+                      (src_log_flags & XFS_ILOG_DOWNER));
                src_log_flags |= XFS_ILOG_DBROOT;
                break;
        }
 
-       target_log_flags = XFS_ILOG_CORE;
        switch (tip->i_d.di_format) {
        case XFS_DINODE_FMT_EXTENTS:
                /* If the extents fit in the inode, fix the
@@ -1990,13 +2011,11 @@ xfs_swap_extents(
                break;
        case XFS_DINODE_FMT_BTREE:
                target_log_flags |= XFS_ILOG_DBROOT;
+               ASSERT(tip->i_d.di_version < 3 ||
+                      (target_log_flags & XFS_ILOG_DOWNER));
                break;
        }
 
-
-       xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
-       xfs_trans_ijoin(tp, tip, XFS_ILOCK_EXCL | XFS_IOLOCK_EXCL);
-
        xfs_trans_log_inode(tp, ip,  src_log_flags);
        xfs_trans_log_inode(tp, tip, target_log_flags);
 
index 7a2b4da..5690e10 100644 (file)
@@ -855,6 +855,41 @@ xfs_btree_readahead(
        return xfs_btree_readahead_sblock(cur, lr, block);
 }
 
+STATIC xfs_daddr_t
+xfs_btree_ptr_to_daddr(
+       struct xfs_btree_cur    *cur,
+       union xfs_btree_ptr     *ptr)
+{
+       if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
+               ASSERT(ptr->l != cpu_to_be64(NULLDFSBNO));
+
+               return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l));
+       } else {
+               ASSERT(cur->bc_private.a.agno != NULLAGNUMBER);
+               ASSERT(ptr->s != cpu_to_be32(NULLAGBLOCK));
+
+               return XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno,
+                                       be32_to_cpu(ptr->s));
+       }
+}
+
+/*
+ * Readahead @count btree blocks at the given @ptr location.
+ *
+ * We don't need to care about long or short form btrees here as we have a
+ * method of converting the ptr directly to a daddr available to us.
+ */
+STATIC void
+xfs_btree_readahead_ptr(
+       struct xfs_btree_cur    *cur,
+       union xfs_btree_ptr     *ptr,
+       xfs_extlen_t            count)
+{
+       xfs_buf_readahead(cur->bc_mp->m_ddev_targp,
+                         xfs_btree_ptr_to_daddr(cur, ptr),
+                         cur->bc_mp->m_bsize * count, cur->bc_ops->buf_ops);
+}
+
 /*
  * Set the buffer for level "lev" in the cursor to bp, releasing
  * any previous buffer.
@@ -1073,24 +1108,6 @@ xfs_btree_buf_to_ptr(
        }
 }
 
-STATIC xfs_daddr_t
-xfs_btree_ptr_to_daddr(
-       struct xfs_btree_cur    *cur,
-       union xfs_btree_ptr     *ptr)
-{
-       if (cur->bc_flags & XFS_BTREE_LONG_PTRS) {
-               ASSERT(ptr->l != cpu_to_be64(NULLDFSBNO));
-
-               return XFS_FSB_TO_DADDR(cur->bc_mp, be64_to_cpu(ptr->l));
-       } else {
-               ASSERT(cur->bc_private.a.agno != NULLAGNUMBER);
-               ASSERT(ptr->s != cpu_to_be32(NULLAGBLOCK));
-
-               return XFS_AGB_TO_DADDR(cur->bc_mp, cur->bc_private.a.agno,
-                                       be32_to_cpu(ptr->s));
-       }
-}
-
 STATIC void
 xfs_btree_set_refs(
        struct xfs_btree_cur    *cur,
@@ -3869,3 +3886,120 @@ xfs_btree_get_rec(
        *stat = 1;
        return 0;
 }
+
+/*
+ * Change the owner of a btree.
+ *
+ * The mechanism we use here is ordered buffer logging. Because we don't know
+ * how many buffers we are going to need to modify, we don't really want to
+ * have to make transaction reservations for the worst case of every buffer in a
+ * full size btree as that may be more space than we can fit in the log....
+ *
+ * We do the btree walk in the most optimal manner possible - we have sibling
+ * pointers so we can just walk all the blocks on each level from left to right
+ * in a single pass, and then move to the next level and do the same. We can
+ * also do readahead on the sibling pointers to get IO moving more quickly,
+ * though for slow disks this is unlikely to make much difference to performance
+ * as the amount of CPU work we have to do before moving to the next block is
+ * relatively small.
+ *
+ * For each btree block that we load, modify the owner appropriately, set the
+ * buffer as an ordered buffer and log it appropriately. We need to ensure that
+ * we mark the region we change dirty so that if the buffer is relogged in
+ * a subsequent transaction the changes we make here as an ordered buffer are
+ * correctly relogged in that transaction.  If we are in recovery context, then
+ * just queue the modified buffer as delayed write buffer so the transaction
+ * recovery completion writes the changes to disk.
+ */
+static int
+xfs_btree_block_change_owner(
+       struct xfs_btree_cur    *cur,
+       int                     level,
+       __uint64_t              new_owner,
+       struct list_head        *buffer_list)
+{
+       struct xfs_btree_block  *block;
+       struct xfs_buf          *bp;
+       union xfs_btree_ptr     rptr;
+
+       /* do right sibling readahead */
+       xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
+
+       /* modify the owner */
+       block = xfs_btree_get_block(cur, level, &bp);
+       if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
+               block->bb_u.l.bb_owner = cpu_to_be64(new_owner);
+       else
+               block->bb_u.s.bb_owner = cpu_to_be32(new_owner);
+
+       /*
+        * If the block is a root block hosted in an inode, we might not have a
+        * buffer pointer here and we shouldn't attempt to log the change as the
+        * information is already held in the inode and discarded when the root
+        * block is formatted into the on-disk inode fork. We still change it,
+        * though, so everything is consistent in memory.
+        */
+       if (bp) {
+               if (cur->bc_tp) {
+                       xfs_trans_ordered_buf(cur->bc_tp, bp);
+                       xfs_btree_log_block(cur, bp, XFS_BB_OWNER);
+               } else {
+                       xfs_buf_delwri_queue(bp, buffer_list);
+               }
+       } else {
+               ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
+               ASSERT(level == cur->bc_nlevels - 1);
+       }
+
+       /* now read rh sibling block for next iteration */
+       xfs_btree_get_sibling(cur, block, &rptr, XFS_BB_RIGHTSIB);
+       if (xfs_btree_ptr_is_null(cur, &rptr))
+               return ENOENT;
+
+       return xfs_btree_lookup_get_block(cur, level, &rptr, &block);
+}
+
+int
+xfs_btree_change_owner(
+       struct xfs_btree_cur    *cur,
+       __uint64_t              new_owner,
+       struct list_head        *buffer_list)
+{
+       union xfs_btree_ptr     lptr;
+       int                     level;
+       struct xfs_btree_block  *block = NULL;
+       int                     error = 0;
+
+       cur->bc_ops->init_ptr_from_cur(cur, &lptr);
+
+       /* for each level */
+       for (level = cur->bc_nlevels - 1; level >= 0; level--) {
+               /* grab the left hand block */
+               error = xfs_btree_lookup_get_block(cur, level, &lptr, &block);
+               if (error)
+                       return error;
+
+               /* readahead the left most block for the next level down */
+               if (level > 0) {
+                       union xfs_btree_ptr     *ptr;
+
+                       ptr = xfs_btree_ptr_addr(cur, 1, block);
+                       xfs_btree_readahead_ptr(cur, ptr, 1);
+
+                       /* save for the next iteration of the loop */
+                       lptr = *ptr;
+               }
+
+               /* for each buffer in the level */
+               do {
+                       error = xfs_btree_block_change_owner(cur, level,
+                                                            new_owner,
+                                                            buffer_list);
+               } while (!error);
+
+               if (error != ENOENT)
+                       return error;
+       }
+
+       return 0;
+}
index c8473c7..06729b6 100644 (file)
@@ -121,15 +121,18 @@ union xfs_btree_rec {
 /*
  * For logging record fields.
  */
-#define        XFS_BB_MAGIC            0x01
-#define        XFS_BB_LEVEL            0x02
-#define        XFS_BB_NUMRECS          0x04
-#define        XFS_BB_LEFTSIB          0x08
-#define        XFS_BB_RIGHTSIB         0x10
-#define        XFS_BB_BLKNO            0x20
+#define        XFS_BB_MAGIC            (1 << 0)
+#define        XFS_BB_LEVEL            (1 << 1)
+#define        XFS_BB_NUMRECS          (1 << 2)
+#define        XFS_BB_LEFTSIB          (1 << 3)
+#define        XFS_BB_RIGHTSIB         (1 << 4)
+#define        XFS_BB_BLKNO            (1 << 5)
+#define        XFS_BB_LSN              (1 << 6)
+#define        XFS_BB_UUID             (1 << 7)
+#define        XFS_BB_OWNER            (1 << 8)
 #define        XFS_BB_NUM_BITS         5
 #define        XFS_BB_ALL_BITS         ((1 << XFS_BB_NUM_BITS) - 1)
-#define        XFS_BB_NUM_BITS_CRC     8
+#define        XFS_BB_NUM_BITS_CRC     9
 #define        XFS_BB_ALL_BITS_CRC     ((1 << XFS_BB_NUM_BITS_CRC) - 1)
 
 /*
@@ -442,6 +445,8 @@ int xfs_btree_new_iroot(struct xfs_btree_cur *, int *, int *);
 int xfs_btree_insert(struct xfs_btree_cur *, int *);
 int xfs_btree_delete(struct xfs_btree_cur *, int *);
 int xfs_btree_get_rec(struct xfs_btree_cur *, union xfs_btree_rec **, int *);
+int xfs_btree_change_owner(struct xfs_btree_cur *cur, __uint64_t new_owner,
+                          struct list_head *buffer_list);
 
 /*
  * btree block CRC helpers
index c06823f..2634700 100644 (file)
@@ -80,54 +80,6 @@ xfs_buf_vmap_len(
        return (bp->b_page_count * PAGE_SIZE) - bp->b_offset;
 }
 
-/*
- * xfs_buf_lru_add - add a buffer to the LRU.
- *
- * The LRU takes a new reference to the buffer so that it will only be freed
- * once the shrinker takes the buffer off the LRU.
- */
-STATIC void
-xfs_buf_lru_add(
-       struct xfs_buf  *bp)
-{
-       struct xfs_buftarg *btp = bp->b_target;
-
-       spin_lock(&btp->bt_lru_lock);
-       if (list_empty(&bp->b_lru)) {
-               atomic_inc(&bp->b_hold);
-               list_add_tail(&bp->b_lru, &btp->bt_lru);
-               btp->bt_lru_nr++;
-               bp->b_lru_flags &= ~_XBF_LRU_DISPOSE;
-       }
-       spin_unlock(&btp->bt_lru_lock);
-}
-
-/*
- * xfs_buf_lru_del - remove a buffer from the LRU
- *
- * The unlocked check is safe here because it only occurs when there are not
- * b_lru_ref counts left on the inode under the pag->pag_buf_lock. it is there
- * to optimise the shrinker removing the buffer from the LRU and calling
- * xfs_buf_free(). i.e. it removes an unnecessary round trip on the
- * bt_lru_lock.
- */
-STATIC void
-xfs_buf_lru_del(
-       struct xfs_buf  *bp)
-{
-       struct xfs_buftarg *btp = bp->b_target;
-
-       if (list_empty(&bp->b_lru))
-               return;
-
-       spin_lock(&btp->bt_lru_lock);
-       if (!list_empty(&bp->b_lru)) {
-               list_del_init(&bp->b_lru);
-               btp->bt_lru_nr--;
-       }
-       spin_unlock(&btp->bt_lru_lock);
-}
-
 /*
  * When we mark a buffer stale, we remove the buffer from the LRU and clear the
  * b_lru_ref count so that the buffer is freed immediately when the buffer
@@ -151,20 +103,14 @@ xfs_buf_stale(
         */
        bp->b_flags &= ~_XBF_DELWRI_Q;
 
-       atomic_set(&(bp)->b_lru_ref, 0);
-       if (!list_empty(&bp->b_lru)) {
-               struct xfs_buftarg *btp = bp->b_target;
+       spin_lock(&bp->b_lock);
+       atomic_set(&bp->b_lru_ref, 0);
+       if (!(bp->b_state & XFS_BSTATE_DISPOSE) &&
+           (list_lru_del(&bp->b_target->bt_lru, &bp->b_lru)))
+               atomic_dec(&bp->b_hold);
 
-               spin_lock(&btp->bt_lru_lock);
-               if (!list_empty(&bp->b_lru) &&
-                   !(bp->b_lru_flags & _XBF_LRU_DISPOSE)) {
-                       list_del_init(&bp->b_lru);
-                       btp->bt_lru_nr--;
-                       atomic_dec(&bp->b_hold);
-               }
-               spin_unlock(&btp->bt_lru_lock);
-       }
        ASSERT(atomic_read(&bp->b_hold) >= 1);
+       spin_unlock(&bp->b_lock);
 }
 
 static int
@@ -228,6 +174,7 @@ _xfs_buf_alloc(
        INIT_LIST_HEAD(&bp->b_list);
        RB_CLEAR_NODE(&bp->b_rbnode);
        sema_init(&bp->b_sema, 0); /* held, no waiters */
+       spin_lock_init(&bp->b_lock);
        XB_SET_OWNER(bp);
        bp->b_target = target;
        bp->b_flags = flags;
@@ -917,12 +864,33 @@ xfs_buf_rele(
 
        ASSERT(atomic_read(&bp->b_hold) > 0);
        if (atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock)) {
-               if (!(bp->b_flags & XBF_STALE) &&
-                          atomic_read(&bp->b_lru_ref)) {
-                       xfs_buf_lru_add(bp);
+               spin_lock(&bp->b_lock);
+               if (!(bp->b_flags & XBF_STALE) && atomic_read(&bp->b_lru_ref)) {
+                       /*
+                        * If the buffer is added to the LRU take a new
+                        * reference to the buffer for the LRU and clear the
+                        * (now stale) dispose list state flag
+                        */
+                       if (list_lru_add(&bp->b_target->bt_lru, &bp->b_lru)) {
+                               bp->b_state &= ~XFS_BSTATE_DISPOSE;
+                               atomic_inc(&bp->b_hold);
+                       }
+                       spin_unlock(&bp->b_lock);
                        spin_unlock(&pag->pag_buf_lock);
                } else {
-                       xfs_buf_lru_del(bp);
+                       /*
+                        * most of the time buffers will already be removed from
+                        * the LRU, so optimise that case by checking for the
+                        * XFS_BSTATE_DISPOSE flag indicating the last list the
+                        * buffer was on was the disposal list
+                        */
+                       if (!(bp->b_state & XFS_BSTATE_DISPOSE)) {
+                               list_lru_del(&bp->b_target->bt_lru, &bp->b_lru);
+                       } else {
+                               ASSERT(list_empty(&bp->b_lru));
+                       }
+                       spin_unlock(&bp->b_lock);
+
                        ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
                        rb_erase(&bp->b_rbnode, &pag->pag_buf_tree);
                        spin_unlock(&pag->pag_buf_lock);
@@ -1502,83 +1470,121 @@ xfs_buf_iomove(
  * returned. These buffers will have an elevated hold count, so wait on those
  * while freeing all the buffers only held by the LRU.
  */
+static enum lru_status
+xfs_buftarg_wait_rele(
+       struct list_head        *item,
+       spinlock_t              *lru_lock,
+       void                    *arg)
+
+{
+       struct xfs_buf          *bp = container_of(item, struct xfs_buf, b_lru);
+       struct list_head        *dispose = arg;
+
+       if (atomic_read(&bp->b_hold) > 1) {
+               /* need to wait, so skip it this pass */
+               trace_xfs_buf_wait_buftarg(bp, _RET_IP_);
+               return LRU_SKIP;
+       }
+       if (!spin_trylock(&bp->b_lock))
+               return LRU_SKIP;
+
+       /*
+        * clear the LRU reference count so the buffer doesn't get
+        * ignored in xfs_buf_rele().
+        */
+       atomic_set(&bp->b_lru_ref, 0);
+       bp->b_state |= XFS_BSTATE_DISPOSE;
+       list_move(item, dispose);
+       spin_unlock(&bp->b_lock);
+       return LRU_REMOVED;
+}
+
 void
 xfs_wait_buftarg(
        struct xfs_buftarg      *btp)
 {
-       struct xfs_buf          *bp;
+       LIST_HEAD(dispose);
+       int loop = 0;
 
-restart:
-       spin_lock(&btp->bt_lru_lock);
-       while (!list_empty(&btp->bt_lru)) {
-               bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru);
-               if (atomic_read(&bp->b_hold) > 1) {
-                       trace_xfs_buf_wait_buftarg(bp, _RET_IP_);
-                       list_move_tail(&bp->b_lru, &btp->bt_lru);
-                       spin_unlock(&btp->bt_lru_lock);
-                       delay(100);
-                       goto restart;
+       /* loop until there is nothing left on the lru list. */
+       while (list_lru_count(&btp->bt_lru)) {
+               list_lru_walk(&btp->bt_lru, xfs_buftarg_wait_rele,
+                             &dispose, LONG_MAX);
+
+               while (!list_empty(&dispose)) {
+                       struct xfs_buf *bp;
+                       bp = list_first_entry(&dispose, struct xfs_buf, b_lru);
+                       list_del_init(&bp->b_lru);
+                       xfs_buf_rele(bp);
                }
-               /*
-                * clear the LRU reference count so the buffer doesn't get
-                * ignored in xfs_buf_rele().
-                */
-               atomic_set(&bp->b_lru_ref, 0);
-               spin_unlock(&btp->bt_lru_lock);
-               xfs_buf_rele(bp);
-               spin_lock(&btp->bt_lru_lock);
+               if (loop++ != 0)
+                       delay(100);
        }
-       spin_unlock(&btp->bt_lru_lock);
 }
 
-int
-xfs_buftarg_shrink(
+static enum lru_status
+xfs_buftarg_isolate(
+       struct list_head        *item,
+       spinlock_t              *lru_lock,
+       void                    *arg)
+{
+       struct xfs_buf          *bp = container_of(item, struct xfs_buf, b_lru);
+       struct list_head        *dispose = arg;
+
+       /*
+        * we are inverting the lru lock/bp->b_lock here, so use a trylock.
+        * If we fail to get the lock, just skip it.
+        */
+       if (!spin_trylock(&bp->b_lock))
+               return LRU_SKIP;
+       /*
+        * Decrement the b_lru_ref count unless the value is already
+        * zero. If the value is already zero, we need to reclaim the
+        * buffer, otherwise it gets another trip through the LRU.
+        */
+       if (!atomic_add_unless(&bp->b_lru_ref, -1, 0)) {
+               spin_unlock(&bp->b_lock);
+               return LRU_ROTATE;
+       }
+
+       bp->b_state |= XFS_BSTATE_DISPOSE;
+       list_move(item, dispose);
+       spin_unlock(&bp->b_lock);
+       return LRU_REMOVED;
+}
+
+static unsigned long
+xfs_buftarg_shrink_scan(
        struct shrinker         *shrink,
        struct shrink_control   *sc)
 {
        struct xfs_buftarg      *btp = container_of(shrink,
                                        struct xfs_buftarg, bt_shrinker);
-       struct xfs_buf          *bp;
-       int nr_to_scan = sc->nr_to_scan;
        LIST_HEAD(dispose);
+       unsigned long           freed;
+       unsigned long           nr_to_scan = sc->nr_to_scan;
 
-       if (!nr_to_scan)
-               return btp->bt_lru_nr;
-
-       spin_lock(&btp->bt_lru_lock);
-       while (!list_empty(&btp->bt_lru)) {
-               if (nr_to_scan-- <= 0)
-                       break;
-
-               bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru);
-
-               /*
-                * Decrement the b_lru_ref count unless the value is already
-                * zero. If the value is already zero, we need to reclaim the
-                * buffer, otherwise it gets another trip through the LRU.
-                */
-               if (!atomic_add_unless(&bp->b_lru_ref, -1, 0)) {
-                       list_move_tail(&bp->b_lru, &btp->bt_lru);
-                       continue;
-               }
-
-               /*
-                * remove the buffer from the LRU now to avoid needing another
-                * lock round trip inside xfs_buf_rele().
-                */
-               list_move(&bp->b_lru, &dispose);
-               btp->bt_lru_nr--;
-               bp->b_lru_flags |= _XBF_LRU_DISPOSE;
-       }
-       spin_unlock(&btp->bt_lru_lock);
+       freed = list_lru_walk_node(&btp->bt_lru, sc->nid, xfs_buftarg_isolate,
+                                      &dispose, &nr_to_scan);
 
        while (!list_empty(&dispose)) {
+               struct xfs_buf *bp;
                bp = list_first_entry(&dispose, struct xfs_buf, b_lru);
                list_del_init(&bp->b_lru);
                xfs_buf_rele(bp);
        }
 
-       return btp->bt_lru_nr;
+       return freed;
+}
+
+static unsigned long
+xfs_buftarg_shrink_count(
+       struct shrinker         *shrink,
+       struct shrink_control   *sc)
+{
+       struct xfs_buftarg      *btp = container_of(shrink,
+                                       struct xfs_buftarg, bt_shrinker);
+       return list_lru_count_node(&btp->bt_lru, sc->nid);
 }
 
 void
@@ -1587,6 +1593,7 @@ xfs_free_buftarg(
        struct xfs_buftarg      *btp)
 {
        unregister_shrinker(&btp->bt_shrinker);
+       list_lru_destroy(&btp->bt_lru);
 
        if (mp->m_flags & XFS_MOUNT_BARRIER)
                xfs_blkdev_issue_flush(btp);
@@ -1660,12 +1667,16 @@ xfs_alloc_buftarg(
        if (!btp->bt_bdi)
                goto error;
 
-       INIT_LIST_HEAD(&btp->bt_lru);
-       spin_lock_init(&btp->bt_lru_lock);
        if (xfs_setsize_buftarg_early(btp, bdev))
                goto error;
-       btp->bt_shrinker.shrink = xfs_buftarg_shrink;
+
+       if (list_lru_init(&btp->bt_lru))
+               goto error;
+
+       btp->bt_shrinker.count_objects = xfs_buftarg_shrink_count;
+       btp->bt_shrinker.scan_objects = xfs_buftarg_shrink_scan;
        btp->bt_shrinker.seeks = DEFAULT_SEEKS;
+       btp->bt_shrinker.flags = SHRINKER_NUMA_AWARE;
        register_shrinker(&btp->bt_shrinker);
        return btp;
 
index 433a12e..e656833 100644 (file)
@@ -25,6 +25,7 @@
 #include <linux/fs.h>
 #include <linux/buffer_head.h>
 #include <linux/uio.h>
+#include <linux/list_lru.h>
 
 /*
  *     Base types
@@ -59,7 +60,6 @@ typedef enum {
 #define _XBF_KMEM       (1 << 21)/* backed by heap memory */
 #define _XBF_DELWRI_Q   (1 << 22)/* buffer on a delwri queue */
 #define _XBF_COMPOUND   (1 << 23)/* compound buffer */
-#define _XBF_LRU_DISPOSE (1 << 24)/* buffer being discarded */
 
 typedef unsigned int xfs_buf_flags_t;
 
@@ -78,8 +78,12 @@ typedef unsigned int xfs_buf_flags_t;
        { _XBF_PAGES,           "PAGES" }, \
        { _XBF_KMEM,            "KMEM" }, \
        { _XBF_DELWRI_Q,        "DELWRI_Q" }, \
-       { _XBF_COMPOUND,        "COMPOUND" }, \
-       { _XBF_LRU_DISPOSE,     "LRU_DISPOSE" }
+       { _XBF_COMPOUND,        "COMPOUND" }
+
+/*
+ * Internal state flags.
+ */
+#define XFS_BSTATE_DISPOSE      (1 << 0)       /* buffer being discarded */
 
 typedef struct xfs_buftarg {
        dev_t                   bt_dev;
@@ -92,9 +96,7 @@ typedef struct xfs_buftarg {
 
        /* LRU control structures */
        struct shrinker         bt_shrinker;
-       struct list_head        bt_lru;
-       spinlock_t              bt_lru_lock;
-       unsigned int            bt_lru_nr;
+       struct list_lru         bt_lru;
 } xfs_buftarg_t;
 
 struct xfs_buf;
@@ -137,7 +139,8 @@ typedef struct xfs_buf {
         * bt_lru_lock and not by b_sema
         */
        struct list_head        b_lru;          /* lru list */
-       xfs_buf_flags_t         b_lru_flags;    /* internal lru status flags */
+       spinlock_t              b_lock;         /* internal state lock */
+       unsigned int            b_state;        /* internal state flags */
        wait_queue_head_t       b_waiters;      /* unpin waiters */
        struct list_head        b_list;
        struct xfs_perag        *b_pag;         /* contains rbtree root */
index 3a944b1..88c5ea7 100644 (file)
@@ -613,13 +613,27 @@ xfs_buf_item_unlock(
                        }
                }
        }
-       if (clean || aborted) {
-               if (atomic_dec_and_test(&bip->bli_refcount)) {
-                       ASSERT(!aborted || XFS_FORCED_SHUTDOWN(lip->li_mountp));
+
+       /*
+        * Clean buffers, by definition, cannot be in the AIL. However, aborted
+        * buffers may be dirty and hence in the AIL. Therefore if we are
+        * aborting a buffer and we've just taken the last reference away, we
+        * have to check if it is in the AIL before freeing it. We need to free
+        * it in this case, because an aborted transaction has already shut the
+        * filesystem down and this is the last chance we will have to do so.
+        */
+       if (atomic_dec_and_test(&bip->bli_refcount)) {
+               if (clean)
+                       xfs_buf_item_relse(bp);
+               else if (aborted) {
+                       ASSERT(XFS_FORCED_SHUTDOWN(lip->li_mountp));
+                       if (lip->li_flags & XFS_LI_IN_AIL) {
+                               xfs_trans_ail_delete(lip->li_ailp, lip,
+                                                    SHUTDOWN_LOG_IO_ERROR);
+                       }
                        xfs_buf_item_relse(bp);
                }
-       } else
-               atomic_dec(&bip->bli_refcount);
+       }
 
        if (!(flags & XFS_BLI_HOLD))
                xfs_buf_relse(bp);
index d4e59a4..069537c 100644 (file)
@@ -635,6 +635,7 @@ xfs_da3_root_split(
        xfs_trans_log_buf(tp, bp, 0, size - 1);
 
        bp->b_ops = blk1->bp->b_ops;
+       xfs_trans_buf_copy_type(bp, blk1->bp);
        blk1->bp = bp;
        blk1->blkno = blkno;
 
index 08984ee..1021c83 100644 (file)
@@ -180,6 +180,11 @@ xfs_dir3_leaf_check_int(
        return true;
 }
 
+/*
+ * We verify the magic numbers before decoding the leaf header so that on debug
+ * kernels we don't get assertion failures in xfs_dir3_leaf_hdr_from_disk() due
+ * to incorrect magic numbers.
+ */
 static bool
 xfs_dir3_leaf_verify(
        struct xfs_buf          *bp,
@@ -191,24 +196,25 @@ xfs_dir3_leaf_verify(
 
        ASSERT(magic == XFS_DIR2_LEAF1_MAGIC || magic == XFS_DIR2_LEAFN_MAGIC);
 
-       xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf);
        if (xfs_sb_version_hascrc(&mp->m_sb)) {
                struct xfs_dir3_leaf_hdr *leaf3 = bp->b_addr;
+               __uint16_t              magic3;
 
-               if ((magic == XFS_DIR2_LEAF1_MAGIC &&
-                    leafhdr.magic != XFS_DIR3_LEAF1_MAGIC) ||
-                   (magic == XFS_DIR2_LEAFN_MAGIC &&
-                    leafhdr.magic != XFS_DIR3_LEAFN_MAGIC))
-                       return false;
+               magic3 = (magic == XFS_DIR2_LEAF1_MAGIC) ? XFS_DIR3_LEAF1_MAGIC
+                                                        : XFS_DIR3_LEAFN_MAGIC;
 
+               if (leaf3->info.hdr.magic != cpu_to_be16(magic3))
+                       return false;
                if (!uuid_equal(&leaf3->info.uuid, &mp->m_sb.sb_uuid))
                        return false;
                if (be64_to_cpu(leaf3->info.blkno) != bp->b_bn)
                        return false;
        } else {
-               if (leafhdr.magic != magic)
+               if (leaf->hdr.info.magic != cpu_to_be16(magic))
                        return false;
        }
+
+       xfs_dir3_leaf_hdr_from_disk(&leafhdr, leaf);
        return xfs_dir3_leaf_check_int(mp, &leafhdr, leaf);
 }
 
index 251c666..71520e6 100644 (file)
@@ -940,13 +940,8 @@ xfs_qm_dqput_final(
 
        trace_xfs_dqput_free(dqp);
 
-       mutex_lock(&qi->qi_lru_lock);
-       if (list_empty(&dqp->q_lru)) {
-               list_add_tail(&dqp->q_lru, &qi->qi_lru_list);
-               qi->qi_lru_count++;
+       if (list_lru_add(&qi->qi_lru, &dqp->q_lru))
                XFS_STATS_INC(xs_qm_dquot_unused);
-       }
-       mutex_unlock(&qi->qi_lru_lock);
 
        /*
         * If we just added a udquot to the freelist, then we want to release
index 60c6e1f..e838d84 100644 (file)
@@ -142,7 +142,8 @@ xfs_qm_dqunpin_wait(
 STATIC uint
 xfs_qm_dquot_logitem_push(
        struct xfs_log_item     *lip,
-       struct list_head        *buffer_list)
+       struct list_head        *buffer_list) __releases(&lip->li_ailp->xa_lock)
+                                             __acquires(&lip->li_ailp->xa_lock)
 {
        struct xfs_dquot        *dqp = DQUOT_ITEM(lip)->qli_dquot;
        struct xfs_buf          *bp = NULL;
index 86f559f..e43708e 100644 (file)
@@ -160,7 +160,8 @@ xfs_extent_busy_update_extent(
        struct xfs_extent_busy  *busyp,
        xfs_agblock_t           fbno,
        xfs_extlen_t            flen,
-       bool                    userdata)
+       bool                    userdata) __releases(&pag->pagb_lock)
+                                         __acquires(&pag->pagb_lock)
 {
        xfs_agblock_t           fend = fbno + flen;
        xfs_agblock_t           bbno = busyp->bno;
index 16219b9..193206b 100644 (file)
@@ -48,7 +48,7 @@ STATIC void __xfs_inode_clear_reclaim_tag(struct xfs_mount *mp,
 /*
  * Allocate and initialise an xfs_inode.
  */
-STATIC struct xfs_inode *
+struct xfs_inode *
 xfs_inode_alloc(
        struct xfs_mount        *mp,
        xfs_ino_t               ino)
@@ -98,7 +98,7 @@ xfs_inode_free_callback(
        kmem_zone_free(xfs_inode_zone, ip);
 }
 
-STATIC void
+void
 xfs_inode_free(
        struct xfs_inode        *ip)
 {
@@ -1167,7 +1167,7 @@ xfs_reclaim_inodes(
  * them to be cleaned, which we hope will not be very long due to the
  * background walker having already kicked the IO off on those dirty inodes.
  */
-void
+long
 xfs_reclaim_inodes_nr(
        struct xfs_mount        *mp,
        int                     nr_to_scan)
@@ -1176,7 +1176,7 @@ xfs_reclaim_inodes_nr(
        xfs_reclaim_work_queue(mp);
        xfs_ail_push_all(mp->m_ail);
 
-       xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK | SYNC_WAIT, &nr_to_scan);
+       return xfs_reclaim_inodes_ag(mp, SYNC_TRYLOCK | SYNC_WAIT, &nr_to_scan);
 }
 
 /*
index 8a89f7d..9ed68bb 100644 (file)
@@ -42,11 +42,15 @@ struct xfs_eofblocks {
 int xfs_iget(struct xfs_mount *mp, struct xfs_trans *tp, xfs_ino_t ino,
             uint flags, uint lock_flags, xfs_inode_t **ipp);
 
+/* recovery needs direct inode allocation capability */
+struct xfs_inode * xfs_inode_alloc(struct xfs_mount *mp, xfs_ino_t ino);
+void xfs_inode_free(struct xfs_inode *ip);
+
 void xfs_reclaim_worker(struct work_struct *work);
 
 int xfs_reclaim_inodes(struct xfs_mount *mp, int mode);
 int xfs_reclaim_inodes_count(struct xfs_mount *mp);
-void xfs_reclaim_inodes_nr(struct xfs_mount *mp, int nr_to_scan);
+long xfs_reclaim_inodes_nr(struct xfs_mount *mp, int nr_to_scan);
 
 void xfs_inode_set_reclaim_tag(struct xfs_inode *ip);
 
index e011d59..63382d3 100644 (file)
@@ -53,9 +53,8 @@ xfs_inobp_check(
                                        i * mp->m_sb.sb_inodesize);
                if (!dip->di_next_unlinked)  {
                        xfs_alert(mp,
-       "Detected bogus zero next_unlinked field in incore inode buffer 0x%p.",
-                               bp);
-                       ASSERT(dip->di_next_unlinked);
+       "Detected bogus zero next_unlinked field in inode %d buffer 0x%llx.",
+                               i, (long long)bp->b_bn);
                }
        }
 }
@@ -106,11 +105,10 @@ xfs_inode_buf_verify(
                        XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_HIGH,
                                             mp, dip);
 #ifdef DEBUG
-                       xfs_emerg(mp,
+                       xfs_alert(mp,
                                "bad inode magic/vsn daddr %lld #%d (magic=%x)",
                                (unsigned long long)bp->b_bn, i,
                                be16_to_cpu(dip->di_magic));
-                       ASSERT(0);
 #endif
                }
        }
@@ -196,7 +194,7 @@ xfs_imap_to_bp(
        return 0;
 }
 
-STATIC void
+void
 xfs_dinode_from_disk(
        xfs_icdinode_t          *to,
        xfs_dinode_t            *from)
index 599e6c0..abba0ae 100644 (file)
@@ -32,17 +32,17 @@ struct xfs_imap {
        ushort          im_boffset;     /* inode offset in block in bytes */
 };
 
-int            xfs_imap_to_bp(struct xfs_mount *, struct xfs_trans *,
-                              struct xfs_imap *, struct xfs_dinode **,
-                              struct xfs_buf **, uint, uint);
-int            xfs_iread(struct xfs_mount *, struct xfs_trans *,
-                         struct xfs_inode *, uint);
-void           xfs_dinode_calc_crc(struct xfs_mount *, struct xfs_dinode *);
-void           xfs_dinode_to_disk(struct xfs_dinode *,
-                                  struct xfs_icdinode *);
+int    xfs_imap_to_bp(struct xfs_mount *, struct xfs_trans *,
+                      struct xfs_imap *, struct xfs_dinode **,
+                      struct xfs_buf **, uint, uint);
+int    xfs_iread(struct xfs_mount *, struct xfs_trans *,
+                 struct xfs_inode *, uint);
+void   xfs_dinode_calc_crc(struct xfs_mount *, struct xfs_dinode *);
+void   xfs_dinode_to_disk(struct xfs_dinode *to, struct xfs_icdinode *from);
+void   xfs_dinode_from_disk(struct xfs_icdinode *to, struct xfs_dinode *from);
 
 #if defined(DEBUG)
-void           xfs_inobp_check(struct xfs_mount *, struct xfs_buf *);
+void   xfs_inobp_check(struct xfs_mount *, struct xfs_buf *);
 #else
 #define        xfs_inobp_check(mp, bp)
 #endif /* DEBUG */
index bdebc21..668e8f4 100644 (file)
@@ -71,7 +71,7 @@ xfs_find_handle(
        int                     hsize;
        xfs_handle_t            handle;
        struct inode            *inode;
-       struct fd               f = {0};
+       struct fd               f = {NULL};
        struct path             path;
        int                     error;
        struct xfs_inode        *ip;
@@ -456,12 +456,9 @@ xfs_attrlist_by_handle(
        if (IS_ERR(dentry))
                return PTR_ERR(dentry);
 
-       kbuf = kmem_zalloc(al_hreq.buflen, KM_SLEEP | KM_MAYFAIL);
-       if (!kbuf) {
-               kbuf = kmem_zalloc_large(al_hreq.buflen);
-               if (!kbuf)
-                       goto out_dput;
-       }
+       kbuf = kmem_zalloc_large(al_hreq.buflen, KM_SLEEP);
+       if (!kbuf)
+               goto out_dput;
 
        cursor = (attrlist_cursor_kern_t *)&al_hreq.pos;
        error = -xfs_attr_list(XFS_I(dentry->d_inode), kbuf, al_hreq.buflen,
@@ -472,12 +469,9 @@ xfs_attrlist_by_handle(
        if (copy_to_user(al_hreq.buffer, kbuf, al_hreq.buflen))
                error = -EFAULT;
 
- out_kfree:
-       if (is_vmalloc_addr(kbuf))
-               kmem_free_large(kbuf);
-       else
-               kmem_free(kbuf);
- out_dput:
+out_kfree:
+       kmem_free(kbuf);
+out_dput:
        dput(dentry);
        return error;
 }
@@ -495,12 +489,9 @@ xfs_attrmulti_attr_get(
 
        if (*len > XATTR_SIZE_MAX)
                return EINVAL;
-       kbuf = kmem_zalloc(*len, KM_SLEEP | KM_MAYFAIL);
-       if (!kbuf) {
-               kbuf = kmem_zalloc_large(*len);
-               if (!kbuf)
-                       return ENOMEM;
-       }
+       kbuf = kmem_zalloc_large(*len, KM_SLEEP);
+       if (!kbuf)
+               return ENOMEM;
 
        error = xfs_attr_get(XFS_I(inode), name, kbuf, (int *)len, flags);
        if (error)
@@ -509,11 +500,8 @@ xfs_attrmulti_attr_get(
        if (copy_to_user(ubuf, kbuf, *len))
                error = EFAULT;
 
- out_kfree:
-       if (is_vmalloc_addr(kbuf))
-               kmem_free_large(kbuf);
-       else
-               kmem_free(kbuf);
+out_kfree:
+       kmem_free(kbuf);
        return error;
 }
 
index d3ab953..f671f7e 100644 (file)
@@ -371,12 +371,9 @@ xfs_compat_attrlist_by_handle(
                return PTR_ERR(dentry);
 
        error = -ENOMEM;
-       kbuf = kmem_zalloc(al_hreq.buflen, KM_SLEEP | KM_MAYFAIL);
-       if (!kbuf) {
-               kbuf = kmem_zalloc_large(al_hreq.buflen);
-               if (!kbuf)
-                       goto out_dput;
-       }
+       kbuf = kmem_zalloc_large(al_hreq.buflen, KM_SLEEP);
+       if (!kbuf)
+               goto out_dput;
 
        cursor = (attrlist_cursor_kern_t *)&al_hreq.pos;
        error = -xfs_attr_list(XFS_I(dentry->d_inode), kbuf, al_hreq.buflen,
@@ -387,12 +384,9 @@ xfs_compat_attrlist_by_handle(
        if (copy_to_user(compat_ptr(al_hreq.buffer), kbuf, al_hreq.buflen))
                error = -EFAULT;
 
- out_kfree:
-       if (is_vmalloc_addr(kbuf))
-               kmem_free_large(kbuf);
-       else
-               kmem_free(kbuf);
- out_dput:
+out_kfree:
+       kmem_free(kbuf);
+out_dput:
        dput(dentry);
        return error;
 }
index b93e14b..084b3e1 100644 (file)
@@ -495,7 +495,7 @@ xfs_bulkstat(
        /*
         * Done, we're either out of filesystem or space to put the data.
         */
-       kmem_free_large(irbuf);
+       kmem_free(irbuf);
        *ubcountp = ubelem;
        /*
         * Found some inodes, return them now and return the error next time.
@@ -541,8 +541,9 @@ xfs_bulkstat_single(
         * at the expense of the error case.
         */
 
-       ino = (xfs_ino_t)*lastinop;
-       error = xfs_bulkstat_one(mp, ino, buffer, sizeof(xfs_bstat_t), 0, &res);
+       ino = *lastinop;
+       error = xfs_bulkstat_one(mp, ino, buffer, sizeof(xfs_bstat_t),
+                                NULL, &res);
        if (error) {
                /*
                 * Special case way failed, do it the "long" way
index 5372d58..a2dea10 100644 (file)
@@ -257,7 +257,8 @@ xlog_grant_head_wait(
        struct xlog             *log,
        struct xlog_grant_head  *head,
        struct xlog_ticket      *tic,
-       int                     need_bytes)
+       int                     need_bytes) __releases(&head->lock)
+                                           __acquires(&head->lock)
 {
        list_add_tail(&tic->t_queue, &head->waiters);
 
index 31e3a06..ca7e28a 100644 (file)
@@ -474,6 +474,8 @@ typedef struct xfs_inode_log_format_64 {
 #define        XFS_ILOG_ADATA  0x040   /* log i_af.if_data */
 #define        XFS_ILOG_AEXT   0x080   /* log i_af.if_extents */
 #define        XFS_ILOG_ABROOT 0x100   /* log i_af.i_broot */
+#define XFS_ILOG_DOWNER        0x200   /* change the data fork owner on replay */
+#define XFS_ILOG_AOWNER        0x400   /* change the attr fork owner on replay */
 
 
 /*
@@ -487,7 +489,8 @@ typedef struct xfs_inode_log_format_64 {
 #define        XFS_ILOG_NONCORE        (XFS_ILOG_DDATA | XFS_ILOG_DEXT | \
                                 XFS_ILOG_DBROOT | XFS_ILOG_DEV | \
                                 XFS_ILOG_UUID | XFS_ILOG_ADATA | \
-                                XFS_ILOG_AEXT | XFS_ILOG_ABROOT)
+                                XFS_ILOG_AEXT | XFS_ILOG_ABROOT | \
+                                XFS_ILOG_DOWNER | XFS_ILOG_AOWNER)
 
 #define        XFS_ILOG_DFORK          (XFS_ILOG_DDATA | XFS_ILOG_DEXT | \
                                 XFS_ILOG_DBROOT)
@@ -499,7 +502,8 @@ typedef struct xfs_inode_log_format_64 {
                                 XFS_ILOG_DEXT | XFS_ILOG_DBROOT | \
                                 XFS_ILOG_DEV | XFS_ILOG_UUID | \
                                 XFS_ILOG_ADATA | XFS_ILOG_AEXT | \
-                                XFS_ILOG_ABROOT | XFS_ILOG_TIMESTAMP)
+                                XFS_ILOG_ABROOT | XFS_ILOG_TIMESTAMP | \
+                                XFS_ILOG_DOWNER | XFS_ILOG_AOWNER)
 
 static inline int xfs_ilog_fbroot(int w)
 {
index 7c0c1fd..dabda95 100644 (file)
@@ -2014,7 +2014,7 @@ xlog_recover_get_buf_lsn(
        case XFS_ATTR3_RMT_MAGIC:
                return be64_to_cpu(((struct xfs_attr3_rmt_hdr *)blk)->rm_lsn);
        case XFS_SB_MAGIC:
-               return be64_to_cpu(((struct xfs_sb *)blk)->sb_lsn);
+               return be64_to_cpu(((struct xfs_dsb *)blk)->sb_lsn);
        default:
                break;
        }
@@ -2629,6 +2629,82 @@ out_release:
        return error;
 }
 
+/*
+ * Inode fork owner changes
+ *
+ * If we have been told that we have to reparent the inode fork, it's because an
+ * extent swap operation on a CRC enabled filesystem has been done and we are
+ * replaying it. We need to walk the BMBT of the appropriate fork and change the
+ * owners of it.
+ *
+ * The complexity here is that we don't have an inode context to work with, so
+ * after we've replayed the inode we need to instantiate one.  This is where the
+ * fun begins.
+ *
+ * We are in the middle of log recovery, so we can't run transactions. That
+ * means we cannot use cache coherent inode instantiation via xfs_iget(), as
+ * that will result in the corresponding iput() running the inode through
+ * xfs_inactive(). If we've just replayed an inode core that changes the link
+ * count to zero (i.e. it's been unlinked), then xfs_inactive() will run
+ * transactions (bad!).
+ *
+ * So, to avoid this, we instantiate an inode directly from the inode core we've
+ * just recovered. We have the buffer still locked, and all we really need to
+ * instantiate is the inode core and the forks being modified. We can do this
+ * manually, then run the inode btree owner change, and then tear down the
+ * xfs_inode without having to run any transactions at all.
+ *
+ * Also, because we don't have a transaction context available here but need to
+ * gather all the buffers we modify for writeback so we pass the buffer_list
+ * instead for the operation to use.
+ */
+
+STATIC int
+xfs_recover_inode_owner_change(
+       struct xfs_mount        *mp,
+       struct xfs_dinode       *dip,
+       struct xfs_inode_log_format *in_f,
+       struct list_head        *buffer_list)
+{
+       struct xfs_inode        *ip;
+       int                     error;
+
+       ASSERT(in_f->ilf_fields & (XFS_ILOG_DOWNER|XFS_ILOG_AOWNER));
+
+       ip = xfs_inode_alloc(mp, in_f->ilf_ino);
+       if (!ip)
+               return ENOMEM;
+
+       /* instantiate the inode */
+       xfs_dinode_from_disk(&ip->i_d, dip);
+       ASSERT(ip->i_d.di_version >= 3);
+
+       error = xfs_iformat_fork(ip, dip);
+       if (error)
+               goto out_free_ip;
+
+
+       if (in_f->ilf_fields & XFS_ILOG_DOWNER) {
+               ASSERT(in_f->ilf_fields & XFS_ILOG_DBROOT);
+               error = xfs_bmbt_change_owner(NULL, ip, XFS_DATA_FORK,
+                                             ip->i_ino, buffer_list);
+               if (error)
+                       goto out_free_ip;
+       }
+
+       if (in_f->ilf_fields & XFS_ILOG_AOWNER) {
+               ASSERT(in_f->ilf_fields & XFS_ILOG_ABROOT);
+               error = xfs_bmbt_change_owner(NULL, ip, XFS_ATTR_FORK,
+                                             ip->i_ino, buffer_list);
+               if (error)
+                       goto out_free_ip;
+       }
+
+out_free_ip:
+       xfs_inode_free(ip);
+       return error;
+}
+
 STATIC int
 xlog_recover_inode_pass2(
        struct xlog                     *log,
@@ -2681,8 +2757,7 @@ xlog_recover_inode_pass2(
        error = bp->b_error;
        if (error) {
                xfs_buf_ioerror_alert(bp, "xlog_recover_do..(read#2)");
-               xfs_buf_relse(bp);
-               goto error;
+               goto out_release;
        }
        ASSERT(in_f->ilf_fields & XFS_ILOG_CORE);
        dip = (xfs_dinode_t *)xfs_buf_offset(bp, in_f->ilf_boffset);
@@ -2692,30 +2767,31 @@ xlog_recover_inode_pass2(
         * like an inode!
         */
        if (unlikely(dip->di_magic != cpu_to_be16(XFS_DINODE_MAGIC))) {
-               xfs_buf_relse(bp);
                xfs_alert(mp,
        "%s: Bad inode magic number, dip = 0x%p, dino bp = 0x%p, ino = %Ld",
                        __func__, dip, bp, in_f->ilf_ino);
                XFS_ERROR_REPORT("xlog_recover_inode_pass2(1)",
                                 XFS_ERRLEVEL_LOW, mp);
                error = EFSCORRUPTED;
-               goto error;
+               goto out_release;
        }
        dicp = item->ri_buf[1].i_addr;
        if (unlikely(dicp->di_magic != XFS_DINODE_MAGIC)) {
-               xfs_buf_relse(bp);
                xfs_alert(mp,
                        "%s: Bad inode log record, rec ptr 0x%p, ino %Ld",
                        __func__, item, in_f->ilf_ino);
                XFS_ERROR_REPORT("xlog_recover_inode_pass2(2)",
                                 XFS_ERRLEVEL_LOW, mp);
                error = EFSCORRUPTED;
-               goto error;
+               goto out_release;
        }
 
        /*
         * If the inode has an LSN in it, recover the inode only if it's less
-        * than the lsn of the transaction we are replaying.
+        * than the lsn of the transaction we are replaying. Note: we still
+        * need to replay an owner change even though the inode is more recent
+        * than the transaction as there is no guarantee that all the btree
+        * blocks are more recent than this transaction, too.
         */
        if (dip->di_version >= 3) {
                xfs_lsn_t       lsn = be64_to_cpu(dip->di_lsn);
@@ -2723,7 +2799,7 @@ xlog_recover_inode_pass2(
                if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) {
                        trace_xfs_log_recover_inode_skip(log, in_f);
                        error = 0;
-                       goto out_release;
+                       goto out_owner_change;
                }
        }
 
@@ -2745,10 +2821,9 @@ xlog_recover_inode_pass2(
                    dicp->di_flushiter < (DI_MAX_FLUSH >> 1)) {
                        /* do nothing */
                } else {
-                       xfs_buf_relse(bp);
                        trace_xfs_log_recover_inode_skip(log, in_f);
                        error = 0;
-                       goto error;
+                       goto out_release;
                }
        }
 
@@ -2760,13 +2835,12 @@ xlog_recover_inode_pass2(
                    (dicp->di_format != XFS_DINODE_FMT_BTREE)) {
                        XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(3)",
                                         XFS_ERRLEVEL_LOW, mp, dicp);
-                       xfs_buf_relse(bp);
                        xfs_alert(mp,
                "%s: Bad regular inode log record, rec ptr 0x%p, "
                "ino ptr = 0x%p, ino bp = 0x%p, ino %Ld",
                                __func__, item, dip, bp, in_f->ilf_ino);
                        error = EFSCORRUPTED;
-                       goto error;
+                       goto out_release;
                }
        } else if (unlikely(S_ISDIR(dicp->di_mode))) {
                if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) &&
@@ -2774,19 +2848,17 @@ xlog_recover_inode_pass2(
                    (dicp->di_format != XFS_DINODE_FMT_LOCAL)) {
                        XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(4)",
                                             XFS_ERRLEVEL_LOW, mp, dicp);
-                       xfs_buf_relse(bp);
                        xfs_alert(mp,
                "%s: Bad dir inode log record, rec ptr 0x%p, "
                "ino ptr = 0x%p, ino bp = 0x%p, ino %Ld",
                                __func__, item, dip, bp, in_f->ilf_ino);
                        error = EFSCORRUPTED;
-                       goto error;
+                       goto out_release;
                }
        }
        if (unlikely(dicp->di_nextents + dicp->di_anextents > dicp->di_nblocks)){
                XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(5)",
                                     XFS_ERRLEVEL_LOW, mp, dicp);
-               xfs_buf_relse(bp);
                xfs_alert(mp,
        "%s: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, "
        "dino bp 0x%p, ino %Ld, total extents = %d, nblocks = %Ld",
@@ -2794,29 +2866,27 @@ xlog_recover_inode_pass2(
                        dicp->di_nextents + dicp->di_anextents,
                        dicp->di_nblocks);
                error = EFSCORRUPTED;
-               goto error;
+               goto out_release;
        }
        if (unlikely(dicp->di_forkoff > mp->m_sb.sb_inodesize)) {
                XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(6)",
                                     XFS_ERRLEVEL_LOW, mp, dicp);
-               xfs_buf_relse(bp);
                xfs_alert(mp,
        "%s: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, "
        "dino bp 0x%p, ino %Ld, forkoff 0x%x", __func__,
                        item, dip, bp, in_f->ilf_ino, dicp->di_forkoff);
                error = EFSCORRUPTED;
-               goto error;
+               goto out_release;
        }
        isize = xfs_icdinode_size(dicp->di_version);
        if (unlikely(item->ri_buf[1].i_len > isize)) {
                XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)",
                                     XFS_ERRLEVEL_LOW, mp, dicp);
-               xfs_buf_relse(bp);
                xfs_alert(mp,
                        "%s: Bad inode log record length %d, rec ptr 0x%p",
                        __func__, item->ri_buf[1].i_len, item);
                error = EFSCORRUPTED;
-               goto error;
+               goto out_release;
        }
 
        /* The core is in in-core format */
@@ -2842,7 +2912,7 @@ xlog_recover_inode_pass2(
        }
 
        if (in_f->ilf_size == 2)
-               goto write_inode_buffer;
+               goto out_owner_change;
        len = item->ri_buf[2].i_len;
        src = item->ri_buf[2].i_addr;
        ASSERT(in_f->ilf_size <= 4);
@@ -2903,13 +2973,15 @@ xlog_recover_inode_pass2(
                default:
                        xfs_warn(log->l_mp, "%s: Invalid flag", __func__);
                        ASSERT(0);
-                       xfs_buf_relse(bp);
                        error = EIO;
-                       goto error;
+                       goto out_release;
                }
        }
 
-write_inode_buffer:
+out_owner_change:
+       if (in_f->ilf_fields & (XFS_ILOG_DOWNER|XFS_ILOG_AOWNER))
+               error = xfs_recover_inode_owner_change(mp, dip, in_f,
+                                                      buffer_list);
        /* re-generate the checksum. */
        xfs_dinode_calc_crc(log->l_mp, dip);
 
index 6218a0a..3e6c2e6 100644 (file)
@@ -51,8 +51,9 @@
  */
 STATIC int     xfs_qm_init_quotainos(xfs_mount_t *);
 STATIC int     xfs_qm_init_quotainfo(xfs_mount_t *);
-STATIC int     xfs_qm_shake(struct shrinker *, struct shrink_control *);
 
+
+STATIC void    xfs_qm_dqfree_one(struct xfs_dquot *dqp);
 /*
  * We use the batch lookup interface to iterate over the dquots as it
  * currently is the only interface into the radix tree code that allows
@@ -203,12 +204,9 @@ xfs_qm_dqpurge(
         * We move dquots to the freelist as soon as their reference count
         * hits zero, so it really should be on the freelist here.
         */
-       mutex_lock(&qi->qi_lru_lock);
        ASSERT(!list_empty(&dqp->q_lru));
-       list_del_init(&dqp->q_lru);
-       qi->qi_lru_count--;
+       list_lru_del(&qi->qi_lru, &dqp->q_lru);
        XFS_STATS_DEC(xs_qm_dquot_unused);
-       mutex_unlock(&qi->qi_lru_lock);
 
        xfs_qm_dqdestroy(dqp);
 
@@ -680,6 +678,143 @@ xfs_qm_calc_dquots_per_chunk(
        return ndquots;
 }
 
+struct xfs_qm_isolate {
+       struct list_head        buffers;
+       struct list_head        dispose;
+};
+
+static enum lru_status
+xfs_qm_dquot_isolate(
+       struct list_head        *item,
+       spinlock_t              *lru_lock,
+       void                    *arg)
+{
+       struct xfs_dquot        *dqp = container_of(item,
+                                               struct xfs_dquot, q_lru);
+       struct xfs_qm_isolate   *isol = arg;
+
+       if (!xfs_dqlock_nowait(dqp))
+               goto out_miss_busy;
+
+       /*
+        * This dquot has acquired a reference in the meantime remove it from
+        * the freelist and try again.
+        */
+       if (dqp->q_nrefs) {
+               xfs_dqunlock(dqp);
+               XFS_STATS_INC(xs_qm_dqwants);
+
+               trace_xfs_dqreclaim_want(dqp);
+               list_del_init(&dqp->q_lru);
+               XFS_STATS_DEC(xs_qm_dquot_unused);
+               return LRU_REMOVED;
+       }
+
+       /*
+        * If the dquot is dirty, flush it. If it's already being flushed, just
+        * skip it so there is time for the IO to complete before we try to
+        * reclaim it again on the next LRU pass.
+        */
+       if (!xfs_dqflock_nowait(dqp)) {
+               xfs_dqunlock(dqp);
+               goto out_miss_busy;
+       }
+
+       if (XFS_DQ_IS_DIRTY(dqp)) {
+               struct xfs_buf  *bp = NULL;
+               int             error;
+
+               trace_xfs_dqreclaim_dirty(dqp);
+
+               /* we have to drop the LRU lock to flush the dquot */
+               spin_unlock(lru_lock);
+
+               error = xfs_qm_dqflush(dqp, &bp);
+               if (error) {
+                       xfs_warn(dqp->q_mount, "%s: dquot %p flush failed",
+                                __func__, dqp);
+                       goto out_unlock_dirty;
+               }
+
+               xfs_buf_delwri_queue(bp, &isol->buffers);
+               xfs_buf_relse(bp);
+               goto out_unlock_dirty;
+       }
+       xfs_dqfunlock(dqp);
+
+       /*
+        * Prevent lookups now that we are past the point of no return.
+        */
+       dqp->dq_flags |= XFS_DQ_FREEING;
+       xfs_dqunlock(dqp);
+
+       ASSERT(dqp->q_nrefs == 0);
+       list_move_tail(&dqp->q_lru, &isol->dispose);
+       XFS_STATS_DEC(xs_qm_dquot_unused);
+       trace_xfs_dqreclaim_done(dqp);
+       XFS_STATS_INC(xs_qm_dqreclaims);
+       return LRU_REMOVED;
+
+out_miss_busy:
+       trace_xfs_dqreclaim_busy(dqp);
+       XFS_STATS_INC(xs_qm_dqreclaim_misses);
+       return LRU_SKIP;
+
+out_unlock_dirty:
+       trace_xfs_dqreclaim_busy(dqp);
+       XFS_STATS_INC(xs_qm_dqreclaim_misses);
+       xfs_dqunlock(dqp);
+       spin_lock(lru_lock);
+       return LRU_RETRY;
+}
+
+static unsigned long
+xfs_qm_shrink_scan(
+       struct shrinker         *shrink,
+       struct shrink_control   *sc)
+{
+       struct xfs_quotainfo    *qi = container_of(shrink,
+                                       struct xfs_quotainfo, qi_shrinker);
+       struct xfs_qm_isolate   isol;
+       unsigned long           freed;
+       int                     error;
+       unsigned long           nr_to_scan = sc->nr_to_scan;
+
+       if ((sc->gfp_mask & (__GFP_FS|__GFP_WAIT)) != (__GFP_FS|__GFP_WAIT))
+               return 0;
+
+       INIT_LIST_HEAD(&isol.buffers);
+       INIT_LIST_HEAD(&isol.dispose);
+
+       freed = list_lru_walk_node(&qi->qi_lru, sc->nid, xfs_qm_dquot_isolate, &isol,
+                                       &nr_to_scan);
+
+       error = xfs_buf_delwri_submit(&isol.buffers);
+       if (error)
+               xfs_warn(NULL, "%s: dquot reclaim failed", __func__);
+
+       while (!list_empty(&isol.dispose)) {
+               struct xfs_dquot        *dqp;
+
+               dqp = list_first_entry(&isol.dispose, struct xfs_dquot, q_lru);
+               list_del_init(&dqp->q_lru);
+               xfs_qm_dqfree_one(dqp);
+       }
+
+       return freed;
+}
+
+static unsigned long
+xfs_qm_shrink_count(
+       struct shrinker         *shrink,
+       struct shrink_control   *sc)
+{
+       struct xfs_quotainfo    *qi = container_of(shrink,
+                                       struct xfs_quotainfo, qi_shrinker);
+
+       return list_lru_count_node(&qi->qi_lru, sc->nid);
+}
+
 /*
  * This initializes all the quota information that's kept in the
  * mount structure
@@ -696,11 +831,18 @@ xfs_qm_init_quotainfo(
 
        qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP);
 
+       if ((error = list_lru_init(&qinf->qi_lru))) {
+               kmem_free(qinf);
+               mp->m_quotainfo = NULL;
+               return error;
+       }
+
        /*
         * See if quotainodes are setup, and if not, allocate them,
         * and change the superblock accordingly.
         */
        if ((error = xfs_qm_init_quotainos(mp))) {
+               list_lru_destroy(&qinf->qi_lru);
                kmem_free(qinf);
                mp->m_quotainfo = NULL;
                return error;
@@ -711,10 +853,6 @@ xfs_qm_init_quotainfo(
        INIT_RADIX_TREE(&qinf->qi_pquota_tree, GFP_NOFS);
        mutex_init(&qinf->qi_tree_lock);
 
-       INIT_LIST_HEAD(&qinf->qi_lru_list);
-       qinf->qi_lru_count = 0;
-       mutex_init(&qinf->qi_lru_lock);
-
        /* mutex used to serialize quotaoffs */
        mutex_init(&qinf->qi_quotaofflock);
 
@@ -779,8 +917,10 @@ xfs_qm_init_quotainfo(
                qinf->qi_rtbwarnlimit = XFS_QM_RTBWARNLIMIT;
        }
 
-       qinf->qi_shrinker.shrink = xfs_qm_shake;
+       qinf->qi_shrinker.count_objects = xfs_qm_shrink_count;
+       qinf->qi_shrinker.scan_objects = xfs_qm_shrink_scan;
        qinf->qi_shrinker.seeks = DEFAULT_SEEKS;
+       qinf->qi_shrinker.flags = SHRINKER_NUMA_AWARE;
        register_shrinker(&qinf->qi_shrinker);
        return 0;
 }
@@ -801,6 +941,7 @@ xfs_qm_destroy_quotainfo(
        ASSERT(qi != NULL);
 
        unregister_shrinker(&qi->qi_shrinker);
+       list_lru_destroy(&qi->qi_lru);
 
        if (qi->qi_uquotaip) {
                IRELE(qi->qi_uquotaip);
@@ -1599,132 +1740,6 @@ xfs_qm_dqfree_one(
        xfs_qm_dqdestroy(dqp);
 }
 
-STATIC void
-xfs_qm_dqreclaim_one(
-       struct xfs_dquot        *dqp,
-       struct list_head        *buffer_list,
-       struct list_head        *dispose_list)
-{
-       struct xfs_mount        *mp = dqp->q_mount;
-       struct xfs_quotainfo    *qi = mp->m_quotainfo;
-       int                     error;
-
-       if (!xfs_dqlock_nowait(dqp))
-               goto out_move_tail;
-
-       /*
-        * This dquot has acquired a reference in the meantime remove it from
-        * the freelist and try again.
-        */
-       if (dqp->q_nrefs) {
-               xfs_dqunlock(dqp);
-
-               trace_xfs_dqreclaim_want(dqp);
-               XFS_STATS_INC(xs_qm_dqwants);
-
-               list_del_init(&dqp->q_lru);
-               qi->qi_lru_count--;
-               XFS_STATS_DEC(xs_qm_dquot_unused);
-               return;
-       }
-
-       /*
-        * Try to grab the flush lock. If this dquot is in the process of
-        * getting flushed to disk, we don't want to reclaim it.
-        */
-       if (!xfs_dqflock_nowait(dqp))
-               goto out_unlock_move_tail;
-
-       if (XFS_DQ_IS_DIRTY(dqp)) {
-               struct xfs_buf  *bp = NULL;
-
-               trace_xfs_dqreclaim_dirty(dqp);
-
-               error = xfs_qm_dqflush(dqp, &bp);
-               if (error) {
-                       xfs_warn(mp, "%s: dquot %p flush failed",
-                                __func__, dqp);
-                       goto out_unlock_move_tail;
-               }
-
-               xfs_buf_delwri_queue(bp, buffer_list);
-               xfs_buf_relse(bp);
-               /*
-                * Give the dquot another try on the freelist, as the
-                * flushing will take some time.
-                */
-               goto out_unlock_move_tail;
-       }
-       xfs_dqfunlock(dqp);
-
-       /*
-        * Prevent lookups now that we are past the point of no return.
-        */
-       dqp->dq_flags |= XFS_DQ_FREEING;
-       xfs_dqunlock(dqp);
-
-       ASSERT(dqp->q_nrefs == 0);
-       list_move_tail(&dqp->q_lru, dispose_list);
-       qi->qi_lru_count--;
-       XFS_STATS_DEC(xs_qm_dquot_unused);
-
-       trace_xfs_dqreclaim_done(dqp);
-       XFS_STATS_INC(xs_qm_dqreclaims);
-       return;
-
-       /*
-        * Move the dquot to the tail of the list so that we don't spin on it.
-        */
-out_unlock_move_tail:
-       xfs_dqunlock(dqp);
-out_move_tail:
-       list_move_tail(&dqp->q_lru, &qi->qi_lru_list);
-       trace_xfs_dqreclaim_busy(dqp);
-       XFS_STATS_INC(xs_qm_dqreclaim_misses);
-}
-
-STATIC int
-xfs_qm_shake(
-       struct shrinker         *shrink,
-       struct shrink_control   *sc)
-{
-       struct xfs_quotainfo    *qi =
-               container_of(shrink, struct xfs_quotainfo, qi_shrinker);
-       int                     nr_to_scan = sc->nr_to_scan;
-       LIST_HEAD               (buffer_list);
-       LIST_HEAD               (dispose_list);
-       struct xfs_dquot        *dqp;
-       int                     error;
-
-       if ((sc->gfp_mask & (__GFP_FS|__GFP_WAIT)) != (__GFP_FS|__GFP_WAIT))
-               return 0;
-       if (!nr_to_scan)
-               goto out;
-
-       mutex_lock(&qi->qi_lru_lock);
-       while (!list_empty(&qi->qi_lru_list)) {
-               if (nr_to_scan-- <= 0)
-                       break;
-               dqp = list_first_entry(&qi->qi_lru_list, struct xfs_dquot,
-                                      q_lru);
-               xfs_qm_dqreclaim_one(dqp, &buffer_list, &dispose_list);
-       }
-       mutex_unlock(&qi->qi_lru_lock);
-
-       error = xfs_buf_delwri_submit(&buffer_list);
-       if (error)
-               xfs_warn(NULL, "%s: dquot reclaim failed", __func__);
-
-       while (!list_empty(&dispose_list)) {
-               dqp = list_first_entry(&dispose_list, struct xfs_dquot, q_lru);
-               list_del_init(&dqp->q_lru);
-               xfs_qm_dqfree_one(dqp);
-       }
-
-out:
-       return (qi->qi_lru_count / 100) * sysctl_vfs_cache_pressure;
-}
-
 /*
  * Start a transaction and write the incore superblock changes to
  * disk. flags parameter indicates which fields have changed.
index 670cd44..2b602df 100644 (file)
@@ -49,9 +49,7 @@ typedef struct xfs_quotainfo {
        struct xfs_inode        *qi_uquotaip;   /* user quota inode */
        struct xfs_inode        *qi_gquotaip;   /* group quota inode */
        struct xfs_inode        *qi_pquotaip;   /* project quota inode */
-       struct list_head qi_lru_list;
-       struct mutex     qi_lru_lock;
-       int              qi_lru_count;
+       struct list_lru  qi_lru;
        int              qi_dquots;
        time_t           qi_btimelimit;  /* limit for blks timer */
        time_t           qi_itimelimit;  /* limit for inodes timer */
index 979a77d..15188cc 100644 (file)
@@ -1535,19 +1535,21 @@ xfs_fs_mount(
        return mount_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super);
 }
 
-static int
+static long
 xfs_fs_nr_cached_objects(
-       struct super_block      *sb)
+       struct super_block      *sb,
+       int                     nid)
 {
        return xfs_reclaim_inodes_count(XFS_M(sb));
 }
 
-static void
+static long
 xfs_fs_free_cached_objects(
        struct super_block      *sb,
-       int                     nr_to_scan)
+       long                    nr_to_scan,
+       int                     nid)
 {
-       xfs_reclaim_inodes_nr(XFS_M(sb), nr_to_scan);
+       return xfs_reclaim_inodes_nr(XFS_M(sb), nr_to_scan);
 }
 
 static const struct super_operations xfs_super_operations = {
index 2f2a7c0..f622a97 100644 (file)
@@ -41,6 +41,7 @@
 #include "xfs_trans_space.h"
 #include "xfs_trace.h"
 #include "xfs_symlink.h"
+#include "xfs_buf_item.h"
 
 /* ----- Kernel only functions below ----- */
 STATIC int
@@ -363,6 +364,7 @@ xfs_symlink(
                        pathlen -= byte_cnt;
                        offset += byte_cnt;
 
+                       xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SYMLINK_BUF);
                        xfs_trans_log_buf(tp, bp, 0, (buf + byte_cnt - 1) -
                                                        (char *)bp->b_addr);
                }
index 1bdf965..d9c92da 100644 (file)
@@ -27,15 +27,13 @@ struct kiocb;
  */
 #define KIOCB_CANCELLED                ((void *) (~0ULL))
 
-typedef int (kiocb_cancel_fn)(struct kiocb *, struct io_event *);
+typedef int (kiocb_cancel_fn)(struct kiocb *);
 
 struct kiocb {
-       atomic_t                ki_users;
-
        struct file             *ki_filp;
        struct kioctx           *ki_ctx;        /* NULL for sync ops */
        kiocb_cancel_fn         *ki_cancel;
-       void                    (*ki_dtor)(struct kiocb *);
+       void                    *private;
 
        union {
                void __user             *user;
@@ -44,17 +42,7 @@ struct kiocb {
 
        __u64                   ki_user_data;   /* user's data for completion */
        loff_t                  ki_pos;
-
-       void                    *private;
-       /* State that we remember to be able to restart/retry  */
-       unsigned short          ki_opcode;
-       size_t                  ki_nbytes;      /* copy of iocb->aio_nbytes */
-       char                    __user *ki_buf; /* remaining iocb->aio_buf */
-       size_t                  ki_left;        /* remaining bytes */
-       struct iovec            ki_inline_vec;  /* inline vector */
-       struct iovec            *ki_iovec;
-       unsigned long           ki_nr_segs;
-       unsigned long           ki_cur_seg;
+       size_t                  ki_nbytes;      /* copy of iocb->aio_nbytes */
 
        struct list_head        ki_list;        /* the aio core uses this
                                                 * for cancellation */
@@ -74,7 +62,6 @@ static inline bool is_sync_kiocb(struct kiocb *kiocb)
 static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
 {
        *kiocb = (struct kiocb) {
-                       .ki_users = ATOMIC_INIT(1),
                        .ki_ctx = NULL,
                        .ki_filp = filp,
                        .ki_obj.tsk = current,
@@ -84,7 +71,6 @@ static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
 /* prototypes */
 #ifdef CONFIG_AIO
 extern ssize_t wait_on_sync_kiocb(struct kiocb *iocb);
-extern void aio_put_req(struct kiocb *iocb);
 extern void aio_complete(struct kiocb *iocb, long res, long res2);
 struct mm_struct;
 extern void exit_aio(struct mm_struct *mm);
@@ -93,7 +79,6 @@ extern long do_io_submit(aio_context_t ctx_id, long nr,
 void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel);
 #else
 static inline ssize_t wait_on_sync_kiocb(struct kiocb *iocb) { return 0; }
-static inline void aio_put_req(struct kiocb *iocb) { }
 static inline void aio_complete(struct kiocb *iocb, long res, long res2) { }
 struct mm_struct;
 static inline void exit_aio(struct mm_struct *mm) { }
index 8013a45..cf573c2 100644 (file)
@@ -13,6 +13,9 @@ struct file_operations;
 struct file *anon_inode_getfile(const char *name,
                                const struct file_operations *fops,
                                void *priv, int flags);
+struct file *anon_inode_getfile_private(const char *name,
+                               const struct file_operations *fops,
+                               void *priv, int flags);
 int anon_inode_getfd(const char *name, const struct file_operations *fops,
                     void *priv, int flags);
 
index c388155..5f66d51 100644 (file)
@@ -243,6 +243,8 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio);
  * BDI_CAP_EXEC_MAP:       Can be mapped for execution
  *
  * BDI_CAP_SWAP_BACKED:    Count shmem/tmpfs objects as swap-backed.
+ *
+ * BDI_CAP_STRICTLIMIT:    Keep number of dirty pages below bdi threshold.
  */
 #define BDI_CAP_NO_ACCT_DIRTY  0x00000001
 #define BDI_CAP_NO_WRITEBACK   0x00000002
@@ -254,6 +256,7 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio);
 #define BDI_CAP_NO_ACCT_WB     0x00000080
 #define BDI_CAP_SWAP_BACKED    0x00000100
 #define BDI_CAP_STABLE_WRITES  0x00000200
+#define BDI_CAP_STRICTLIMIT    0x00000400
 
 #define BDI_CAP_VMFLAGS \
        (BDI_CAP_READ_MAP | BDI_CAP_WRITE_MAP | BDI_CAP_EXEC_MAP)
index 70cf138..e8112ae 100644 (file)
@@ -31,7 +31,7 @@ struct linux_binprm {
 #ifdef __alpha__
        unsigned int taso:1;
 #endif
-       unsigned int recursion_depth;
+       unsigned int recursion_depth; /* only for search_binary_handler() */
        struct file * file;
        struct cred *cred;      /* new credentials */
        int unsafe;             /* how unsafe this exec is (mask of LSM_UNSAFE_*) */
diff --git a/include/linux/cmdline-parser.h b/include/linux/cmdline-parser.h
new file mode 100644 (file)
index 0000000..98e892e
--- /dev/null
@@ -0,0 +1,43 @@
+/*
+ * Parsing command line, get the partitions information.
+ *
+ * Written by Cai Zhiyong <caizhiyong@huawei.com>
+ *
+ */
+#ifndef CMDLINEPARSEH
+#define CMDLINEPARSEH
+
+#include <linux/blkdev.h>
+
+/* partition flags */
+#define PF_RDONLY                   0x01 /* Device is read only */
+#define PF_POWERUP_LOCK             0x02 /* Always locked after reset */
+
+struct cmdline_subpart {
+       char name[BDEVNAME_SIZE]; /* partition name, such as 'rootfs' */
+       sector_t from;
+       sector_t size;
+       int flags;
+       struct cmdline_subpart *next_subpart;
+};
+
+struct cmdline_parts {
+       char name[BDEVNAME_SIZE]; /* block device, such as 'mmcblk0' */
+       unsigned int nr_subparts;
+       struct cmdline_subpart *subpart;
+       struct cmdline_parts *next_parts;
+};
+
+void cmdline_parts_free(struct cmdline_parts **parts);
+
+int cmdline_parts_parse(struct cmdline_parts **parts, const char *cmdline);
+
+struct cmdline_parts *cmdline_parts_find(struct cmdline_parts *parts,
+                                        const char *bdev);
+
+void cmdline_parts_set(struct cmdline_parts *parts, sector_t disk_size,
+                      int slot,
+                      int (*add_part)(int, struct cmdline_subpart *, void *),
+                      void *param);
+
+#endif /* CMDLINEPARSEH */
index ec1aee4..345da00 100644 (file)
@@ -43,6 +43,7 @@
 #define COMPAT_SYSCALL_DEFINEx(x, name, ...)                           \
        asmlinkage long compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));\
        static inline long C_SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__));\
+       asmlinkage long compat_SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__));\
        asmlinkage long compat_SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__))\
        {                                                               \
                return C_SYSC##name(__MAP(x,__SC_DELOUSE,__VA_ARGS__)); \
index 1739510..bdd18ca 100644 (file)
@@ -52,8 +52,6 @@ static inline void *cpu_rmap_lookup_obj(struct cpu_rmap *rmap, unsigned int cpu)
        return rmap->obj[rmap->near[cpu].index];
 }
 
-#ifdef CONFIG_GENERIC_HARDIRQS
-
 /**
  * alloc_irq_cpu_rmap - allocate CPU affinity reverse-map for IRQs
  * @size: Number of objects to be mapped
@@ -68,5 +66,4 @@ extern void free_irq_cpu_rmap(struct cpu_rmap *rmap);
 
 extern int irq_cpu_rmap_add(struct cpu_rmap *rmap, int irq);
 
-#endif
 #endif /* __LINUX_CPU_RMAP_H */
index d568f39..fcabc42 100644 (file)
@@ -85,7 +85,6 @@ struct cpufreq_policy {
        struct list_head        policy_list;
        struct kobject          kobj;
        struct completion       kobj_unregister;
-       int                     transition_ongoing; /* Tracks transition status */
 };
 
 /* Only for ACPI */
index 37e4f8d..fe68a5a 100644 (file)
 extern unsigned long long elfcorehdr_addr;
 extern unsigned long long elfcorehdr_size;
 
+extern int __weak elfcorehdr_alloc(unsigned long long *addr,
+                                  unsigned long long *size);
+extern void __weak elfcorehdr_free(unsigned long long addr);
+extern ssize_t __weak elfcorehdr_read(char *buf, size_t count, u64 *ppos);
+extern ssize_t __weak elfcorehdr_read_notes(char *buf, size_t count, u64 *ppos);
+extern int __weak remap_oldmem_pfn_range(struct vm_area_struct *vma,
+                                        unsigned long from, unsigned long pfn,
+                                        unsigned long size, pgprot_t prot);
+
 extern ssize_t copy_oldmem_page(unsigned long, char *, size_t,
                                                unsigned long, int);
 
index feaa8d8..59066e0 100644 (file)
@@ -55,11 +55,11 @@ struct qstr {
 #define hashlen_len(hashlen)  ((u32)((hashlen) >> 32))
 
 struct dentry_stat_t {
-       int nr_dentry;
-       int nr_unused;
-       int age_limit;          /* age in seconds */
-       int want_pages;         /* pages requested by system */
-       int dummy[2];
+       long nr_dentry;
+       long nr_unused;
+       long age_limit;          /* age in seconds */
+       long want_pages;         /* pages requested by system */
+       long dummy[2];
 };
 extern struct dentry_stat_t dentry_stat;
 
@@ -395,4 +395,8 @@ static inline bool d_mountpoint(const struct dentry *dentry)
 
 extern int sysctl_vfs_cache_pressure;
 
+static inline unsigned long vfs_pressure_ratio(unsigned long val)
+{
+       return mult_frac(val, sysctl_vfs_cache_pressure, 100);
+}
 #endif /* __LINUX_DCACHE_H */
index 529d871..3f40547 100644 (file)
@@ -10,6 +10,7 @@
 #include <linux/stat.h>
 #include <linux/cache.h>
 #include <linux/list.h>
+#include <linux/list_lru.h>
 #include <linux/llist.h>
 #include <linux/radix-tree.h>
 #include <linux/rbtree.h>
@@ -1269,15 +1270,6 @@ struct super_block {
        struct list_head        s_files;
 #endif
        struct list_head        s_mounts;       /* list of mounts; _not_ for fs use */
-       /* s_dentry_lru, s_nr_dentry_unused protected by dcache.c lru locks */
-       struct list_head        s_dentry_lru;   /* unused dentry lru */
-       int                     s_nr_dentry_unused;     /* # of dentry on lru */
-
-       /* s_inode_lru_lock protects s_inode_lru and s_nr_inodes_unused */
-       spinlock_t              s_inode_lru_lock ____cacheline_aligned_in_smp;
-       struct list_head        s_inode_lru;            /* unused inode lru */
-       int                     s_nr_inodes_unused;     /* # of inodes on lru */
-
        struct block_device     *s_bdev;
        struct backing_dev_info *s_bdi;
        struct mtd_info         *s_mtd;
@@ -1331,11 +1323,14 @@ struct super_block {
 
        /* AIO completions deferred from interrupt context */
        struct workqueue_struct *s_dio_done_wq;
-};
 
-/* superblock cache pruning functions */
-extern void prune_icache_sb(struct super_block *sb, int nr_to_scan);
-extern void prune_dcache_sb(struct super_block *sb, int nr_to_scan);
+       /*
+        * Keep the lru lists last in the structure so they always sit on their
+        * own individual cachelines.
+        */
+       struct list_lru         s_dentry_lru ____cacheline_aligned_in_smp;
+       struct list_lru         s_inode_lru ____cacheline_aligned_in_smp;
+};
 
 extern struct timespec current_fs_time(struct super_block *sb);
 
@@ -1629,8 +1624,8 @@ struct super_operations {
        ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
 #endif
        int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t);
-       int (*nr_cached_objects)(struct super_block *);
-       void (*free_cached_objects)(struct super_block *, int);
+       long (*nr_cached_objects)(struct super_block *, int);
+       long (*free_cached_objects)(struct super_block *, long, int);
 };
 
 /*
@@ -2074,6 +2069,7 @@ extern struct super_block *freeze_bdev(struct block_device *);
 extern void emergency_thaw_all(void);
 extern int thaw_bdev(struct block_device *bdev, struct super_block *sb);
 extern int fsync_bdev(struct block_device *);
+extern int sb_is_blkdev_sb(struct super_block *sb);
 #else
 static inline void bd_forget(struct inode *inode) {}
 static inline int sync_blockdev(struct block_device *bdev) { return 0; }
@@ -2093,6 +2089,11 @@ static inline int thaw_bdev(struct block_device *bdev, struct super_block *sb)
 static inline void iterate_bdevs(void (*f)(struct block_device *, void *), void *arg)
 {
 }
+
+static inline int sb_is_blkdev_sb(struct super_block *sb)
+{
+       return 0;
+}
 #endif
 extern int sync_filesystem(struct super_block *);
 extern const struct file_operations def_blk_fops;
@@ -2494,7 +2495,6 @@ extern const struct file_operations generic_ro_fops;
 #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m))
 
 extern int vfs_readlink(struct dentry *, char __user *, int, const char *);
-extern int vfs_follow_link(struct nameidata *, const char *);
 extern int page_readlink(struct dentry *, char __user *, int);
 extern void *page_follow_link_light(struct dentry *, struct nameidata *);
 extern void page_put_link(struct dentry *, struct nameidata *, void *);
index 2b93a9a..0efc3e6 100644 (file)
@@ -39,17 +39,6 @@ static inline void get_fs_pwd(struct fs_struct *fs, struct path *pwd)
        spin_unlock(&fs->lock);
 }
 
-static inline void get_fs_root_and_pwd(struct fs_struct *fs, struct path *root,
-                                      struct path *pwd)
-{
-       spin_lock(&fs->lock);
-       *root = fs->root;
-       path_get(root);
-       *pwd = fs->pwd;
-       path_get(pwd);
-       spin_unlock(&fs->lock);
-}
-
 extern bool current_chrooted(void);
 
 #endif /* _LINUX_FS_STRUCT_H */
index 661d374..f8d41cb 100644 (file)
@@ -66,8 +66,8 @@ struct gen_pool_chunk {
        struct list_head next_chunk;    /* next chunk in pool */
        atomic_t avail;
        phys_addr_t phys_addr;          /* physical starting address of memory chunk */
-       unsigned long start_addr;       /* starting address of memory chunk */
-       unsigned long end_addr;         /* ending address of memory chunk */
+       unsigned long start_addr;       /* start address of memory chunk */
+       unsigned long end_addr;         /* end address of memory chunk (inclusive) */
        unsigned long bits[0];          /* bitmap for allocating memory chunk */
 };
 
index ccfe17c..1e04106 100644 (file)
@@ -7,11 +7,7 @@
 #include <linux/vtime.h>
 
 
-#if defined(CONFIG_SMP) || defined(CONFIG_GENERIC_HARDIRQS)
 extern void synchronize_irq(unsigned int irq);
-#else
-# define synchronize_irq(irq)  barrier()
-#endif
 
 #if defined(CONFIG_TINY_RCU)
 
index ee1ffc5..31b9d29 100644 (file)
@@ -756,6 +756,10 @@ u8 *hid_alloc_report_buf(struct hid_report *report, gfp_t flags);
 struct hid_device *hid_allocate_device(void);
 struct hid_report *hid_register_report(struct hid_device *device, unsigned type, unsigned id);
 int hid_parse_report(struct hid_device *hid, __u8 *start, unsigned size);
+struct hid_report *hid_validate_values(struct hid_device *hid,
+                                      unsigned int type, unsigned int id,
+                                      unsigned int field_index,
+                                      unsigned int report_counts);
 int hid_open_report(struct hid_device *device);
 int hid_check_keys_pressed(struct hid_device *hid);
 int hid_connect(struct hid_device *hid, unsigned int connect_mask);
index b60de92..3935428 100644 (file)
@@ -96,9 +96,6 @@ extern int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                          pmd_t *dst_pmd, pmd_t *src_pmd,
                          struct vm_area_struct *vma,
                          unsigned long addr, unsigned long end);
-extern int handle_pte_fault(struct mm_struct *mm,
-                           struct vm_area_struct *vma, unsigned long address,
-                           pte_t *pte, pmd_t *pmd, unsigned int flags);
 extern int split_huge_page_to_list(struct page *page, struct list_head *list);
 static inline int split_huge_page(struct page *page)
 {
index c2b1801..0393270 100644 (file)
@@ -66,6 +66,9 @@ int hugetlb_reserve_pages(struct inode *inode, long from, long to,
                                                vm_flags_t vm_flags);
 void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed);
 int dequeue_hwpoisoned_huge_page(struct page *page);
+bool isolate_huge_page(struct page *page, struct list_head *list);
+void putback_active_hugepage(struct page *page);
+bool is_hugepage_active(struct page *page);
 void copy_huge_page(struct page *dst, struct page *src);
 
 #ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE
@@ -134,6 +137,9 @@ static inline int dequeue_hwpoisoned_huge_page(struct page *page)
        return 0;
 }
 
+#define isolate_huge_page(p, l) false
+#define putback_active_hugepage(p)     do {} while (0)
+#define is_hugepage_active(x)  false
 static inline void copy_huge_page(struct page *dst, struct page *src)
 {
 }
@@ -261,6 +267,8 @@ struct huge_bootmem_page {
 };
 
 struct page *alloc_huge_page_node(struct hstate *h, int nid);
+struct page *alloc_huge_page_noerr(struct vm_area_struct *vma,
+                               unsigned long addr, int avoid_reserve);
 
 /* arch callback */
 int __init alloc_bootmem_huge_page(struct hstate *h);
@@ -371,9 +379,23 @@ static inline pgoff_t basepage_index(struct page *page)
        return __basepage_index(page);
 }
 
+extern void dissolve_free_huge_pages(unsigned long start_pfn,
+                                    unsigned long end_pfn);
+int pmd_huge_support(void);
+/*
+ * Currently hugepage migration is enabled only for pmd-based hugepage.
+ * This function will be updated when hugepage migration is more widely
+ * supported.
+ */
+static inline int hugepage_migration_support(struct hstate *h)
+{
+       return pmd_huge_support() && (huge_page_shift(h) == PMD_SHIFT);
+}
+
 #else  /* CONFIG_HUGETLB_PAGE */
 struct hstate {};
 #define alloc_huge_page_node(h, nid) NULL
+#define alloc_huge_page_noerr(v, a, r) NULL
 #define alloc_bootmem_huge_page(h) NULL
 #define hstate_file(f) NULL
 #define hstate_sizelog(s) NULL
@@ -396,6 +418,9 @@ static inline pgoff_t basepage_index(struct page *page)
 {
        return page->index;
 }
+#define dissolve_free_huge_pages(s, e) do {} while (0)
+#define pmd_huge_support()     0
+#define hugepage_migration_support(h)  0
 #endif /* CONFIG_HUGETLB_PAGE */
 
 #endif /* _LINUX_HUGETLB_H */
index e73f2b7..f1c27a7 100644 (file)
@@ -153,6 +153,7 @@ extern unsigned int reset_devices;
 void setup_arch(char **);
 void prepare_namespace(void);
 void __init load_default_modules(void);
+int __init init_rootfs(void);
 
 extern void (*late_time_init)(void);
 
index 5fa5afe..5e865b5 100644 (file)
@@ -120,7 +120,6 @@ struct irqaction {
 
 extern irqreturn_t no_action(int cpl, void *dev_id);
 
-#ifdef CONFIG_GENERIC_HARDIRQS
 extern int __must_check
 request_threaded_irq(unsigned int irq, irq_handler_t handler,
                     irq_handler_t thread_fn,
@@ -140,40 +139,6 @@ request_any_context_irq(unsigned int irq, irq_handler_t handler,
 extern int __must_check
 request_percpu_irq(unsigned int irq, irq_handler_t handler,
                   const char *devname, void __percpu *percpu_dev_id);
-#else
-
-extern int __must_check
-request_irq(unsigned int irq, irq_handler_t handler, unsigned long flags,
-           const char *name, void *dev);
-
-/*
- * Special function to avoid ifdeffery in kernel/irq/devres.c which
- * gets magically built by GENERIC_HARDIRQS=n architectures (sparc,
- * m68k). I really love these $@%#!* obvious Makefile references:
- * ../../../kernel/irq/devres.o
- */
-static inline int __must_check
-request_threaded_irq(unsigned int irq, irq_handler_t handler,
-                    irq_handler_t thread_fn,
-                    unsigned long flags, const char *name, void *dev)
-{
-       return request_irq(irq, handler, flags, name, dev);
-}
-
-static inline int __must_check
-request_any_context_irq(unsigned int irq, irq_handler_t handler,
-                       unsigned long flags, const char *name, void *dev_id)
-{
-       return request_irq(irq, handler, flags, name, dev_id);
-}
-
-static inline int __must_check
-request_percpu_irq(unsigned int irq, irq_handler_t handler,
-                  const char *devname, void __percpu *percpu_dev_id)
-{
-       return request_irq(irq, handler, 0, devname, percpu_dev_id);
-}
-#endif
 
 extern void free_irq(unsigned int, void *);
 extern void free_percpu_irq(unsigned int, void __percpu *);
@@ -221,7 +186,6 @@ extern void enable_irq(unsigned int irq);
 extern void enable_percpu_irq(unsigned int irq, unsigned int type);
 
 /* The following three functions are for the core kernel use only. */
-#ifdef CONFIG_GENERIC_HARDIRQS
 extern void suspend_device_irqs(void);
 extern void resume_device_irqs(void);
 #ifdef CONFIG_PM_SLEEP
@@ -229,13 +193,8 @@ extern int check_wakeup_irqs(void);
 #else
 static inline int check_wakeup_irqs(void) { return 0; }
 #endif
-#else
-static inline void suspend_device_irqs(void) { };
-static inline void resume_device_irqs(void) { };
-static inline int check_wakeup_irqs(void) { return 0; }
-#endif
 
-#if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_HARDIRQS)
+#if defined(CONFIG_SMP)
 
 extern cpumask_var_t irq_default_affinity;
 
@@ -287,9 +246,8 @@ static inline int irq_set_affinity_hint(unsigned int irq,
 {
        return -EINVAL;
 }
-#endif /* CONFIG_SMP && CONFIG_GENERIC_HARDIRQS */
+#endif /* CONFIG_SMP */
 
-#ifdef CONFIG_GENERIC_HARDIRQS
 /*
  * Special lockdep variants of irq disabling/enabling.
  * These should be used for locking constructs that
@@ -354,33 +312,6 @@ static inline int disable_irq_wake(unsigned int irq)
        return irq_set_irq_wake(irq, 0);
 }
 
-#else /* !CONFIG_GENERIC_HARDIRQS */
-/*
- * NOTE: non-genirq architectures, if they want to support the lock
- * validator need to define the methods below in their asm/irq.h
- * files, under an #ifdef CONFIG_LOCKDEP section.
- */
-#ifndef CONFIG_LOCKDEP
-#  define disable_irq_nosync_lockdep(irq)      disable_irq_nosync(irq)
-#  define disable_irq_nosync_lockdep_irqsave(irq, flags) \
-                                               disable_irq_nosync(irq)
-#  define disable_irq_lockdep(irq)             disable_irq(irq)
-#  define enable_irq_lockdep(irq)              enable_irq(irq)
-#  define enable_irq_lockdep_irqrestore(irq, flags) \
-                                               enable_irq(irq)
-# endif
-
-static inline int enable_irq_wake(unsigned int irq)
-{
-       return 0;
-}
-
-static inline int disable_irq_wake(unsigned int irq)
-{
-       return 0;
-}
-#endif /* CONFIG_GENERIC_HARDIRQS */
-
 
 #ifdef CONFIG_IRQ_FORCED_THREADING
 extern bool force_irqthreads;
@@ -655,7 +586,7 @@ void tasklet_hrtimer_cancel(struct tasklet_hrtimer *ttimer)
  * if more than one irq occurred.
  */
 
-#if defined(CONFIG_GENERIC_HARDIRQS) && !defined(CONFIG_GENERIC_IRQ_PROBE) 
+#if !defined(CONFIG_GENERIC_IRQ_PROBE) 
 static inline unsigned long probe_irq_on(void)
 {
        return 0;
index 3aeb730..7ea319e 100644 (file)
@@ -58,10 +58,26 @@ struct iommu_domain {
 #define IOMMU_CAP_CACHE_COHERENCY      0x1
 #define IOMMU_CAP_INTR_REMAP           0x2     /* isolates device intrs */
 
+/*
+ * Following constraints are specifc to FSL_PAMUV1:
+ *  -aperture must be power of 2, and naturally aligned
+ *  -number of windows must be power of 2, and address space size
+ *   of each window is determined by aperture size / # of windows
+ *  -the actual size of the mapped region of a window must be power
+ *   of 2 starting with 4KB and physical address must be naturally
+ *   aligned.
+ * DOMAIN_ATTR_FSL_PAMUV1 corresponds to the above mentioned contraints.
+ * The caller can invoke iommu_domain_get_attr to check if the underlying
+ * iommu implementation supports these constraints.
+ */
+
 enum iommu_attr {
        DOMAIN_ATTR_GEOMETRY,
        DOMAIN_ATTR_PAGING,
        DOMAIN_ATTR_WINDOWS,
+       DOMAIN_ATTR_FSL_PAMU_STASH,
+       DOMAIN_ATTR_FSL_PAMU_ENABLE,
+       DOMAIN_ATTR_FSL_PAMUV1,
        DOMAIN_ATTR_MAX,
 };
 
index c4d870b..19c19a5 100644 (file)
@@ -22,7 +22,7 @@ struct ipc_ids {
        int in_use;
        unsigned short seq;
        unsigned short seq_max;
-       struct rw_semaphore rw_mutex;
+       struct rw_semaphore rwsem;
        struct idr ipcs_idr;
        int next_id;
 };
index f04d3ba..56bb0dc 100644 (file)
@@ -382,8 +382,6 @@ extern void irq_cpu_online(void);
 extern void irq_cpu_offline(void);
 extern int __irq_set_affinity_locked(struct irq_data *data,  const struct cpumask *cpumask);
 
-#ifdef CONFIG_GENERIC_HARDIRQS
-
 #if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_PENDING_IRQ)
 void irq_move_irq(struct irq_data *data);
 void irq_move_masked_irq(struct irq_data *data);
@@ -802,11 +800,4 @@ static inline void irq_gc_lock(struct irq_chip_generic *gc) { }
 static inline void irq_gc_unlock(struct irq_chip_generic *gc) { }
 #endif
 
-#else /* !CONFIG_GENERIC_HARDIRQS */
-
-extern struct msi_desc *irq_get_msi_desc(unsigned int irq);
-extern int irq_set_msi_desc(unsigned int irq, struct msi_desc *entry);
-
-#endif /* CONFIG_GENERIC_HARDIRQS */
-
 #endif /* _LINUX_IRQ_H */
index 623325e..56fb646 100644 (file)
@@ -76,8 +76,6 @@ struct irq_desc {
 extern struct irq_desc irq_desc[NR_IRQS];
 #endif
 
-#ifdef CONFIG_GENERIC_HARDIRQS
-
 static inline struct irq_data *irq_desc_get_irq_data(struct irq_desc *desc)
 {
        return &desc->irq_data;
@@ -173,6 +171,5 @@ __irq_set_preflow_handler(unsigned int irq, irq_preflow_handler_t handler)
        desc->preflow_handler = handler;
 }
 #endif
-#endif
 
 #endif
index 0a2dc46..fdd5cc1 100644 (file)
@@ -4,23 +4,6 @@
 #include <uapi/linux/irqnr.h>
 
 
-#ifndef CONFIG_GENERIC_HARDIRQS
-#include <asm/irq.h>
-
-/*
- * Wrappers for non-genirq architectures:
- */
-#define nr_irqs                        NR_IRQS
-#define irq_to_desc(irq)       (&irq_desc[irq])
-
-# define for_each_irq_desc(irq, desc)          \
-       for (irq = 0; irq < nr_irqs; irq++)
-
-# define for_each_irq_desc_reverse(irq, desc)                          \
-       for (irq = nr_irqs - 1; irq >= 0; irq--)
-
-#else /* CONFIG_GENERIC_HARDIRQS */
-
 extern int nr_irqs;
 extern struct irq_desc *irq_to_desc(unsigned int irq);
 unsigned int irq_get_next_irq(unsigned int offset);
@@ -50,8 +33,6 @@ unsigned int irq_get_next_irq(unsigned int offset);
        for (irq = irq_get_next_irq(0); irq < nr_irqs;  \
             irq = irq_get_next_irq(irq + 1))
 
-#endif /* CONFIG_GENERIC_HARDIRQS */
-
 #define for_each_irq_nr(irq)                   \
        for (irq = 0; irq < nr_irqs; irq++)
 
index ed5f6ed..51c72be 100644 (file)
@@ -36,9 +36,6 @@ struct kernel_cpustat {
 };
 
 struct kernel_stat {
-#ifndef CONFIG_GENERIC_HARDIRQS
-       unsigned int irqs[NR_IRQS];
-#endif
        unsigned long irqs_sum;
        unsigned int softirqs[NR_SOFTIRQS];
 };
@@ -54,22 +51,6 @@ DECLARE_PER_CPU(struct kernel_cpustat, kernel_cpustat);
 
 extern unsigned long long nr_context_switches(void);
 
-#ifndef CONFIG_GENERIC_HARDIRQS
-
-struct irq_desc;
-
-static inline void kstat_incr_irqs_this_cpu(unsigned int irq,
-                                           struct irq_desc *desc)
-{
-       __this_cpu_inc(kstat.irqs[irq]);
-       __this_cpu_inc(kstat.irqs_sum);
-}
-
-static inline unsigned int kstat_irqs_cpu(unsigned int irq, int cpu)
-{
-       return kstat_cpu(cpu).irqs[irq];
-}
-#else
 #include <linux/irq.h>
 extern unsigned int kstat_irqs_cpu(unsigned int irq, int cpu);
 
@@ -79,8 +60,6 @@ do {                                                  \
        __this_cpu_inc(kstat.irqs_sum);                 \
 } while (0)
 
-#endif
-
 static inline void kstat_incr_softirqs_this_cpu(unsigned int irq)
 {
        __this_cpu_inc(kstat.softirqs[irq]);
@@ -94,20 +73,7 @@ static inline unsigned int kstat_softirqs_cpu(unsigned int irq, int cpu)
 /*
  * Number of interrupts per specific IRQ source, since bootup
  */
-#ifndef CONFIG_GENERIC_HARDIRQS
-static inline unsigned int kstat_irqs(unsigned int irq)
-{
-       unsigned int sum = 0;
-       int cpu;
-
-       for_each_possible_cpu(cpu)
-               sum += kstat_irqs_cpu(irq, cpu);
-
-       return sum;
-}
-#else
 extern unsigned int kstat_irqs(unsigned int irq);
-#endif
 
 /*
  * Number of interrupts per cpu, since bootup
index ca1d27a..925eaf2 100644 (file)
@@ -264,10 +264,36 @@ extern void arch_arm_kprobe(struct kprobe *p);
 extern void arch_disarm_kprobe(struct kprobe *p);
 extern int arch_init_kprobes(void);
 extern void show_registers(struct pt_regs *regs);
-extern kprobe_opcode_t *get_insn_slot(void);
-extern void free_insn_slot(kprobe_opcode_t *slot, int dirty);
 extern void kprobes_inc_nmissed_count(struct kprobe *p);
 
+struct kprobe_insn_cache {
+       struct mutex mutex;
+       void *(*alloc)(void);   /* allocate insn page */
+       void (*free)(void *);   /* free insn page */
+       struct list_head pages; /* list of kprobe_insn_page */
+       size_t insn_size;       /* size of instruction slot */
+       int nr_garbage;
+};
+
+extern kprobe_opcode_t *__get_insn_slot(struct kprobe_insn_cache *c);
+extern void __free_insn_slot(struct kprobe_insn_cache *c,
+                            kprobe_opcode_t *slot, int dirty);
+
+#define DEFINE_INSN_CACHE_OPS(__name)                                  \
+extern struct kprobe_insn_cache kprobe_##__name##_slots;               \
+                                                                       \
+static inline kprobe_opcode_t *get_##__name##_slot(void)               \
+{                                                                      \
+       return __get_insn_slot(&kprobe_##__name##_slots);               \
+}                                                                      \
+                                                                       \
+static inline void free_##__name##_slot(kprobe_opcode_t *slot, int dirty)\
+{                                                                      \
+       __free_insn_slot(&kprobe_##__name##_slots, slot, dirty);        \
+}                                                                      \
+
+DEFINE_INSN_CACHE_OPS(insn);
+
 #ifdef CONFIG_OPTPROBES
 /*
  * Internal structure for direct jump optimized probe
@@ -287,13 +313,13 @@ extern void arch_optimize_kprobes(struct list_head *oplist);
 extern void arch_unoptimize_kprobes(struct list_head *oplist,
                                    struct list_head *done_list);
 extern void arch_unoptimize_kprobe(struct optimized_kprobe *op);
-extern kprobe_opcode_t *get_optinsn_slot(void);
-extern void free_optinsn_slot(kprobe_opcode_t *slot, int dirty);
 extern int arch_within_optimized_kprobe(struct optimized_kprobe *op,
                                        unsigned long addr);
 
 extern void opt_pre_handler(struct kprobe *p, struct pt_regs *regs);
 
+DEFINE_INSN_CACHE_OPS(optinsn);
+
 #ifdef CONFIG_SYSCTL
 extern int sysctl_kprobes_optimization;
 extern int proc_kprobes_optimization_handler(struct ctl_table *table,
index ca645a0..0fbbc7a 100644 (file)
@@ -533,6 +533,7 @@ int gfn_to_page_many_atomic(struct kvm *kvm, gfn_t gfn, struct page **pages,
 
 struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
 unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn);
+unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable);
 unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn);
 void kvm_release_page_clean(struct page *page);
 void kvm_release_page_dirty(struct page *page);
diff --git a/include/linux/list_lru.h b/include/linux/list_lru.h
new file mode 100644 (file)
index 0000000..3ce5417
--- /dev/null
@@ -0,0 +1,131 @@
+/*
+ * Copyright (c) 2013 Red Hat, Inc. and Parallels Inc. All rights reserved.
+ * Authors: David Chinner and Glauber Costa
+ *
+ * Generic LRU infrastructure
+ */
+#ifndef _LRU_LIST_H
+#define _LRU_LIST_H
+
+#include <linux/list.h>
+#include <linux/nodemask.h>
+
+/* list_lru_walk_cb has to always return one of those */
+enum lru_status {
+       LRU_REMOVED,            /* item removed from list */
+       LRU_ROTATE,             /* item referenced, give another pass */
+       LRU_SKIP,               /* item cannot be locked, skip */
+       LRU_RETRY,              /* item not freeable. May drop the lock
+                                  internally, but has to return locked. */
+};
+
+struct list_lru_node {
+       spinlock_t              lock;
+       struct list_head        list;
+       /* kept as signed so we can catch imbalance bugs */
+       long                    nr_items;
+} ____cacheline_aligned_in_smp;
+
+struct list_lru {
+       struct list_lru_node    *node;
+       nodemask_t              active_nodes;
+};
+
+void list_lru_destroy(struct list_lru *lru);
+int list_lru_init(struct list_lru *lru);
+
+/**
+ * list_lru_add: add an element to the lru list's tail
+ * @list_lru: the lru pointer
+ * @item: the item to be added.
+ *
+ * If the element is already part of a list, this function returns without
+ * doing anything. Therefore the caller does not need to keep state about
+ * whether or not the element already belongs in the list and may lazily update
+ * it. Note however that this is valid for *a* list, not *this* list. If
+ * the caller organize itself in a way that elements can be in more than
+ * one type of list, it is up to the caller to fully remove the item from
+ * the previous list (with list_lru_del() for instance) before moving it
+ * to @list_lru
+ *
+ * Return value: true if the list was updated, false otherwise
+ */
+bool list_lru_add(struct list_lru *lru, struct list_head *item);
+
+/**
+ * list_lru_del: delete an element from the lru list
+ * @list_lru: the lru pointer
+ * @item: the item to be deleted.
+ *
+ * This function works analogously to list_lru_add in terms of list
+ * manipulation. The comments about an element already pertaining to
+ * a list are also valid for list_lru_del.
+ *
+ * Return value: true if the list was updated, false otherwise
+ */
+bool list_lru_del(struct list_lru *lru, struct list_head *item);
+
+/**
+ * list_lru_count_node: return the number of objects currently held by @lru
+ * @lru: the lru pointer.
+ * @nid: the node id to count from.
+ *
+ * Always returns a non-negative number, 0 for empty lists. There is no
+ * guarantee that the list is not updated while the count is being computed.
+ * Callers that want such a guarantee need to provide an outer lock.
+ */
+unsigned long list_lru_count_node(struct list_lru *lru, int nid);
+static inline unsigned long list_lru_count(struct list_lru *lru)
+{
+       long count = 0;
+       int nid;
+
+       for_each_node_mask(nid, lru->active_nodes)
+               count += list_lru_count_node(lru, nid);
+
+       return count;
+}
+
+typedef enum lru_status
+(*list_lru_walk_cb)(struct list_head *item, spinlock_t *lock, void *cb_arg);
+/**
+ * list_lru_walk_node: walk a list_lru, isolating and disposing freeable items.
+ * @lru: the lru pointer.
+ * @nid: the node id to scan from.
+ * @isolate: callback function that is responsible for deciding what to do with
+ *  the item currently being scanned
+ * @cb_arg: opaque type that will be passed to @isolate
+ * @nr_to_walk: how many items to scan.
+ *
+ * This function will scan all elements in a particular list_lru, calling the
+ * @isolate callback for each of those items, along with the current list
+ * spinlock and a caller-provided opaque. The @isolate callback can choose to
+ * drop the lock internally, but *must* return with the lock held. The callback
+ * will return an enum lru_status telling the list_lru infrastructure what to
+ * do with the object being scanned.
+ *
+ * Please note that nr_to_walk does not mean how many objects will be freed,
+ * just how many objects will be scanned.
+ *
+ * Return value: the number of objects effectively removed from the LRU.
+ */
+unsigned long list_lru_walk_node(struct list_lru *lru, int nid,
+                                list_lru_walk_cb isolate, void *cb_arg,
+                                unsigned long *nr_to_walk);
+
+static inline unsigned long
+list_lru_walk(struct list_lru *lru, list_lru_walk_cb isolate,
+             void *cb_arg, unsigned long nr_to_walk)
+{
+       long isolated = 0;
+       int nid;
+
+       for_each_node_mask(nid, lru->active_nodes) {
+               isolated += list_lru_walk_node(lru, nid, isolate,
+                                              cb_arg, &nr_to_walk);
+               if (nr_to_walk <= 0)
+                       break;
+       }
+       return isolated;
+}
+#endif /* _LRU_LIST_H */
index d21c13f..4356686 100644 (file)
@@ -67,8 +67,8 @@ int lz4hc_compress(const unsigned char *src, size_t src_len,
  *     note :  Destination buffer must be already allocated.
  *             slightly faster than lz4_decompress_unknownoutputsize()
  */
-int lz4_decompress(const char *src, size_t *src_len, char *dest,
-               size_t actual_dest_len);
+int lz4_decompress(const unsigned char *src, size_t *src_len,
+               unsigned char *dest, size_t actual_dest_len);
 
 /*
  * lz4_decompress_unknownoutputsize()
@@ -82,6 +82,6 @@ int lz4_decompress(const char *src, size_t *src_len, char *dest,
  *               Error if return (< 0)
  *     note :  Destination buffer must be already allocated.
  */
-int lz4_decompress_unknownoutputsize(const char *src, size_t src_len,
-               char *dest, size_t *dest_len);
+int lz4_decompress_unknownoutputsize(const unsigned char *src, size_t src_len,
+               unsigned char *dest, size_t *dest_len);
 #endif
index f388203..31e95ac 100644 (file)
@@ -60,6 +60,8 @@ int memblock_reserve(phys_addr_t base, phys_addr_t size);
 void memblock_trim_memory(phys_addr_t align);
 
 #ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
+int memblock_search_pfn_nid(unsigned long pfn, unsigned long *start_pfn,
+                           unsigned long  *end_pfn);
 void __next_mem_pfn_range(int *idx, int nid, unsigned long *out_start_pfn,
                          unsigned long *out_end_pfn, int *out_nid);
 
index 6c41609..60e9587 100644 (file)
@@ -30,9 +30,21 @@ struct page;
 struct mm_struct;
 struct kmem_cache;
 
-/* Stats that can be updated by kernel. */
-enum mem_cgroup_page_stat_item {
-       MEMCG_NR_FILE_MAPPED, /* # of pages charged as file rss */
+/*
+ * The corresponding mem_cgroup_stat_names is defined in mm/memcontrol.c.
+ * These two lists must be kept in sync with each other.
+ */
+enum mem_cgroup_stat_index {
+       /*
+        * For MEM_CONTAINER_TYPE_ALL, usage = pagecache + rss.
+        */
+       MEM_CGROUP_STAT_CACHE,          /* # of pages charged as cache */
+       MEM_CGROUP_STAT_RSS,            /* # of pages charged as anon rss */
+       MEM_CGROUP_STAT_RSS_HUGE,       /* # of pages charged as anon huge */
+       MEM_CGROUP_STAT_FILE_MAPPED,    /* # of pages charged as file rss */
+       MEM_CGROUP_STAT_WRITEBACK,      /* # of pages under writeback */
+       MEM_CGROUP_STAT_SWAP,           /* # of pages, swapped out */
+       MEM_CGROUP_STAT_NSTATS,
 };
 
 struct mem_cgroup_reclaim_cookie {
@@ -41,6 +53,23 @@ struct mem_cgroup_reclaim_cookie {
        unsigned int generation;
 };
 
+enum mem_cgroup_filter_t {
+       VISIT,          /* visit current node */
+       SKIP,           /* skip the current node and continue traversal */
+       SKIP_TREE,      /* skip the whole subtree and continue traversal */
+};
+
+/*
+ * mem_cgroup_filter_t predicate might instruct mem_cgroup_iter_cond how to
+ * iterate through the hierarchy tree. Each tree element is checked by the
+ * predicate before it is returned by the iterator. If a filter returns
+ * SKIP or SKIP_TREE then the iterator code continues traversal (with the
+ * next node down the hierarchy or the next node that doesn't belong under the
+ * memcg's subtree).
+ */
+typedef enum mem_cgroup_filter_t
+(*mem_cgroup_iter_filter)(struct mem_cgroup *memcg, struct mem_cgroup *root);
+
 #ifdef CONFIG_MEMCG
 /*
  * All "charge" functions with gfp_mask should use GFP_KERNEL or
@@ -108,9 +137,18 @@ mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
 extern void mem_cgroup_end_migration(struct mem_cgroup *memcg,
        struct page *oldpage, struct page *newpage, bool migration_ok);
 
-struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *,
-                                  struct mem_cgroup *,
-                                  struct mem_cgroup_reclaim_cookie *);
+struct mem_cgroup *mem_cgroup_iter_cond(struct mem_cgroup *root,
+                                  struct mem_cgroup *prev,
+                                  struct mem_cgroup_reclaim_cookie *reclaim,
+                                  mem_cgroup_iter_filter cond);
+
+static inline struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
+                                  struct mem_cgroup *prev,
+                                  struct mem_cgroup_reclaim_cookie *reclaim)
+{
+       return mem_cgroup_iter_cond(root, prev, reclaim, NULL);
+}
+
 void mem_cgroup_iter_break(struct mem_cgroup *, struct mem_cgroup *);
 
 /*
@@ -125,6 +163,48 @@ extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg,
 extern void mem_cgroup_replace_page_cache(struct page *oldpage,
                                        struct page *newpage);
 
+/**
+ * mem_cgroup_toggle_oom - toggle the memcg OOM killer for the current task
+ * @new: true to enable, false to disable
+ *
+ * Toggle whether a failed memcg charge should invoke the OOM killer
+ * or just return -ENOMEM.  Returns the previous toggle state.
+ *
+ * NOTE: Any path that enables the OOM killer before charging must
+ *       call mem_cgroup_oom_synchronize() afterward to finalize the
+ *       OOM handling and clean up.
+ */
+static inline bool mem_cgroup_toggle_oom(bool new)
+{
+       bool old;
+
+       old = current->memcg_oom.may_oom;
+       current->memcg_oom.may_oom = new;
+
+       return old;
+}
+
+static inline void mem_cgroup_enable_oom(void)
+{
+       bool old = mem_cgroup_toggle_oom(true);
+
+       WARN_ON(old == true);
+}
+
+static inline void mem_cgroup_disable_oom(void)
+{
+       bool old = mem_cgroup_toggle_oom(false);
+
+       WARN_ON(old == false);
+}
+
+static inline bool task_in_memcg_oom(struct task_struct *p)
+{
+       return p->memcg_oom.in_memcg_oom;
+}
+
+bool mem_cgroup_oom_synchronize(void);
+
 #ifdef CONFIG_MEMCG_SWAP
 extern int do_swap_account;
 #endif
@@ -165,24 +245,24 @@ static inline void mem_cgroup_end_update_page_stat(struct page *page,
 }
 
 void mem_cgroup_update_page_stat(struct page *page,
-                                enum mem_cgroup_page_stat_item idx,
+                                enum mem_cgroup_stat_index idx,
                                 int val);
 
 static inline void mem_cgroup_inc_page_stat(struct page *page,
-                                           enum mem_cgroup_page_stat_item idx)
+                                           enum mem_cgroup_stat_index idx)
 {
        mem_cgroup_update_page_stat(page, idx, 1);
 }
 
 static inline void mem_cgroup_dec_page_stat(struct page *page,
-                                           enum mem_cgroup_page_stat_item idx)
+                                           enum mem_cgroup_stat_index idx)
 {
        mem_cgroup_update_page_stat(page, idx, -1);
 }
 
-unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
-                                               gfp_t gfp_mask,
-                                               unsigned long *total_scanned);
+enum mem_cgroup_filter_t
+mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg,
+               struct mem_cgroup *root);
 
 void __mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx);
 static inline void mem_cgroup_count_vm_event(struct mm_struct *mm,
@@ -296,6 +376,15 @@ static inline void mem_cgroup_end_migration(struct mem_cgroup *memcg,
                struct page *oldpage, struct page *newpage, bool migration_ok)
 {
 }
+static inline struct mem_cgroup *
+mem_cgroup_iter_cond(struct mem_cgroup *root,
+               struct mem_cgroup *prev,
+               struct mem_cgroup_reclaim_cookie *reclaim,
+               mem_cgroup_iter_filter cond)
+{
+       /* first call must return non-NULL, second return NULL */
+       return (struct mem_cgroup *)(unsigned long)!prev;
+}
 
 static inline struct mem_cgroup *
 mem_cgroup_iter(struct mem_cgroup *root,
@@ -348,22 +437,45 @@ static inline void mem_cgroup_end_update_page_stat(struct page *page,
 {
 }
 
+static inline bool mem_cgroup_toggle_oom(bool new)
+{
+       return false;
+}
+
+static inline void mem_cgroup_enable_oom(void)
+{
+}
+
+static inline void mem_cgroup_disable_oom(void)
+{
+}
+
+static inline bool task_in_memcg_oom(struct task_struct *p)
+{
+       return false;
+}
+
+static inline bool mem_cgroup_oom_synchronize(void)
+{
+       return false;
+}
+
 static inline void mem_cgroup_inc_page_stat(struct page *page,
-                                           enum mem_cgroup_page_stat_item idx)
+                                           enum mem_cgroup_stat_index idx)
 {
 }
 
 static inline void mem_cgroup_dec_page_stat(struct page *page,
-                                           enum mem_cgroup_page_stat_item idx)
+                                           enum mem_cgroup_stat_index idx)
 {
 }
 
 static inline
-unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
-                                           gfp_t gfp_mask,
-                                           unsigned long *total_scanned)
+enum mem_cgroup_filter_t
+mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg,
+               struct mem_cgroup *root)
 {
-       return 0;
+       return VISIT;
 }
 
 static inline void mem_cgroup_split_huge_fixup(struct page *head)
index 0d7df39..da6716b 100644 (file)
@@ -91,7 +91,6 @@ static inline struct mempolicy *mpol_dup(struct mempolicy *pol)
 }
 
 #define vma_policy(vma) ((vma)->vm_policy)
-#define vma_set_policy(vma, pol) ((vma)->vm_policy = (pol))
 
 static inline void mpol_get(struct mempolicy *pol)
 {
@@ -126,6 +125,7 @@ struct shared_policy {
        spinlock_t lock;
 };
 
+int vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst);
 void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol);
 int mpol_set_shared_policy(struct shared_policy *info,
                                struct vm_area_struct *vma,
@@ -173,7 +173,7 @@ extern int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol);
 /* Check if a vma is migratable */
 static inline int vma_migratable(struct vm_area_struct *vma)
 {
-       if (vma->vm_flags & (VM_IO | VM_HUGETLB | VM_PFNMAP))
+       if (vma->vm_flags & (VM_IO | VM_PFNMAP))
                return 0;
        /*
         * Migration allocates pages in the highest zone. If we cannot
@@ -240,7 +240,12 @@ mpol_shared_policy_lookup(struct shared_policy *sp, unsigned long idx)
 }
 
 #define vma_policy(vma) NULL
-#define vma_set_policy(vma, pol) do {} while(0)
+
+static inline int
+vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst)
+{
+       return 0;
+}
 
 static inline void numa_policy_init(void)
 {
index a405d3d..8d3c57f 100644 (file)
@@ -41,8 +41,6 @@ extern int migrate_page(struct address_space *,
                        struct page *, struct page *, enum migrate_mode);
 extern int migrate_pages(struct list_head *l, new_page_t x,
                unsigned long private, enum migrate_mode mode, int reason);
-extern int migrate_huge_page(struct page *, new_page_t x,
-               unsigned long private, enum migrate_mode mode);
 
 extern int fail_migrate_page(struct address_space *,
                        struct page *, struct page *);
@@ -55,6 +53,9 @@ extern int migrate_vmas(struct mm_struct *mm,
 extern void migrate_page_copy(struct page *newpage, struct page *page);
 extern int migrate_huge_page_move_mapping(struct address_space *mapping,
                                  struct page *newpage, struct page *page);
+extern int migrate_page_move_mapping(struct address_space *mapping,
+               struct page *newpage, struct page *page,
+               struct buffer_head *head, enum migrate_mode mode);
 #else
 
 static inline void putback_lru_pages(struct list_head *l) {}
@@ -62,9 +63,6 @@ static inline void putback_movable_pages(struct list_head *l) {}
 static inline int migrate_pages(struct list_head *l, new_page_t x,
                unsigned long private, enum migrate_mode mode, int reason)
        { return -ENOSYS; }
-static inline int migrate_huge_page(struct page *page, new_page_t x,
-               unsigned long private, enum migrate_mode mode)
-       { return -ENOSYS; }
 
 static inline int migrate_prep(void) { return -ENOSYS; }
 static inline int migrate_prep_local(void) { return -ENOSYS; }
index d2d59b4..8b6e55e 100644 (file)
@@ -115,6 +115,12 @@ extern unsigned int kobjsize(const void *objp);
 #define VM_ARCH_1      0x01000000      /* Architecture-specific flag */
 #define VM_DONTDUMP    0x04000000      /* Do not include in the core dump */
 
+#ifdef CONFIG_MEM_SOFT_DIRTY
+# define VM_SOFTDIRTY  0x08000000      /* Not soft dirty clean area */
+#else
+# define VM_SOFTDIRTY  0
+#endif
+
 #define VM_MIXEDMAP    0x10000000      /* Can contain "struct page" and pure PFN pages */
 #define VM_HUGEPAGE    0x20000000      /* MADV_HUGEPAGE marked this vma */
 #define VM_NOHUGEPAGE  0x40000000      /* MADV_NOHUGEPAGE marked this vma */
@@ -170,6 +176,7 @@ extern pgprot_t protection_map[16];
 #define FAULT_FLAG_RETRY_NOWAIT        0x10    /* Don't drop mmap_sem and wait when retrying */
 #define FAULT_FLAG_KILLABLE    0x20    /* The fault task is in SIGKILL killable region */
 #define FAULT_FLAG_TRIED       0x40    /* second try */
+#define FAULT_FLAG_USER                0x80    /* The fault originated in userspace */
 
 /*
  * vm_fault is filled by the the pagefault handler and passed to the vma's
@@ -489,20 +496,6 @@ static inline int compound_order(struct page *page)
        return (unsigned long)page[1].lru.prev;
 }
 
-static inline int compound_trans_order(struct page *page)
-{
-       int order;
-       unsigned long flags;
-
-       if (!PageHead(page))
-               return 0;
-
-       flags = compound_lock_irqsave(page);
-       order = compound_order(page);
-       compound_unlock_irqrestore(page, flags);
-       return order;
-}
-
 static inline void set_compound_order(struct page *page, unsigned long order)
 {
        page[1].lru.prev = (void *)order;
@@ -637,12 +630,12 @@ static inline enum zone_type page_zonenum(const struct page *page)
 #endif
 
 /*
- * The identification function is only used by the buddy allocator for
- * determining if two pages could be buddies. We are not really
- * identifying a zone since we could be using a the section number
- * id if we have not node id available in page flags.
- * We guarantee only that it will return the same value for two
- * combinable pages in a zone.
+ * The identification function is mainly used by the buddy allocator for
+ * determining if two pages could be buddies. We are not really identifying
+ * the zone since we could be using the section number id if we do not have
+ * node id available in page flags.
+ * We only guarantee that it will return the same value for two combinable
+ * pages in a zone.
  */
 static inline int page_zone_id(struct page *page)
 {
@@ -884,11 +877,12 @@ static inline int page_mapped(struct page *page)
 #define VM_FAULT_NOPAGE        0x0100  /* ->fault installed the pte, not return page */
 #define VM_FAULT_LOCKED        0x0200  /* ->fault locked the returned page */
 #define VM_FAULT_RETRY 0x0400  /* ->fault blocked, must retry */
+#define VM_FAULT_FALLBACK 0x0800       /* huge page fault failed, fall back to small */
 
 #define VM_FAULT_HWPOISON_LARGE_MASK 0xf000 /* encodes hpage index for large hwpoison */
 
 #define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS | VM_FAULT_HWPOISON | \
-                        VM_FAULT_HWPOISON_LARGE)
+                        VM_FAULT_FALLBACK | VM_FAULT_HWPOISON_LARGE)
 
 /* Encode hstate index for a hwpoisoned large page */
 #define VM_FAULT_SET_HINDEX(x) ((x) << 12)
@@ -992,7 +986,7 @@ static inline void unmap_shared_mapping_range(struct address_space *mapping,
        unmap_mapping_range(mapping, holebegin, holelen, 0);
 }
 
-extern void truncate_pagecache(struct inode *inode, loff_t old, loff_t new);
+extern void truncate_pagecache(struct inode *inode, loff_t new);
 extern void truncate_setsize(struct inode *inode, loff_t newsize);
 void truncate_pagecache_range(struct inode *inode, loff_t offset, loff_t end);
 int truncate_inode_page(struct address_space *mapping, struct page *page);
index 1397ccf..cf55945 100644 (file)
@@ -2,6 +2,7 @@
 #define LINUX_MM_INLINE_H
 
 #include <linux/huge_mm.h>
+#include <linux/swap.h>
 
 /**
  * page_is_file_cache - should the page be on a file LRU or anon LRU?
index faf4b7c..d9851ee 100644 (file)
@@ -322,6 +322,7 @@ struct mm_rss_stat {
        atomic_long_t count[NR_MM_COUNTERS];
 };
 
+struct kioctx_table;
 struct mm_struct {
        struct vm_area_struct * mmap;           /* list of VMAs */
        struct rb_root mm_rb;
@@ -383,8 +384,8 @@ struct mm_struct {
 
        struct core_state *core_state; /* coredumping support */
 #ifdef CONFIG_AIO
-       spinlock_t              ioctx_lock;
-       struct hlist_head       ioctx_list;
+       spinlock_t                      ioctx_lock;
+       struct kioctx_table __rcu       *ioctx_table;
 #endif
 #ifdef CONFIG_MM_OWNER
        /*
index af4a3b7..bd791e4 100644 (file)
@@ -105,6 +105,7 @@ struct zone_padding {
 enum zone_stat_item {
        /* First 128 byte cacheline (assuming 64 bit words) */
        NR_FREE_PAGES,
+       NR_ALLOC_BATCH,
        NR_LRU_BASE,
        NR_INACTIVE_ANON = NR_LRU_BASE, /* must match order of LRU_[IN]ACTIVE */
        NR_ACTIVE_ANON,         /*  "     "     "   "       "         */
@@ -352,7 +353,6 @@ struct zone {
         * free areas of different sizes
         */
        spinlock_t              lock;
-       int                     all_unreclaimable; /* All pages pinned */
 #if defined CONFIG_COMPACTION || defined CONFIG_CMA
        /* Set to true when the PG_migrate_skip bits should be cleared */
        bool                    compact_blockskip_flush;
index bc95b2b..97fbecd 100644 (file)
 #define PCI_DEVICE_ID_HP_CISSE         0x323a
 #define PCI_DEVICE_ID_HP_CISSF         0x323b
 #define PCI_DEVICE_ID_HP_CISSH         0x323c
+#define PCI_DEVICE_ID_HP_CISSI         0x3239
 #define PCI_DEVICE_ID_HP_ZX2_IOC       0x4031
 
 #define PCI_VENDOR_ID_PCTECH           0x1042
diff --git a/include/linux/percpu_ida.h b/include/linux/percpu_ida.h
new file mode 100644 (file)
index 0000000..0b23edb
--- /dev/null
@@ -0,0 +1,60 @@
+#ifndef __PERCPU_IDA_H__
+#define __PERCPU_IDA_H__
+
+#include <linux/types.h>
+#include <linux/bitops.h>
+#include <linux/init.h>
+#include <linux/spinlock_types.h>
+#include <linux/wait.h>
+#include <linux/cpumask.h>
+
+struct percpu_ida_cpu;
+
+struct percpu_ida {
+       /*
+        * number of tags available to be allocated, as passed to
+        * percpu_ida_init()
+        */
+       unsigned                        nr_tags;
+
+       struct percpu_ida_cpu __percpu  *tag_cpu;
+
+       /*
+        * Bitmap of cpus that (may) have tags on their percpu freelists:
+        * steal_tags() uses this to decide when to steal tags, and which cpus
+        * to try stealing from.
+        *
+        * It's ok for a freelist to be empty when its bit is set - steal_tags()
+        * will just keep looking - but the bitmap _must_ be set whenever a
+        * percpu freelist does have tags.
+        */
+       cpumask_t                       cpus_have_tags;
+
+       struct {
+               spinlock_t              lock;
+               /*
+                * When we go to steal tags from another cpu (see steal_tags()),
+                * we want to pick a cpu at random. Cycling through them every
+                * time we steal is a bit easier and more or less equivalent:
+                */
+               unsigned                cpu_last_stolen;
+
+               /* For sleeping on allocation failure */
+               wait_queue_head_t       wait;
+
+               /*
+                * Global freelist - it's a stack where nr_free points to the
+                * top
+                */
+               unsigned                nr_free;
+               unsigned                *freelist;
+       } ____cacheline_aligned_in_smp;
+};
+
+int percpu_ida_alloc(struct percpu_ida *pool, gfp_t gfp);
+void percpu_ida_free(struct percpu_ida *pool, unsigned tag);
+
+void percpu_ida_destroy(struct percpu_ida *pool);
+int percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags);
+
+#endif /* __PERCPU_IDA_H__ */
diff --git a/include/linux/platform_data/exynos_thermal.h b/include/linux/platform_data/exynos_thermal.h
deleted file mode 100644 (file)
index da7e627..0000000
+++ /dev/null
@@ -1,119 +0,0 @@
-/*
- * exynos_thermal.h - Samsung EXYNOS TMU (Thermal Management Unit)
- *
- *  Copyright (C) 2011 Samsung Electronics
- *  Donggeun Kim <dg77.kim@samsung.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-
-#ifndef _LINUX_EXYNOS_THERMAL_H
-#define _LINUX_EXYNOS_THERMAL_H
-#include <linux/cpu_cooling.h>
-
-enum calibration_type {
-       TYPE_ONE_POINT_TRIMMING,
-       TYPE_TWO_POINT_TRIMMING,
-       TYPE_NONE,
-};
-
-enum soc_type {
-       SOC_ARCH_EXYNOS4210 = 1,
-       SOC_ARCH_EXYNOS,
-};
-/**
- * struct freq_clip_table
- * @freq_clip_max: maximum frequency allowed for this cooling state.
- * @temp_level: Temperature level at which the temperature clipping will
- *     happen.
- * @mask_val: cpumask of the allowed cpu's where the clipping will take place.
- *
- * This structure is required to be filled and passed to the
- * cpufreq_cooling_unregister function.
- */
-struct freq_clip_table {
-       unsigned int freq_clip_max;
-       unsigned int temp_level;
-       const struct cpumask *mask_val;
-};
-
-/**
- * struct exynos_tmu_platform_data
- * @threshold: basic temperature for generating interrupt
- *            25 <= threshold <= 125 [unit: degree Celsius]
- * @threshold_falling: differntial value for setting threshold
- *                    of temperature falling interrupt.
- * @trigger_levels: array for each interrupt levels
- *     [unit: degree Celsius]
- *     0: temperature for trigger_level0 interrupt
- *        condition for trigger_level0 interrupt:
- *             current temperature > threshold + trigger_levels[0]
- *     1: temperature for trigger_level1 interrupt
- *        condition for trigger_level1 interrupt:
- *             current temperature > threshold + trigger_levels[1]
- *     2: temperature for trigger_level2 interrupt
- *        condition for trigger_level2 interrupt:
- *             current temperature > threshold + trigger_levels[2]
- *     3: temperature for trigger_level3 interrupt
- *        condition for trigger_level3 interrupt:
- *             current temperature > threshold + trigger_levels[3]
- * @trigger_level0_en:
- *     1 = enable trigger_level0 interrupt,
- *     0 = disable trigger_level0 interrupt
- * @trigger_level1_en:
- *     1 = enable trigger_level1 interrupt,
- *     0 = disable trigger_level1 interrupt
- * @trigger_level2_en:
- *     1 = enable trigger_level2 interrupt,
- *     0 = disable trigger_level2 interrupt
- * @trigger_level3_en:
- *     1 = enable trigger_level3 interrupt,
- *     0 = disable trigger_level3 interrupt
- * @gain: gain of amplifier in the positive-TC generator block
- *     0 <= gain <= 15
- * @reference_voltage: reference voltage of amplifier
- *     in the positive-TC generator block
- *     0 <= reference_voltage <= 31
- * @noise_cancel_mode: noise cancellation mode
- *     000, 100, 101, 110 and 111 can be different modes
- * @type: determines the type of SOC
- * @efuse_value: platform defined fuse value
- * @cal_type: calibration type for temperature
- * @freq_clip_table: Table representing frequency reduction percentage.
- * @freq_tab_count: Count of the above table as frequency reduction may
- *     applicable to only some of the trigger levels.
- *
- * This structure is required for configuration of exynos_tmu driver.
- */
-struct exynos_tmu_platform_data {
-       u8 threshold;
-       u8 threshold_falling;
-       u8 trigger_levels[4];
-       bool trigger_level0_en;
-       bool trigger_level1_en;
-       bool trigger_level2_en;
-       bool trigger_level3_en;
-
-       u8 gain;
-       u8 reference_voltage;
-       u8 noise_cancel_mode;
-       u32 efuse_value;
-
-       enum calibration_type cal_type;
-       enum soc_type type;
-       struct freq_clip_table freq_tab[4];
-       unsigned int freq_tab_count;
-};
-#endif /* _LINUX_EXYNOS_THERMAL_H */
index 202e290..51a2ff5 100644 (file)
@@ -36,6 +36,13 @@ struct lp55xx_predef_pattern {
        u8 size_b;
 };
 
+enum lp8501_pwr_sel {
+       LP8501_ALL_VDD,         /* D1~9 are connected to VDD */
+       LP8501_6VDD_3VOUT,      /* D1~6 with VDD, D7~9 with VOUT */
+       LP8501_3VDD_6VOUT,      /* D1~6 with VOUT, D7~9 with VDD */
+       LP8501_ALL_VOUT,        /* D1~9 are connected to VOUT */
+};
+
 /*
  * struct lp55xx_platform_data
  * @led_config        : Configurable led class device
@@ -67,6 +74,9 @@ struct lp55xx_platform_data {
        /* Predefined pattern data */
        struct lp55xx_predef_pattern *patterns;
        unsigned int num_patterns;
+
+       /* LP8501 specific */
+       enum lp8501_pwr_sel pwr_sel;
 };
 
 #endif /* _LEDS_LP55XX_H */
diff --git a/include/linux/platform_data/leds-pca9633.h b/include/linux/platform_data/leds-pca9633.h
deleted file mode 100644 (file)
index c5bf29b..0000000
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * PCA9633 LED chip driver.
- *
- * Copyright 2012 bct electronic GmbH
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
- * 02110-1301 USA
- */
-
-#ifndef __LINUX_PCA9633_H
-#define __LINUX_PCA9633_H
-#include <linux/leds.h>
-
-enum pca9633_outdrv {
-       PCA9633_OPEN_DRAIN,
-       PCA9633_TOTEM_POLE, /* aka push-pull */
-};
-
-struct pca9633_platform_data {
-       struct led_platform_data leds;
-       enum pca9633_outdrv outdrv;
-};
-
-#endif /* __LINUX_PCA9633_H*/
diff --git a/include/linux/platform_data/leds-pca963x.h b/include/linux/platform_data/leds-pca963x.h
new file mode 100644 (file)
index 0000000..e731f00
--- /dev/null
@@ -0,0 +1,42 @@
+/*
+ * PCA963X LED chip driver.
+ *
+ * Copyright 2012 bct electronic GmbH
+ * Copyright 2013 Qtechnology A/S
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
+ * 02110-1301 USA
+ */
+
+#ifndef __LINUX_PCA963X_H
+#define __LINUX_PCA963X_H
+#include <linux/leds.h>
+
+enum pca963x_outdrv {
+       PCA963X_OPEN_DRAIN,
+       PCA963X_TOTEM_POLE, /* aka push-pull */
+};
+
+enum pca963x_blink_type {
+       PCA963X_SW_BLINK,
+       PCA963X_HW_BLINK,
+};
+
+struct pca963x_platform_data {
+       struct led_platform_data leds;
+       enum pca963x_outdrv outdrv;
+       enum pca963x_blink_type blink_type;
+};
+
+#endif /* __LINUX_PCA963X_H*/
index ffc444c..4039407 100644 (file)
@@ -231,6 +231,7 @@ unsigned long radix_tree_next_hole(struct radix_tree_root *root,
 unsigned long radix_tree_prev_hole(struct radix_tree_root *root,
                                unsigned long index, unsigned long max_scan);
 int radix_tree_preload(gfp_t gfp_mask);
+int radix_tree_maybe_preload(gfp_t gfp_mask);
 void radix_tree_init(void);
 void *radix_tree_tag_set(struct radix_tree_root *root,
                        unsigned long index, unsigned int tag);
index 69e37c2..753207c 100644 (file)
@@ -25,7 +25,7 @@ extern int ramfs_nommu_mmap(struct file *file, struct vm_area_struct *vma);
 
 extern const struct file_operations ramfs_file_operations;
 extern const struct vm_operations_struct generic_file_vm_ops;
-extern int __init init_rootfs(void);
+extern int __init init_ramfs_fs(void);
 
 int ramfs_fill_super(struct super_block *sb, void *data, int silent);
 
index 0022c1b..aa870a4 100644 (file)
@@ -68,6 +68,10 @@ extern struct rb_node *rb_prev(const struct rb_node *);
 extern struct rb_node *rb_first(const struct rb_root *);
 extern struct rb_node *rb_last(const struct rb_root *);
 
+/* Postorder iteration - always visit the parent after its children */
+extern struct rb_node *rb_first_postorder(const struct rb_root *);
+extern struct rb_node *rb_next_postorder(const struct rb_node *);
+
 /* Fast replacement of a single node without remove/rebalance/add/rebalance */
 extern void rb_replace_node(struct rb_node *victim, struct rb_node *new, 
                            struct rb_root *root);
@@ -81,4 +85,22 @@ static inline void rb_link_node(struct rb_node * node, struct rb_node * parent,
        *rb_link = node;
 }
 
+/**
+ * rbtree_postorder_for_each_entry_safe - iterate over rb_root in post order of
+ * given type safe against removal of rb_node entry
+ *
+ * @pos:       the 'type *' to use as a loop cursor.
+ * @n:         another 'type *' to use as temporary storage
+ * @root:      'rb_root *' of the rbtree.
+ * @field:     the name of the rb_node field within 'type'.
+ */
+#define rbtree_postorder_for_each_entry_safe(pos, n, root, field) \
+       for (pos = rb_entry(rb_first_postorder(root), typeof(*pos), field),\
+               n = rb_entry(rb_next_postorder(&pos->field), \
+                       typeof(*pos), field); \
+            &pos->field; \
+            pos = n, \
+               n = rb_entry(rb_next_postorder(&pos->field), \
+                       typeof(*pos), field))
+
 #endif /* _LINUX_RBTREE_H */
index 96a509b..201a697 100644 (file)
@@ -54,7 +54,7 @@ struct res_counter {
        struct res_counter *parent;
 };
 
-#define RESOURCE_MAX (unsigned long long)LLONG_MAX
+#define RES_COUNTER_MAX ULLONG_MAX
 
 /**
  * Helpers to interact with userspace
index ce1e1c0..6682da3 100644 (file)
@@ -1393,6 +1393,13 @@ struct task_struct {
                unsigned long memsw_nr_pages; /* uncharged mem+swap usage */
        } memcg_batch;
        unsigned int memcg_kmem_skip_account;
+       struct memcg_oom_info {
+               unsigned int may_oom:1;
+               unsigned int in_memcg_oom:1;
+               unsigned int oom_locked:1;
+               int wakeups;
+               struct mem_cgroup *wait_on_memcg;
+       } memcg_oom;
 #endif
 #ifdef CONFIG_UPROBES
        struct uprobe_task *utask;
@@ -2169,15 +2176,15 @@ static inline bool thread_group_leader(struct task_struct *p)
  * all we care about is that we have a task with the appropriate
  * pid, we don't actually care if we have the right task.
  */
-static inline int has_group_leader_pid(struct task_struct *p)
+static inline bool has_group_leader_pid(struct task_struct *p)
 {
-       return p->pid == p->tgid;
+       return task_pid(p) == p->signal->leader_pid;
 }
 
 static inline
-int same_thread_group(struct task_struct *p1, struct task_struct *p2)
+bool same_thread_group(struct task_struct *p1, struct task_struct *p2)
 {
-       return p1->tgid == p2->tgid;
+       return p1->signal == p2->signal;
 }
 
 static inline struct task_struct *next_thread(const struct task_struct *p)
index 1829905..21a2093 100644 (file)
@@ -3,15 +3,21 @@
 /*
  * Reader/writer consistent mechanism without starving writers. This type of
  * lock for data where the reader wants a consistent set of information
- * and is willing to retry if the information changes.  Readers never
- * block but they may have to retry if a writer is in
- * progress. Writers do not wait for readers. 
+ * and is willing to retry if the information changes. There are two types
+ * of readers:
+ * 1. Sequence readers which never block a writer but they may have to retry
+ *    if a writer is in progress by detecting change in sequence number.
+ *    Writers do not wait for a sequence reader.
+ * 2. Locking readers which will wait if a writer or another locking reader
+ *    is in progress. A locking reader in progress will also block a writer
+ *    from going forward. Unlike the regular rwlock, the read lock here is
+ *    exclusive so that only one locking reader can get it.
  *
- * This is not as cache friendly as brlock. Also, this will not work
+ * This is not as cache friendly as brlock. Also, this may not work well
  * for data that contains pointers, because any writer could
  * invalidate a pointer that a reader was following.
  *
- * Expected reader usage:
+ * Expected non-blocking reader usage:
  *     do {
  *         seq = read_seqbegin(&foo);
  *     ...
@@ -268,4 +274,56 @@ write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags)
        spin_unlock_irqrestore(&sl->lock, flags);
 }
 
+/*
+ * A locking reader exclusively locks out other writers and locking readers,
+ * but doesn't update the sequence number. Acts like a normal spin_lock/unlock.
+ * Don't need preempt_disable() because that is in the spin_lock already.
+ */
+static inline void read_seqlock_excl(seqlock_t *sl)
+{
+       spin_lock(&sl->lock);
+}
+
+static inline void read_sequnlock_excl(seqlock_t *sl)
+{
+       spin_unlock(&sl->lock);
+}
+
+static inline void read_seqlock_excl_bh(seqlock_t *sl)
+{
+       spin_lock_bh(&sl->lock);
+}
+
+static inline void read_sequnlock_excl_bh(seqlock_t *sl)
+{
+       spin_unlock_bh(&sl->lock);
+}
+
+static inline void read_seqlock_excl_irq(seqlock_t *sl)
+{
+       spin_lock_irq(&sl->lock);
+}
+
+static inline void read_sequnlock_excl_irq(seqlock_t *sl)
+{
+       spin_unlock_irq(&sl->lock);
+}
+
+static inline unsigned long __read_seqlock_excl_irqsave(seqlock_t *sl)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&sl->lock, flags);
+       return flags;
+}
+
+#define read_seqlock_excl_irqsave(lock, flags)                         \
+       do { flags = __read_seqlock_excl_irqsave(lock); } while (0)
+
+static inline void
+read_sequnlock_excl_irqrestore(seqlock_t *sl, unsigned long flags)
+{
+       spin_unlock_irqrestore(&sl->lock, flags);
+}
+
 #endif /* __LINUX_SEQLOCK_H */
index ac6b8ee..68c0970 100644 (file)
@@ -4,39 +4,67 @@
 /*
  * This struct is used to pass information from page reclaim to the shrinkers.
  * We consolidate the values for easier extention later.
+ *
+ * The 'gfpmask' refers to the allocation we are currently trying to
+ * fulfil.
  */
 struct shrink_control {
        gfp_t gfp_mask;
 
-       /* How many slab objects shrinker() should scan and try to reclaim */
+       /*
+        * How many objects scan_objects should scan and try to reclaim.
+        * This is reset before every call, so it is safe for callees
+        * to modify.
+        */
        unsigned long nr_to_scan;
+
+       /* shrink from these nodes */
+       nodemask_t nodes_to_scan;
+       /* current node being shrunk (for NUMA aware shrinkers) */
+       int nid;
 };
 
+#define SHRINK_STOP (~0UL)
 /*
  * A callback you can register to apply pressure to ageable caches.
  *
- * 'sc' is passed shrink_control which includes a count 'nr_to_scan'
- * and a 'gfpmask'.  It should look through the least-recently-used
- * 'nr_to_scan' entries and attempt to free them up.  It should return
- * the number of objects which remain in the cache.  If it returns -1, it means
- * it cannot do any scanning at this time (eg. there is a risk of deadlock).
+ * @count_objects should return the number of freeable items in the cache. If
+ * there are no objects to free or the number of freeable items cannot be
+ * determined, it should return 0. No deadlock checks should be done during the
+ * count callback - the shrinker relies on aggregating scan counts that couldn't
+ * be executed due to potential deadlocks to be run at a later call when the
+ * deadlock condition is no longer pending.
  *
- * The 'gfpmask' refers to the allocation we are currently trying to
- * fulfil.
+ * @scan_objects will only be called if @count_objects returned a non-zero
+ * value for the number of freeable objects. The callout should scan the cache
+ * and attempt to free items from the cache. It should then return the number
+ * of objects freed during the scan, or SHRINK_STOP if progress cannot be made
+ * due to potential deadlocks. If SHRINK_STOP is returned, then no further
+ * attempts to call the @scan_objects will be made from the current reclaim
+ * context.
  *
- * Note that 'shrink' will be passed nr_to_scan == 0 when the VM is
- * querying the cache size, so a fastpath for that case is appropriate.
+ * @flags determine the shrinker abilities, like numa awareness
  */
 struct shrinker {
-       int (*shrink)(struct shrinker *, struct shrink_control *sc);
+       unsigned long (*count_objects)(struct shrinker *,
+                                      struct shrink_control *sc);
+       unsigned long (*scan_objects)(struct shrinker *,
+                                     struct shrink_control *sc);
+
        int seeks;      /* seeks to recreate an obj */
        long batch;     /* reclaim batch size, 0 = default */
+       unsigned long flags;
 
        /* These are for internal use */
        struct list_head list;
-       atomic_long_t nr_in_batch; /* objs pending delete */
+       /* objs pending delete, per node */
+       atomic_long_t *nr_deferred;
 };
 #define DEFAULT_SEEKS 2 /* A good number if you don't know better. */
-extern void register_shrinker(struct shrinker *);
+
+/* Flags */
+#define SHRINKER_NUMA_AWARE (1 << 0)
+
+extern int register_shrinker(struct shrinker *);
 extern void unregister_shrinker(struct shrinker *);
 #endif
index 6c5cc0e..74f1058 100644 (file)
@@ -4,6 +4,8 @@
  * (C) SGI 2006, Christoph Lameter
  *     Cleaned up and restructured to ease the addition of alternative
  *     implementations of SLAB allocators.
+ * (C) Linux Foundation 2008-2013
+ *      Unified interface for all slab allocators
  */
 
 #ifndef _LINUX_SLAB_H
@@ -94,6 +96,7 @@
 #define ZERO_OR_NULL_PTR(x) ((unsigned long)(x) <= \
                                (unsigned long)ZERO_SIZE_PTR)
 
+#include <linux/kmemleak.h>
 
 struct mem_cgroup;
 /*
@@ -289,6 +292,57 @@ static __always_inline int kmalloc_index(size_t size)
 }
 #endif /* !CONFIG_SLOB */
 
+void *__kmalloc(size_t size, gfp_t flags);
+void *kmem_cache_alloc(struct kmem_cache *, gfp_t flags);
+
+#ifdef CONFIG_NUMA
+void *__kmalloc_node(size_t size, gfp_t flags, int node);
+void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node);
+#else
+static __always_inline void *__kmalloc_node(size_t size, gfp_t flags, int node)
+{
+       return __kmalloc(size, flags);
+}
+
+static __always_inline void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t flags, int node)
+{
+       return kmem_cache_alloc(s, flags);
+}
+#endif
+
+#ifdef CONFIG_TRACING
+extern void *kmem_cache_alloc_trace(struct kmem_cache *, gfp_t, size_t);
+
+#ifdef CONFIG_NUMA
+extern void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
+                                          gfp_t gfpflags,
+                                          int node, size_t size);
+#else
+static __always_inline void *
+kmem_cache_alloc_node_trace(struct kmem_cache *s,
+                             gfp_t gfpflags,
+                             int node, size_t size)
+{
+       return kmem_cache_alloc_trace(s, gfpflags, size);
+}
+#endif /* CONFIG_NUMA */
+
+#else /* CONFIG_TRACING */
+static __always_inline void *kmem_cache_alloc_trace(struct kmem_cache *s,
+               gfp_t flags, size_t size)
+{
+       return kmem_cache_alloc(s, flags);
+}
+
+static __always_inline void *
+kmem_cache_alloc_node_trace(struct kmem_cache *s,
+                             gfp_t gfpflags,
+                             int node, size_t size)
+{
+       return kmem_cache_alloc_node(s, gfpflags, node);
+}
+#endif /* CONFIG_TRACING */
+
 #ifdef CONFIG_SLAB
 #include <linux/slab_def.h>
 #endif
@@ -297,9 +351,60 @@ static __always_inline int kmalloc_index(size_t size)
 #include <linux/slub_def.h>
 #endif
 
-#ifdef CONFIG_SLOB
-#include <linux/slob_def.h>
+static __always_inline void *
+kmalloc_order(size_t size, gfp_t flags, unsigned int order)
+{
+       void *ret;
+
+       flags |= (__GFP_COMP | __GFP_KMEMCG);
+       ret = (void *) __get_free_pages(flags, order);
+       kmemleak_alloc(ret, size, 1, flags);
+       return ret;
+}
+
+#ifdef CONFIG_TRACING
+extern void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order);
+#else
+static __always_inline void *
+kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
+{
+       return kmalloc_order(size, flags, order);
+}
+#endif
+
+static __always_inline void *kmalloc_large(size_t size, gfp_t flags)
+{
+       unsigned int order = get_order(size);
+       return kmalloc_order_trace(size, flags, order);
+}
+
+/**
+ * kmalloc - allocate memory
+ * @size: how many bytes of memory are required.
+ * @flags: the type of memory to allocate (see kcalloc).
+ *
+ * kmalloc is the normal method of allocating memory
+ * for objects smaller than page size in the kernel.
+ */
+static __always_inline void *kmalloc(size_t size, gfp_t flags)
+{
+       if (__builtin_constant_p(size)) {
+               if (size > KMALLOC_MAX_CACHE_SIZE)
+                       return kmalloc_large(size, flags);
+#ifndef CONFIG_SLOB
+               if (!(flags & GFP_DMA)) {
+                       int index = kmalloc_index(size);
+
+                       if (!index)
+                               return ZERO_SIZE_PTR;
+
+                       return kmem_cache_alloc_trace(kmalloc_caches[index],
+                                       flags, size);
+               }
 #endif
+       }
+       return __kmalloc(size, flags);
+}
 
 /*
  * Determine size used for the nth kmalloc cache.
@@ -321,6 +426,23 @@ static __always_inline int kmalloc_size(int n)
        return 0;
 }
 
+static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
+{
+#ifndef CONFIG_SLOB
+       if (__builtin_constant_p(size) &&
+               size <= KMALLOC_MAX_CACHE_SIZE && !(flags & GFP_DMA)) {
+               int i = kmalloc_index(size);
+
+               if (!i)
+                       return ZERO_SIZE_PTR;
+
+               return kmem_cache_alloc_node_trace(kmalloc_caches[i],
+                                               flags, node, size);
+       }
+#endif
+       return __kmalloc_node(size, flags, node);
+}
+
 /*
  * Setting ARCH_SLAB_MINALIGN in arch headers allows a different alignment.
  * Intended for arches that get misalignment faults even for 64 bit integer
@@ -451,36 +573,6 @@ static inline void *kcalloc(size_t n, size_t size, gfp_t flags)
        return kmalloc_array(n, size, flags | __GFP_ZERO);
 }
 
-#if !defined(CONFIG_NUMA) && !defined(CONFIG_SLOB)
-/**
- * kmalloc_node - allocate memory from a specific node
- * @size: how many bytes of memory are required.
- * @flags: the type of memory to allocate (see kmalloc).
- * @node: node to allocate from.
- *
- * kmalloc() for non-local nodes, used to allocate from a specific node
- * if available. Equivalent to kmalloc() in the non-NUMA single-node
- * case.
- */
-static inline void *kmalloc_node(size_t size, gfp_t flags, int node)
-{
-       return kmalloc(size, flags);
-}
-
-static inline void *__kmalloc_node(size_t size, gfp_t flags, int node)
-{
-       return __kmalloc(size, flags);
-}
-
-void *kmem_cache_alloc(struct kmem_cache *, gfp_t);
-
-static inline void *kmem_cache_alloc_node(struct kmem_cache *cachep,
-                                       gfp_t flags, int node)
-{
-       return kmem_cache_alloc(cachep, flags);
-}
-#endif /* !CONFIG_NUMA && !CONFIG_SLOB */
-
 /*
  * kmalloc_track_caller is a special version of kmalloc that records the
  * calling function of the routine calling it for slab leak tracking instead
index cd40158..e9346b4 100644 (file)
@@ -3,20 +3,6 @@
 
 /*
  * Definitions unique to the original Linux SLAB allocator.
- *
- * What we provide here is a way to optimize the frequent kmalloc
- * calls in the kernel by selecting the appropriate general cache
- * if kmalloc was called with a size that can be established at
- * compile time.
- */
-
-#include <linux/init.h>
-#include <linux/compiler.h>
-
-/*
- * struct kmem_cache
- *
- * manages a cache.
  */
 
 struct kmem_cache {
@@ -102,96 +88,4 @@ struct kmem_cache {
         */
 };
 
-void *kmem_cache_alloc(struct kmem_cache *, gfp_t);
-void *__kmalloc(size_t size, gfp_t flags);
-
-#ifdef CONFIG_TRACING
-extern void *kmem_cache_alloc_trace(struct kmem_cache *, gfp_t, size_t);
-#else
-static __always_inline void *
-kmem_cache_alloc_trace(struct kmem_cache *cachep, gfp_t flags, size_t size)
-{
-       return kmem_cache_alloc(cachep, flags);
-}
-#endif
-
-static __always_inline void *kmalloc(size_t size, gfp_t flags)
-{
-       struct kmem_cache *cachep;
-       void *ret;
-
-       if (__builtin_constant_p(size)) {
-               int i;
-
-               if (!size)
-                       return ZERO_SIZE_PTR;
-
-               if (WARN_ON_ONCE(size > KMALLOC_MAX_SIZE))
-                       return NULL;
-
-               i = kmalloc_index(size);
-
-#ifdef CONFIG_ZONE_DMA
-               if (flags & GFP_DMA)
-                       cachep = kmalloc_dma_caches[i];
-               else
-#endif
-                       cachep = kmalloc_caches[i];
-
-               ret = kmem_cache_alloc_trace(cachep, flags, size);
-
-               return ret;
-       }
-       return __kmalloc(size, flags);
-}
-
-#ifdef CONFIG_NUMA
-extern void *__kmalloc_node(size_t size, gfp_t flags, int node);
-extern void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node);
-
-#ifdef CONFIG_TRACING
-extern void *kmem_cache_alloc_node_trace(struct kmem_cache *cachep,
-                                        gfp_t flags,
-                                        int nodeid,
-                                        size_t size);
-#else
-static __always_inline void *
-kmem_cache_alloc_node_trace(struct kmem_cache *cachep,
-                           gfp_t flags,
-                           int nodeid,
-                           size_t size)
-{
-       return kmem_cache_alloc_node(cachep, flags, nodeid);
-}
-#endif
-
-static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
-{
-       struct kmem_cache *cachep;
-
-       if (__builtin_constant_p(size)) {
-               int i;
-
-               if (!size)
-                       return ZERO_SIZE_PTR;
-
-               if (WARN_ON_ONCE(size > KMALLOC_MAX_SIZE))
-                       return NULL;
-
-               i = kmalloc_index(size);
-
-#ifdef CONFIG_ZONE_DMA
-               if (flags & GFP_DMA)
-                       cachep = kmalloc_dma_caches[i];
-               else
-#endif
-                       cachep = kmalloc_caches[i];
-
-               return kmem_cache_alloc_node_trace(cachep, flags, node, size);
-       }
-       return __kmalloc_node(size, flags, node);
-}
-
-#endif /* CONFIG_NUMA */
-
 #endif /* _LINUX_SLAB_DEF_H */
diff --git a/include/linux/slob_def.h b/include/linux/slob_def.h
deleted file mode 100644 (file)
index 095a5a4..0000000
+++ /dev/null
@@ -1,31 +0,0 @@
-#ifndef __LINUX_SLOB_DEF_H
-#define __LINUX_SLOB_DEF_H
-
-#include <linux/numa.h>
-
-void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node);
-
-static __always_inline void *kmem_cache_alloc(struct kmem_cache *cachep,
-                                             gfp_t flags)
-{
-       return kmem_cache_alloc_node(cachep, flags, NUMA_NO_NODE);
-}
-
-void *__kmalloc_node(size_t size, gfp_t flags, int node);
-
-static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
-{
-       return __kmalloc_node(size, flags, node);
-}
-
-static __always_inline void *kmalloc(size_t size, gfp_t flags)
-{
-       return __kmalloc_node(size, flags, NUMA_NO_NODE);
-}
-
-static __always_inline void *__kmalloc(size_t size, gfp_t flags)
-{
-       return kmalloc(size, flags);
-}
-
-#endif /* __LINUX_SLOB_DEF_H */
index 027276f..cc0b67e 100644 (file)
@@ -6,14 +6,8 @@
  *
  * (C) 2007 SGI, Christoph Lameter
  */
-#include <linux/types.h>
-#include <linux/gfp.h>
-#include <linux/bug.h>
-#include <linux/workqueue.h>
 #include <linux/kobject.h>
 
-#include <linux/kmemleak.h>
-
 enum stat_item {
        ALLOC_FASTPATH,         /* Allocation from cpu slab */
        ALLOC_SLOWPATH,         /* Allocation by getting a new cpu slab */
@@ -104,108 +98,4 @@ struct kmem_cache {
        struct kmem_cache_node *node[MAX_NUMNODES];
 };
 
-void *kmem_cache_alloc(struct kmem_cache *, gfp_t);
-void *__kmalloc(size_t size, gfp_t flags);
-
-static __always_inline void *
-kmalloc_order(size_t size, gfp_t flags, unsigned int order)
-{
-       void *ret;
-
-       flags |= (__GFP_COMP | __GFP_KMEMCG);
-       ret = (void *) __get_free_pages(flags, order);
-       kmemleak_alloc(ret, size, 1, flags);
-       return ret;
-}
-
-/**
- * Calling this on allocated memory will check that the memory
- * is expected to be in use, and print warnings if not.
- */
-#ifdef CONFIG_SLUB_DEBUG
-extern bool verify_mem_not_deleted(const void *x);
-#else
-static inline bool verify_mem_not_deleted(const void *x)
-{
-       return true;
-}
-#endif
-
-#ifdef CONFIG_TRACING
-extern void *
-kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size);
-extern void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order);
-#else
-static __always_inline void *
-kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
-{
-       return kmem_cache_alloc(s, gfpflags);
-}
-
-static __always_inline void *
-kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
-{
-       return kmalloc_order(size, flags, order);
-}
-#endif
-
-static __always_inline void *kmalloc_large(size_t size, gfp_t flags)
-{
-       unsigned int order = get_order(size);
-       return kmalloc_order_trace(size, flags, order);
-}
-
-static __always_inline void *kmalloc(size_t size, gfp_t flags)
-{
-       if (__builtin_constant_p(size)) {
-               if (size > KMALLOC_MAX_CACHE_SIZE)
-                       return kmalloc_large(size, flags);
-
-               if (!(flags & GFP_DMA)) {
-                       int index = kmalloc_index(size);
-
-                       if (!index)
-                               return ZERO_SIZE_PTR;
-
-                       return kmem_cache_alloc_trace(kmalloc_caches[index],
-                                       flags, size);
-               }
-       }
-       return __kmalloc(size, flags);
-}
-
-#ifdef CONFIG_NUMA
-void *__kmalloc_node(size_t size, gfp_t flags, int node);
-void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node);
-
-#ifdef CONFIG_TRACING
-extern void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
-                                          gfp_t gfpflags,
-                                          int node, size_t size);
-#else
-static __always_inline void *
-kmem_cache_alloc_node_trace(struct kmem_cache *s,
-                             gfp_t gfpflags,
-                             int node, size_t size)
-{
-       return kmem_cache_alloc_node(s, gfpflags, node);
-}
-#endif
-
-static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
-{
-       if (__builtin_constant_p(size) &&
-               size <= KMALLOC_MAX_CACHE_SIZE && !(flags & GFP_DMA)) {
-               int index = kmalloc_index(size);
-
-               if (!index)
-                       return ZERO_SIZE_PTR;
-
-               return kmem_cache_alloc_node_trace(kmalloc_caches[index],
-                              flags, node, size);
-       }
-       return __kmalloc_node(size, flags, node);
-}
-#endif
-
 #endif /* _LINUX_SLUB_DEF_H */
index c181399..cfb7ca0 100644 (file)
@@ -28,6 +28,27 @@ extern unsigned int total_cpus;
 int smp_call_function_single(int cpuid, smp_call_func_t func, void *info,
                             int wait);
 
+/*
+ * Call a function on all processors
+ */
+int on_each_cpu(smp_call_func_t func, void *info, int wait);
+
+/*
+ * Call a function on processors specified by mask, which might include
+ * the local one.
+ */
+void on_each_cpu_mask(const struct cpumask *mask, smp_call_func_t func,
+               void *info, bool wait);
+
+/*
+ * Call a function on each processor for which the supplied function
+ * cond_func returns a positive value. This may include the local
+ * processor.
+ */
+void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info),
+               smp_call_func_t func, void *info, bool wait,
+               gfp_t gfp_flags);
+
 #ifdef CONFIG_SMP
 
 #include <linux/preempt.h>
@@ -94,27 +115,6 @@ void generic_smp_call_function_single_interrupt(void);
 static inline void call_function_init(void) { }
 #endif
 
-/*
- * Call a function on all processors
- */
-int on_each_cpu(smp_call_func_t func, void *info, int wait);
-
-/*
- * Call a function on processors specified by mask, which might include
- * the local one.
- */
-void on_each_cpu_mask(const struct cpumask *mask, smp_call_func_t func,
-               void *info, bool wait);
-
-/*
- * Call a function on each processor for which the supplied function
- * cond_func returns a positive value. This may include the local
- * processor.
- */
-void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info),
-               smp_call_func_t func, void *info, bool wait,
-               gfp_t gfp_flags);
-
 /*
  * Mark the boot cpu "online" so that it can call console drivers in
  * printk() and can access its per-cpu storage.
@@ -139,43 +139,6 @@ static inline int up_smp_call_function(smp_call_func_t func, void *info)
 }
 #define smp_call_function(func, info, wait) \
                        (up_smp_call_function(func, info))
-#define on_each_cpu(func, info, wait)          \
-       ({                                      \
-               unsigned long __flags;          \
-               local_irq_save(__flags);        \
-               func(info);                     \
-               local_irq_restore(__flags);     \
-               0;                              \
-       })
-/*
- * Note we still need to test the mask even for UP
- * because we actually can get an empty mask from
- * code that on SMP might call us without the local
- * CPU in the mask.
- */
-#define on_each_cpu_mask(mask, func, info, wait) \
-       do {                                            \
-               if (cpumask_test_cpu(0, (mask))) {      \
-                       local_irq_disable();            \
-                       (func)(info);                   \
-                       local_irq_enable();             \
-               }                                       \
-       } while (0)
-/*
- * Preemption is disabled here to make sure the cond_func is called under the
- * same condtions in UP and SMP.
- */
-#define on_each_cpu_cond(cond_func, func, info, wait, gfp_flags)\
-       do {                                                    \
-               void *__info = (info);                          \
-               preempt_disable();                              \
-               if ((cond_func)(0, __info)) {                   \
-                       local_irq_disable();                    \
-                       (func)(__info);                         \
-                       local_irq_enable();                     \
-               }                                               \
-               preempt_enable();                               \
-       } while (0)
 
 static inline void smp_send_reschedule(int cpu) { }
 #define smp_prepare_boot_cpu()                 do {} while (0)
index d95cde5..46ba0c6 100644 (file)
@@ -181,6 +181,33 @@ enum {
 #define COUNT_CONTINUED        0x80    /* See swap_map continuation for full count */
 #define SWAP_MAP_SHMEM 0xbf    /* Owned by shmem/tmpfs, in first swap_map */
 
+/*
+ * We use this to track usage of a cluster. A cluster is a block of swap disk
+ * space with SWAPFILE_CLUSTER pages long and naturally aligns in disk. All
+ * free clusters are organized into a list. We fetch an entry from the list to
+ * get a free cluster.
+ *
+ * The data field stores next cluster if the cluster is free or cluster usage
+ * counter otherwise. The flags field determines if a cluster is free. This is
+ * protected by swap_info_struct.lock.
+ */
+struct swap_cluster_info {
+       unsigned int data:24;
+       unsigned int flags:8;
+};
+#define CLUSTER_FLAG_FREE 1 /* This cluster is free */
+#define CLUSTER_FLAG_NEXT_NULL 2 /* This cluster has no next cluster */
+
+/*
+ * We assign a cluster to each CPU, so each CPU can allocate swap entry from
+ * its own cluster and swapout sequentially. The purpose is to optimize swapout
+ * throughput.
+ */
+struct percpu_cluster {
+       struct swap_cluster_info index; /* Current cluster index */
+       unsigned int next; /* Likely next allocation offset */
+};
+
 /*
  * The in-memory structure used to track swap areas.
  */
@@ -191,14 +218,16 @@ struct swap_info_struct {
        signed char     next;           /* next type on the swap list */
        unsigned int    max;            /* extent of the swap_map */
        unsigned char *swap_map;        /* vmalloc'ed array of usage counts */
+       struct swap_cluster_info *cluster_info; /* cluster info. Only for SSD */
+       struct swap_cluster_info free_cluster_head; /* free cluster list head */
+       struct swap_cluster_info free_cluster_tail; /* free cluster list tail */
        unsigned int lowest_bit;        /* index of first free in swap_map */
        unsigned int highest_bit;       /* index of last free in swap_map */
        unsigned int pages;             /* total of usable pages of swap */
        unsigned int inuse_pages;       /* number of those currently in use */
        unsigned int cluster_next;      /* likely index for next allocation */
        unsigned int cluster_nr;        /* countdown to next cluster search */
-       unsigned int lowest_alloc;      /* while preparing discard cluster */
-       unsigned int highest_alloc;     /* while preparing discard cluster */
+       struct percpu_cluster __percpu *percpu_cluster; /* per cpu's swap location */
        struct swap_extent *curr_swap_extent;
        struct swap_extent first_swap_extent;
        struct block_device *bdev;      /* swap device or bdev of swap file */
@@ -212,14 +241,18 @@ struct swap_info_struct {
                                         * protect map scan related fields like
                                         * swap_map, lowest_bit, highest_bit,
                                         * inuse_pages, cluster_next,
-                                        * cluster_nr, lowest_alloc and
-                                        * highest_alloc. other fields are only
-                                        * changed at swapon/swapoff, so are
-                                        * protected by swap_lock. changing
-                                        * flags need hold this lock and
-                                        * swap_lock. If both locks need hold,
-                                        * hold swap_lock first.
+                                        * cluster_nr, lowest_alloc,
+                                        * highest_alloc, free/discard cluster
+                                        * list. other fields are only changed
+                                        * at swapon/swapoff, so are protected
+                                        * by swap_lock. changing flags need
+                                        * hold this lock and swap_lock. If
+                                        * both locks need hold, hold swap_lock
+                                        * first.
                                         */
+       struct work_struct discard_work; /* discard worker */
+       struct swap_cluster_info discard_cluster_head; /* list head of discard clusters */
+       struct swap_cluster_info discard_cluster_tail; /* list tail of discard clusters */
 };
 
 struct swap_list_t {
@@ -247,7 +280,7 @@ extern void activate_page(struct page *);
 extern void mark_page_accessed(struct page *);
 extern void lru_add_drain(void);
 extern void lru_add_drain_cpu(int cpu);
-extern int lru_add_drain_all(void);
+extern void lru_add_drain_all(void);
 extern void rotate_reclaimable_page(struct page *page);
 extern void deactivate_page(struct page *page);
 extern void swap_setup(void);
@@ -414,6 +447,7 @@ mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout)
 
 #else /* CONFIG_SWAP */
 
+#define swap_address_space(entry)              (NULL)
 #define get_nr_swap_pages()                    0L
 #define total_swap_pages                       0L
 #define total_swapcache_pages()                        0UL
index 84662ec..7fac04e 100644 (file)
@@ -186,6 +186,7 @@ extern struct trace_event_functions exit_syscall_print_funcs;
 #define __SYSCALL_DEFINEx(x, name, ...)                                        \
        asmlinkage long sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));      \
        static inline long SYSC##name(__MAP(x,__SC_DECL,__VA_ARGS__));  \
+       asmlinkage long SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__));      \
        asmlinkage long SyS##name(__MAP(x,__SC_LONG,__VA_ARGS__))       \
        {                                                               \
                long ret = SYSC##name(__MAP(x,__SC_CAST,__VA_ARGS__));  \
index a386a1c..b268d3c 100644 (file)
@@ -207,6 +207,16 @@ struct thermal_bind_params {
         * See Documentation/thermal/sysfs-api.txt for more information.
         */
        int trip_mask;
+
+       /*
+        * This is an array of cooling state limits. Must have exactly
+        * 2 * thermal_zone.number_of_trip_points. It is an array consisting
+        * of tuples <lower-state upper-state> of state limits. Each trip
+        * will be associated with one state limit tuple when binding.
+        * A NULL pointer means <THERMAL_NO_LIMITS THERMAL_NO_LIMITS>
+        * on all trips.
+        */
+       unsigned long *binding_limits;
        int (*match) (struct thermal_zone_device *tz,
                        struct thermal_cooling_device *cdev);
 };
@@ -214,6 +224,14 @@ struct thermal_bind_params {
 /* Structure to define Thermal Zone parameters */
 struct thermal_zone_params {
        char governor_name[THERMAL_NAME_LENGTH];
+
+       /*
+        * a boolean to indicate if the thermal to hwmon sysfs interface
+        * is required. when no_hwmon == false, a hwmon sysfs interface
+        * will be created. when no_hwmon == true, nothing will be done
+        */
+       bool no_hwmon;
+
        int num_tbps;   /* Number of tbp entries */
        struct thermal_bind_params *tbp;
 };
diff --git a/include/linux/time-armada-370-xp.h b/include/linux/time-armada-370-xp.h
deleted file mode 100644 (file)
index 6fb0856..0000000
+++ /dev/null
@@ -1,16 +0,0 @@
-/*
- * Marvell Armada 370/XP SoC timer handling.
- *
- * Copyright (C) 2012 Marvell
- *
- * Lior Amsalem <alior@marvell.com>
- * Gregory CLEMENT <gregory.clement@free-electrons.com>
- * Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
- *
- */
-#ifndef __TIME_ARMADA_370_XPPRCMU_H
-#define __TIME_ARMADA_370_XPPRCMU_H
-
-void armada_370_xp_timer_init(void);
-
-#endif
index b3726e6..dd3edd7 100644 (file)
@@ -141,6 +141,7 @@ extern int do_adjtimex(struct timex *);
 extern void hardpps(const struct timespec *, const struct timespec *);
 
 int read_current_timer(unsigned long *timer_val);
+void ntp_notify_cmos_timer(void);
 
 /* The clock frequency of the i8253/i8254 PIT */
 #define PIT_TICK_RATE 1193182ul
index bd6cf61..1855f0a 100644 (file)
@@ -70,6 +70,12 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
                THP_ZERO_PAGE_ALLOC,
                THP_ZERO_PAGE_ALLOC_FAILED,
 #endif
+#ifdef CONFIG_SMP
+               NR_TLB_REMOTE_FLUSH,    /* cpu tried to flush others' tlbs */
+               NR_TLB_REMOTE_FLUSH_RECEIVED,/* cpu received ipi for flush */
+#endif
+               NR_TLB_LOCAL_FLUSH_ALL,
+               NR_TLB_LOCAL_FLUSH_ONE,
                NR_VM_EVENT_ITEMS
 };
 
index c586679..e4b9480 100644 (file)
@@ -143,7 +143,6 @@ static inline unsigned long zone_page_state_snapshot(struct zone *zone,
 }
 
 extern unsigned long global_reclaimable_pages(void);
-extern unsigned long zone_reclaimable_pages(struct zone *zone);
 
 #ifdef CONFIG_NUMA
 /*
@@ -198,7 +197,7 @@ extern void __inc_zone_state(struct zone *, enum zone_stat_item);
 extern void dec_zone_state(struct zone *, enum zone_stat_item);
 extern void __dec_zone_state(struct zone *, enum zone_stat_item);
 
-void refresh_cpu_vm_stats(int);
+void cpu_vm_stats_fold(int cpu);
 void refresh_zone_stat_thresholds(void);
 
 void drain_zonestat(struct zone *zone, struct per_cpu_pageset *);
@@ -255,6 +254,7 @@ static inline void __dec_zone_page_state(struct page *page,
 
 static inline void refresh_cpu_vm_stats(int cpu) { }
 static inline void refresh_zone_stat_thresholds(void) { }
+static inline void cpu_vm_stats_fold(int cpu) { }
 
 static inline void drain_zonestat(struct zone *zone,
                        struct per_cpu_pageset *pset) { }
index 4e198ca..021b8a3 100644 (file)
@@ -98,8 +98,6 @@ int try_to_writeback_inodes_sb(struct super_block *, enum wb_reason reason);
 int try_to_writeback_inodes_sb_nr(struct super_block *, unsigned long nr,
                                  enum wb_reason reason);
 void sync_inodes_sb(struct super_block *);
-long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages,
-                               enum wb_reason reason);
 void wakeup_flusher_threads(long nr_pages, enum wb_reason reason);
 void inode_wait_for_writeback(struct inode *inode);
 
index 4c7c01a..c38a005 100644 (file)
@@ -26,6 +26,8 @@
 #ifndef NET_9P_CLIENT_H
 #define NET_9P_CLIENT_H
 
+#include <linux/utsname.h>
+
 /* Number of requests per row */
 #define P9_ROW_MAXTAG 255
 
@@ -134,6 +136,7 @@ struct p9_req_t {
  * @tagpool - transaction id accounting for session
  * @reqs - 2D array of requests
  * @max_tag - current maximum tag id allocated
+ * @name - node name used as client id
  *
  * The client structure is used to keep track of various per-client
  * state that has been instantiated.
@@ -164,6 +167,8 @@ struct p9_client {
        struct p9_idpool *tagpool;
        struct p9_req_t *reqs[P9_ROW_MAXTAG];
        int max_tag;
+
+       char name[__NEW_UTS_LEN + 1];
 };
 
 /**
index 3c4211f..ea0cc26 100644 (file)
@@ -190,7 +190,9 @@ static inline struct neighbour *__ipv6_neigh_lookup(struct net_device *dev, cons
 }
 
 extern int                     ndisc_init(void);
+extern int                     ndisc_late_init(void);
 
+extern void                    ndisc_late_cleanup(void);
 extern void                    ndisc_cleanup(void);
 
 extern int                     ndisc_rcv(struct sk_buff *skb);
index d477bfb..66d42ed 100644 (file)
@@ -144,6 +144,7 @@ enum scsi_timeouts {
 #define ACCESS_CONTROL_IN     0x86
 #define ACCESS_CONTROL_OUT    0x87
 #define READ_16               0x88
+#define COMPARE_AND_WRITE     0x89
 #define WRITE_16              0x8a
 #define READ_ATTRIBUTE        0x8c
 #define WRITE_ATTRIBUTE              0x8d
index d35412a..fe66533 100644 (file)
@@ -55,7 +55,7 @@ struct rsnd_ssi_platform_info {
 /*
  * flags
  */
-#define RSND_SCU_USB_HPBIF             (1 << 31) /* it needs RSND_SSI_DEPENDENT */
+#define RSND_SCU_USE_HPBIF             (1 << 31) /* it needs RSND_SSI_DEPENDENT */
 
 struct rsnd_scu_platform_info {
        u32 flags;
index e5d09d2..a12589c 100644 (file)
@@ -6,13 +6,13 @@ struct iscsit_transport {
 #define ISCSIT_TRANSPORT_NAME  16
        char name[ISCSIT_TRANSPORT_NAME];
        int transport_type;
+       int priv_size;
        struct module *owner;
        struct list_head t_node;
        int (*iscsit_setup_np)(struct iscsi_np *, struct __kernel_sockaddr_storage *);
        int (*iscsit_accept_np)(struct iscsi_np *, struct iscsi_conn *);
        void (*iscsit_free_np)(struct iscsi_np *);
        void (*iscsit_free_conn)(struct iscsi_conn *);
-       struct iscsi_cmd *(*iscsit_alloc_cmd)(struct iscsi_conn *, gfp_t);
        int (*iscsit_get_login_rx)(struct iscsi_conn *, struct iscsi_login *);
        int (*iscsit_put_login_tx)(struct iscsi_conn *, struct iscsi_login *, u32);
        int (*iscsit_immediate_queue)(struct iscsi_conn *, struct iscsi_cmd *, int);
@@ -22,6 +22,11 @@ struct iscsit_transport {
        int (*iscsit_queue_status)(struct iscsi_conn *, struct iscsi_cmd *);
 };
 
+static inline void *iscsit_priv_cmd(struct iscsi_cmd *cmd)
+{
+       return (void *)(cmd + 1);
+}
+
 /*
  * From iscsi_target_transport.c
  */
@@ -92,3 +97,4 @@ extern int iscsit_tmr_post_handler(struct iscsi_cmd *, struct iscsi_conn *);
 extern struct iscsi_cmd *iscsit_allocate_cmd(struct iscsi_conn *, gfp_t);
 extern int iscsit_sequence_cmd(struct iscsi_conn *, struct iscsi_cmd *,
                               unsigned char *, __be32);
+extern void iscsit_release_cmd(struct iscsi_cmd *);
index ffa2696..5ebe21c 100644 (file)
@@ -39,7 +39,8 @@ struct se_subsystem_api {
 };
 
 struct sbc_ops {
-       sense_reason_t (*execute_rw)(struct se_cmd *cmd);
+       sense_reason_t (*execute_rw)(struct se_cmd *cmd, struct scatterlist *,
+                                    u32, enum dma_data_direction);
        sense_reason_t (*execute_sync_cache)(struct se_cmd *cmd);
        sense_reason_t (*execute_write_same)(struct se_cmd *cmd);
        sense_reason_t (*execute_write_same_unmap)(struct se_cmd *cmd);
@@ -73,6 +74,10 @@ int  transport_set_vpd_ident(struct t10_vpd *, unsigned char *);
 /* core helpers also used by command snooping in pscsi */
 void   *transport_kmap_data_sg(struct se_cmd *);
 void   transport_kunmap_data_sg(struct se_cmd *);
+/* core helpers also used by xcopy during internal command setup */
+int    target_alloc_sgl(struct scatterlist **, unsigned int *, u32, bool);
+sense_reason_t transport_generic_map_mem_to_cmd(struct se_cmd *,
+               struct scatterlist *, u32, struct scatterlist *, u32);
 
 void   array_free(void *array, int n);
 
index e34fc90..5bdb8b7 100644 (file)
@@ -5,11 +5,12 @@
 #include <linux/configfs.h>
 #include <linux/dma-mapping.h>
 #include <linux/blkdev.h>
+#include <linux/percpu_ida.h>
 #include <scsi/scsi_cmnd.h>
 #include <net/sock.h>
 #include <net/tcp.h>
 
-#define TARGET_CORE_MOD_VERSION                "v4.1.0-rc2-ml"
+#define TARGET_CORE_MOD_VERSION                "v4.1.0"
 #define TARGET_CORE_VERSION            TARGET_CORE_MOD_VERSION
 
 /* Maximum Number of LUNs per Target Portal Group */
  * block/blk-lib.c:blkdev_issue_discard()
  */
 #define DA_EMULATE_TPWS                                0
+/* Emulation for CompareAndWrite (AtomicTestandSet) by default */
+#define DA_EMULATE_CAW                         1
+/* Emulation for 3rd Party Copy (ExtendedCopy) by default */
+#define DA_EMULATE_3PC                         1
 /* No Emulation for PSCSI by default */
 #define DA_EMULATE_ALUA                                0
 /* Enforce SCSI Initiator Port TransportID with 'ISID' for PR */
@@ -158,6 +163,9 @@ enum se_cmd_flags_table {
        SCF_ALUA_NON_OPTIMIZED          = 0x00008000,
        SCF_PASSTHROUGH_SG_TO_MEM_NOALLOC = 0x00020000,
        SCF_ACK_KREF                    = 0x00040000,
+       SCF_COMPARE_AND_WRITE           = 0x00080000,
+       SCF_COMPARE_AND_WRITE_POST      = 0x00100000,
+       SCF_CMD_XCOPY_PASSTHROUGH       = 0x00200000,
 };
 
 /* struct se_dev_entry->lun_flags and struct se_lun->lun_access */
@@ -196,6 +204,7 @@ enum tcm_sense_reason_table {
        TCM_ADDRESS_OUT_OF_RANGE                = R(0x11),
        TCM_OUT_OF_RESOURCES                    = R(0x12),
        TCM_PARAMETER_LIST_LENGTH_ERROR         = R(0x13),
+       TCM_MISCOMPARE_VERIFY                   = R(0x14),
 #undef R
 };
 
@@ -415,6 +424,8 @@ struct se_cmd {
        enum dma_data_direction data_direction;
        /* For SAM Task Attribute */
        int                     sam_task_attr;
+       /* Used for se_sess->sess_tag_pool */
+       unsigned int            map_tag;
        /* Transport protocol dependent state, see transport_state_table */
        enum transport_state_table t_state;
        unsigned                cmd_wait_set:1;
@@ -444,11 +455,14 @@ struct se_cmd {
        struct kref             cmd_kref;
        struct target_core_fabric_ops *se_tfo;
        sense_reason_t          (*execute_cmd)(struct se_cmd *);
-       void (*transport_complete_callback)(struct se_cmd *);
+       sense_reason_t          (*execute_rw)(struct se_cmd *, struct scatterlist *,
+                                             u32, enum dma_data_direction);
+       sense_reason_t (*transport_complete_callback)(struct se_cmd *);
 
        unsigned char           *t_task_cdb;
        unsigned char           __t_task_cdb[TCM_MAX_COMMAND_SIZE];
        unsigned long long      t_task_lba;
+       unsigned int            t_task_nolb;
        unsigned int            transport_state;
 #define CMD_T_ABORTED          (1 << 0)
 #define CMD_T_ACTIVE           (1 << 1)
@@ -469,7 +483,9 @@ struct se_cmd {
        struct work_struct      work;
 
        struct scatterlist      *t_data_sg;
+       struct scatterlist      *t_data_sg_orig;
        unsigned int            t_data_nents;
+       unsigned int            t_data_nents_orig;
        void                    *t_data_vmap;
        struct scatterlist      *t_bidi_data_sg;
        unsigned int            t_bidi_data_nents;
@@ -536,6 +552,8 @@ struct se_session {
        struct list_head        sess_wait_list;
        spinlock_t              sess_cmd_lock;
        struct kref             sess_kref;
+       void                    *sess_cmd_map;
+       struct percpu_ida       sess_tag_pool;
 };
 
 struct se_device;
@@ -589,6 +607,8 @@ struct se_dev_attrib {
        int             emulate_tas;
        int             emulate_tpu;
        int             emulate_tpws;
+       int             emulate_caw;
+       int             emulate_3pc;
        int             enforce_pr_isids;
        int             is_nonrot;
        int             emulate_rest_reord;
@@ -656,6 +676,7 @@ struct se_device {
        spinlock_t              se_port_lock;
        spinlock_t              se_tmr_lock;
        spinlock_t              qf_cmd_lock;
+       struct semaphore        caw_sem;
        /* Used for legacy SPC-2 reservationsa */
        struct se_node_acl      *dev_reserved_node_acl;
        /* Used for ALUA Logical Unit Group membership */
@@ -669,6 +690,7 @@ struct se_device {
        struct list_head        delayed_cmd_list;
        struct list_head        state_list;
        struct list_head        qf_cmd_list;
+       struct list_head        g_dev_node;
        /* Pointer to associated SE HBA */
        struct se_hba           *se_hba;
        /* T10 Inquiry and VPD WWN Information */
index 7a16178..882b650 100644 (file)
@@ -84,6 +84,9 @@ struct target_core_fabric_ops {
 };
 
 struct se_session *transport_init_session(void);
+int transport_alloc_session_tags(struct se_session *, unsigned int,
+               unsigned int);
+struct se_session *transport_init_session_tags(unsigned int, unsigned int);
 void   __transport_register_session(struct se_portal_group *,
                struct se_node_acl *, struct se_session *, void *);
 void   transport_register_session(struct se_portal_group *,
@@ -131,6 +134,7 @@ int core_tmr_alloc_req(struct se_cmd *, void *, u8, gfp_t);
 void   core_tmr_release_req(struct se_tmr_req *);
 int    transport_generic_handle_tmr(struct se_cmd *);
 void   transport_generic_request_failure(struct se_cmd *, sense_reason_t);
+void   __target_execute_cmd(struct se_cmd *);
 int    transport_lookup_tmr_lun(struct se_cmd *, u32);
 
 struct se_node_acl *core_tpg_check_initiator_node_acl(struct se_portal_group *,
@@ -175,4 +179,30 @@ u32        iscsi_get_pr_transport_id_len(struct se_portal_group *, struct se_node_acl *
 char   *iscsi_parse_pr_out_transport_id(struct se_portal_group *, const char *,
                u32 *, char **);
 
+/*
+ * The LIO target core uses DMA_TO_DEVICE to mean that data is going
+ * to the target (eg handling a WRITE) and DMA_FROM_DEVICE to mean
+ * that data is coming from the target (eg handling a READ).  However,
+ * this is just the opposite of what we have to tell the DMA mapping
+ * layer -- eg when handling a READ, the HBA will have to DMA the data
+ * out of memory so it can send it to the initiator, which means we
+ * need to use DMA_TO_DEVICE when we map the data.
+ */
+static inline enum dma_data_direction
+target_reverse_dma_direction(struct se_cmd *se_cmd)
+{
+       if (se_cmd->se_cmd_flags & SCF_BIDI)
+               return DMA_BIDIRECTIONAL;
+
+       switch (se_cmd->data_direction) {
+       case DMA_TO_DEVICE:
+               return DMA_FROM_DEVICE;
+       case DMA_FROM_DEVICE:
+               return DMA_TO_DEVICE;
+       case DMA_NONE:
+       default:
+               return DMA_NONE;
+       }
+}
+
 #endif /* TARGET_CORE_FABRICH */
index 2902657..45702c3 100644 (file)
@@ -439,7 +439,7 @@ TRACE_EVENT(btrfs_sync_fs,
                { BTRFS_UPDATE_DELAYED_HEAD, "UPDATE_DELAYED_HEAD" })
                        
 
-TRACE_EVENT(btrfs_delayed_tree_ref,
+DECLARE_EVENT_CLASS(btrfs_delayed_tree_ref,
 
        TP_PROTO(struct btrfs_delayed_ref_node *ref,
                 struct btrfs_delayed_tree_ref *full_ref,
@@ -481,7 +481,25 @@ TRACE_EVENT(btrfs_delayed_tree_ref,
                  (unsigned long long)__entry->seq)
 );
 
-TRACE_EVENT(btrfs_delayed_data_ref,
+DEFINE_EVENT(btrfs_delayed_tree_ref,  add_delayed_tree_ref,
+
+       TP_PROTO(struct btrfs_delayed_ref_node *ref,
+                struct btrfs_delayed_tree_ref *full_ref,
+                int action),
+
+       TP_ARGS(ref, full_ref, action)
+);
+
+DEFINE_EVENT(btrfs_delayed_tree_ref,  run_delayed_tree_ref,
+
+       TP_PROTO(struct btrfs_delayed_ref_node *ref,
+                struct btrfs_delayed_tree_ref *full_ref,
+                int action),
+
+       TP_ARGS(ref, full_ref, action)
+);
+
+DECLARE_EVENT_CLASS(btrfs_delayed_data_ref,
 
        TP_PROTO(struct btrfs_delayed_ref_node *ref,
                 struct btrfs_delayed_data_ref *full_ref,
@@ -527,7 +545,25 @@ TRACE_EVENT(btrfs_delayed_data_ref,
                  (unsigned long long)__entry->seq)
 );
 
-TRACE_EVENT(btrfs_delayed_ref_head,
+DEFINE_EVENT(btrfs_delayed_data_ref,  add_delayed_data_ref,
+
+       TP_PROTO(struct btrfs_delayed_ref_node *ref,
+                struct btrfs_delayed_data_ref *full_ref,
+                int action),
+
+       TP_ARGS(ref, full_ref, action)
+);
+
+DEFINE_EVENT(btrfs_delayed_data_ref,  run_delayed_data_ref,
+
+       TP_PROTO(struct btrfs_delayed_ref_node *ref,
+                struct btrfs_delayed_data_ref *full_ref,
+                int action),
+
+       TP_ARGS(ref, full_ref, action)
+);
+
+DECLARE_EVENT_CLASS(btrfs_delayed_ref_head,
 
        TP_PROTO(struct btrfs_delayed_ref_node *ref,
                 struct btrfs_delayed_ref_head *head_ref,
@@ -556,6 +592,24 @@ TRACE_EVENT(btrfs_delayed_ref_head,
                  __entry->is_data)
 );
 
+DEFINE_EVENT(btrfs_delayed_ref_head,  add_delayed_ref_head,
+
+       TP_PROTO(struct btrfs_delayed_ref_node *ref,
+                struct btrfs_delayed_ref_head *head_ref,
+                int action),
+
+       TP_ARGS(ref, head_ref, action)
+);
+
+DEFINE_EVENT(btrfs_delayed_ref_head,  run_delayed_ref_head,
+
+       TP_PROTO(struct btrfs_delayed_ref_node *ref,
+                struct btrfs_delayed_ref_head *head_ref,
+                int action),
+
+       TP_ARGS(ref, head_ref, action)
+);
+
 #define show_chunk_type(type)                                  \
        __print_flags(type, "|",                                \
                { BTRFS_BLOCK_GROUP_DATA,       "DATA"  },      \
index 6bc943e..d0c6134 100644 (file)
@@ -268,11 +268,13 @@ TRACE_EVENT(mm_page_alloc_extfrag,
 
        TP_PROTO(struct page *page,
                        int alloc_order, int fallback_order,
-                       int alloc_migratetype, int fallback_migratetype),
+                       int alloc_migratetype, int fallback_migratetype,
+                       int change_ownership),
 
        TP_ARGS(page,
                alloc_order, fallback_order,
-               alloc_migratetype, fallback_migratetype),
+               alloc_migratetype, fallback_migratetype,
+               change_ownership),
 
        TP_STRUCT__entry(
                __field(        struct page *,  page                    )
@@ -280,6 +282,7 @@ TRACE_EVENT(mm_page_alloc_extfrag,
                __field(        int,            fallback_order          )
                __field(        int,            alloc_migratetype       )
                __field(        int,            fallback_migratetype    )
+               __field(        int,            change_ownership        )
        ),
 
        TP_fast_assign(
@@ -288,6 +291,7 @@ TRACE_EVENT(mm_page_alloc_extfrag,
                __entry->fallback_order         = fallback_order;
                __entry->alloc_migratetype      = alloc_migratetype;
                __entry->fallback_migratetype   = fallback_migratetype;
+               __entry->change_ownership       = change_ownership;
        ),
 
        TP_printk("page=%p pfn=%lu alloc_order=%d fallback_order=%d pageblock_order=%d alloc_migratetype=%d fallback_migratetype=%d fragmenting=%d change_ownership=%d",
@@ -299,7 +303,7 @@ TRACE_EVENT(mm_page_alloc_extfrag,
                __entry->alloc_migratetype,
                __entry->fallback_migratetype,
                __entry->fallback_order < pageblock_order,
-               __entry->alloc_migratetype == __entry->fallback_migratetype)
+               __entry->change_ownership)
 );
 
 #endif /* _TRACE_KMEM_H */
index 63cfccc..132a985 100644 (file)
@@ -202,7 +202,7 @@ TRACE_EVENT(mm_shrink_slab_start,
 
        TP_fast_assign(
                __entry->shr = shr;
-               __entry->shrink = shr->shrink;
+               __entry->shrink = shr->scan_objects;
                __entry->nr_objects_to_shrink = nr_objects_to_shrink;
                __entry->gfp_flags = sc->gfp_mask;
                __entry->pgs_scanned = pgs_scanned;
@@ -241,7 +241,7 @@ TRACE_EVENT(mm_shrink_slab_end,
 
        TP_fast_assign(
                __entry->shr = shr;
-               __entry->shrink = shr->shrink;
+               __entry->shrink = shr->scan_objects;
                __entry->unused_scan = unused_scan_cnt;
                __entry->new_scan = new_scan_cnt;
                __entry->retval = shrinker_retval;
index 05aed70..45e6189 100644 (file)
@@ -305,6 +305,31 @@ struct btrfs_ioctl_clone_range_args {
 #define BTRFS_DEFRAG_RANGE_COMPRESS 1
 #define BTRFS_DEFRAG_RANGE_START_IO 2
 
+#define BTRFS_SAME_DATA_DIFFERS        1
+/* For extent-same ioctl */
+struct btrfs_ioctl_same_extent_info {
+       __s64 fd;               /* in - destination file */
+       __u64 logical_offset;   /* in - start of extent in destination */
+       __u64 bytes_deduped;    /* out - total # of bytes we were able
+                                * to dedupe from this file */
+       /* status of this dedupe operation:
+        * 0 if dedup succeeds
+        * < 0 for error
+        * == BTRFS_SAME_DATA_DIFFERS if data differs
+        */
+       __s32 status;           /* out - see above description */
+       __u32 reserved;
+};
+
+struct btrfs_ioctl_same_args {
+       __u64 logical_offset;   /* in - start of extent in source */
+       __u64 length;           /* in - length of extent */
+       __u16 dest_count;       /* in - total elements in info array */
+       __u16 reserved1;
+       __u32 reserved2;
+       struct btrfs_ioctl_same_extent_info info[0];
+};
+
 struct btrfs_ioctl_space_info {
        __u64 flags;
        __u64 total_bytes;
@@ -524,7 +549,7 @@ static inline char *btrfs_err_str(enum btrfs_err_code err_code)
                                   struct btrfs_ioctl_search_args)
 #define BTRFS_IOC_INO_LOOKUP _IOWR(BTRFS_IOCTL_MAGIC, 18, \
                                   struct btrfs_ioctl_ino_lookup_args)
-#define BTRFS_IOC_DEFAULT_SUBVOL _IOW(BTRFS_IOCTL_MAGIC, 19, u64)
+#define BTRFS_IOC_DEFAULT_SUBVOL _IOW(BTRFS_IOCTL_MAGIC, 19, __u64)
 #define BTRFS_IOC_SPACE_INFO _IOWR(BTRFS_IOCTL_MAGIC, 20, \
                                    struct btrfs_ioctl_space_args)
 #define BTRFS_IOC_START_SYNC _IOR(BTRFS_IOCTL_MAGIC, 24, __u64)
@@ -579,4 +604,7 @@ static inline char *btrfs_err_str(enum btrfs_err_code err_code)
                                      struct btrfs_ioctl_get_dev_stats)
 #define BTRFS_IOC_DEV_REPLACE _IOWR(BTRFS_IOCTL_MAGIC, 53, \
                                    struct btrfs_ioctl_dev_replace_args)
+#define BTRFS_IOC_FILE_EXTENT_SAME _IOWR(BTRFS_IOCTL_MAGIC, 54, \
+                                        struct btrfs_ioctl_same_args)
+
 #endif /* _UAPI_LINUX_BTRFS_H */
diff --git a/include/uapi/linux/cifs/cifs_mount.h b/include/uapi/linux/cifs/cifs_mount.h
new file mode 100644 (file)
index 0000000..d7e4c6c
--- /dev/null
@@ -0,0 +1,27 @@
+/*
+ *   include/uapi/linux/cifs/cifs_mount.h
+ *
+ *   Author(s): Scott Lovenberg (scott.lovenberg@gmail.com)
+ *
+ *   This library is free software; you can redistribute it and/or modify
+ *   it under the terms of the GNU Lesser General Public License as published
+ *   by the Free Software Foundation; either version 2.1 of the License, or
+ *   (at your option) any later version.
+ *
+ *   This library is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
+ *   the GNU Lesser General Public License for more details.
+ */
+#ifndef _CIFS_MOUNT_H
+#define _CIFS_MOUNT_H
+
+/* Max string lengths for cifs mounting options. */
+#define CIFS_MAX_DOMAINNAME_LEN 256 /* max fully qualified domain name */
+#define CIFS_MAX_USERNAME_LEN   256 /* reasonable max for current servers */
+#define CIFS_MAX_PASSWORD_LEN   512 /* Windows max seems to be 256 wide chars */
+#define CIFS_MAX_SHARE_LEN      256 /* reasonable max share name length */
+#define CIFS_NI_MAXHOST        1024 /* max host name length (256 * 4 bytes) */
+
+
+#endif /* _CIFS_MOUNT_H */
index a4ed56c..6c28b61 100644 (file)
@@ -49,9 +49,9 @@ struct files_stat_struct {
 };
 
 struct inodes_stat_t {
-       int nr_inodes;
-       int nr_unused;
-       int dummy[5];           /* padding for sysctl ABI compatibility */
+       long nr_inodes;
+       long nr_unused;
+       long dummy[5];          /* padding for sysctl ABI compatibility */
 };
 
 
index d08abf9..a372627 100644 (file)
@@ -152,6 +152,7 @@ struct input_keymap_entry {
 #define EVIOCGEFFECTS          _IOR('E', 0x84, int)                    /* Report number of effects playable at the same time */
 
 #define EVIOCGRAB              _IOW('E', 0x90, int)                    /* Grab/Release device */
+#define EVIOCREVOKE            _IOW('E', 0x91, int)                    /* Revoke device access */
 
 #define EVIOCSCLOCKID          _IOW('E', 0xa0, int)                    /* Set clockid to be used for timestamps */
 
index ca1d90b..40a1fb8 100644 (file)
@@ -324,7 +324,7 @@ struct perf_event_attr {
 #define PERF_EVENT_IOC_PERIOD          _IOW('$', 4, __u64)
 #define PERF_EVENT_IOC_SET_OUTPUT      _IO ('$', 5)
 #define PERF_EVENT_IOC_SET_FILTER      _IOW('$', 6, char *)
-#define PERF_EVENT_IOC_ID              _IOR('$', 7, u64 *)
+#define PERF_EVENT_IOC_ID              _IOR('$', 7, __u64 *)
 
 enum perf_event_ioc_flags {
        PERF_IOC_FLAG_GROUP             = 1U << 0,
index 18bd9e3..3ecd8a1 100644 (file)
@@ -1602,7 +1602,7 @@ endchoice
 
 config SLUB_CPU_PARTIAL
        default y
-       depends on SLUB
+       depends on SLUB && SMP
        bool "SLUB per cpu partial cache"
        help
          Per cpu partial caches accellerate objects allocation and freeing
index 816014c..a51cddc 100644 (file)
@@ -26,6 +26,8 @@
 #include <linux/async.h>
 #include <linux/fs_struct.h>
 #include <linux/slab.h>
+#include <linux/ramfs.h>
+#include <linux/shmem_fs.h>
 
 #include <linux/nfs_fs.h>
 #include <linux/nfs_fs_sb.h>
@@ -588,3 +590,46 @@ out:
        sys_mount(".", "/", NULL, MS_MOVE, NULL);
        sys_chroot(".");
 }
+
+static bool is_tmpfs;
+static struct dentry *rootfs_mount(struct file_system_type *fs_type,
+       int flags, const char *dev_name, void *data)
+{
+       static unsigned long once;
+       void *fill = ramfs_fill_super;
+
+       if (test_and_set_bit(0, &once))
+               return ERR_PTR(-ENODEV);
+
+       if (IS_ENABLED(CONFIG_TMPFS) && is_tmpfs)
+               fill = shmem_fill_super;
+
+       return mount_nodev(fs_type, flags, data, fill);
+}
+
+static struct file_system_type rootfs_fs_type = {
+       .name           = "rootfs",
+       .mount          = rootfs_mount,
+       .kill_sb        = kill_litter_super,
+};
+
+int __init init_rootfs(void)
+{
+       int err = register_filesystem(&rootfs_fs_type);
+
+       if (err)
+               return err;
+
+       if (IS_ENABLED(CONFIG_TMPFS) && !saved_root_name[0] &&
+               (!root_fs_names || strstr(root_fs_names, "tmpfs"))) {
+               err = shmem_init();
+               is_tmpfs = true;
+       } else {
+               err = init_ramfs_fs();
+       }
+
+       if (err)
+               unregister_filesystem(&rootfs_fs_type);
+
+       return err;
+}
index b65fdf1..b0d541d 100644 (file)
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -70,8 +70,6 @@ struct msg_sender {
 
 #define msg_ids(ns)    ((ns)->ids[IPC_MSG_IDS])
 
-#define msg_unlock(msq)                ipc_unlock(&(msq)->q_perm)
-
 static void freeque(struct ipc_namespace *, struct kern_ipc_perm *);
 static int newque(struct ipc_namespace *, struct ipc_params *);
 #ifdef CONFIG_PROC_FS
@@ -172,7 +170,7 @@ static inline void msg_rmid(struct ipc_namespace *ns, struct msg_queue *s)
  * @ns: namespace
  * @params: ptr to the structure that contains the key and msgflg
  *
- * Called with msg_ids.rw_mutex held (writer)
+ * Called with msg_ids.rwsem held (writer)
  */
 static int newque(struct ipc_namespace *ns, struct ipc_params *params)
 {
@@ -259,8 +257,8 @@ static void expunge_all(struct msg_queue *msq, int res)
  * removes the message queue from message queue ID IDR, and cleans up all the
  * messages associated with this queue.
  *
- * msg_ids.rw_mutex (writer) and the spinlock for this message queue are held
- * before freeque() is called. msg_ids.rw_mutex remains locked on exit.
+ * msg_ids.rwsem (writer) and the spinlock for this message queue are held
+ * before freeque() is called. msg_ids.rwsem remains locked on exit.
  */
 static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 {
@@ -270,7 +268,8 @@ static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
        expunge_all(msq, -EIDRM);
        ss_wakeup(&msq->q_senders, 1);
        msg_rmid(ns, msq);
-       msg_unlock(msq);
+       ipc_unlock_object(&msq->q_perm);
+       rcu_read_unlock();
 
        list_for_each_entry_safe(msg, t, &msq->q_messages, m_list) {
                atomic_dec(&ns->msg_hdrs);
@@ -282,7 +281,7 @@ static void freeque(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 }
 
 /*
- * Called with msg_ids.rw_mutex and ipcp locked.
+ * Called with msg_ids.rwsem and ipcp locked.
  */
 static inline int msg_security(struct kern_ipc_perm *ipcp, int msgflg)
 {
@@ -386,9 +385,9 @@ copy_msqid_from_user(struct msqid64_ds *out, void __user *buf, int version)
 }
 
 /*
- * This function handles some msgctl commands which require the rw_mutex
+ * This function handles some msgctl commands which require the rwsem
  * to be held in write mode.
- * NOTE: no locks must be held, the rw_mutex is taken inside this function.
+ * NOTE: no locks must be held, the rwsem is taken inside this function.
  */
 static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd,
                       struct msqid_ds __user *buf, int version)
@@ -403,7 +402,7 @@ static int msgctl_down(struct ipc_namespace *ns, int msqid, int cmd,
                        return -EFAULT;
        }
 
-       down_write(&msg_ids(ns).rw_mutex);
+       down_write(&msg_ids(ns).rwsem);
        rcu_read_lock();
 
        ipcp = ipcctl_pre_down_nolock(ns, &msg_ids(ns), msqid, cmd,
@@ -459,7 +458,7 @@ out_unlock0:
 out_unlock1:
        rcu_read_unlock();
 out_up:
-       up_write(&msg_ids(ns).rw_mutex);
+       up_write(&msg_ids(ns).rwsem);
        return err;
 }
 
@@ -494,7 +493,7 @@ static int msgctl_nolock(struct ipc_namespace *ns, int msqid,
                msginfo.msgmnb = ns->msg_ctlmnb;
                msginfo.msgssz = MSGSSZ;
                msginfo.msgseg = MSGSEG;
-               down_read(&msg_ids(ns).rw_mutex);
+               down_read(&msg_ids(ns).rwsem);
                if (cmd == MSG_INFO) {
                        msginfo.msgpool = msg_ids(ns).in_use;
                        msginfo.msgmap = atomic_read(&ns->msg_hdrs);
@@ -505,7 +504,7 @@ static int msgctl_nolock(struct ipc_namespace *ns, int msqid,
                        msginfo.msgtql = MSGTQL;
                }
                max_id = ipc_get_maxid(&msg_ids(ns));
-               up_read(&msg_ids(ns).rw_mutex);
+               up_read(&msg_ids(ns).rwsem);
                if (copy_to_user(buf, &msginfo, sizeof(struct msginfo)))
                        return -EFAULT;
                return (max_id < 0) ? 0 : max_id;
index 4be6581..59451c1 100644 (file)
@@ -81,7 +81,7 @@ void free_ipcs(struct ipc_namespace *ns, struct ipc_ids *ids,
        int next_id;
        int total, in_use;
 
-       down_write(&ids->rw_mutex);
+       down_write(&ids->rwsem);
 
        in_use = ids->in_use;
 
@@ -89,11 +89,12 @@ void free_ipcs(struct ipc_namespace *ns, struct ipc_ids *ids,
                perm = idr_find(&ids->ipcs_idr, next_id);
                if (perm == NULL)
                        continue;
-               ipc_lock_by_ptr(perm);
+               rcu_read_lock();
+               ipc_lock_object(perm);
                free(ns, perm);
                total++;
        }
-       up_write(&ids->rw_mutex);
+       up_write(&ids->rwsem);
 }
 
 static void free_ipc_ns(struct ipc_namespace *ns)
index 4108889..69b6a21 100644 (file)
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -322,7 +322,7 @@ static inline void sem_unlock(struct sem_array *sma, int locknum)
 }
 
 /*
- * sem_lock_(check_) routines are called in the paths where the rw_mutex
+ * sem_lock_(check_) routines are called in the paths where the rwsem
  * is not held.
  *
  * The caller holds the RCU read lock.
@@ -426,7 +426,7 @@ static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s)
  * @ns: namespace
  * @params: ptr to the structure that contains key, semflg and nsems
  *
- * Called with sem_ids.rw_mutex held (as a writer)
+ * Called with sem_ids.rwsem held (as a writer)
  */
 
 static int newary(struct ipc_namespace *ns, struct ipc_params *params)
@@ -492,7 +492,7 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params)
 
 
 /*
- * Called with sem_ids.rw_mutex and ipcp locked.
+ * Called with sem_ids.rwsem and ipcp locked.
  */
 static inline int sem_security(struct kern_ipc_perm *ipcp, int semflg)
 {
@@ -503,7 +503,7 @@ static inline int sem_security(struct kern_ipc_perm *ipcp, int semflg)
 }
 
 /*
- * Called with sem_ids.rw_mutex and ipcp locked.
+ * Called with sem_ids.rwsem and ipcp locked.
  */
 static inline int sem_more_checks(struct kern_ipc_perm *ipcp,
                                struct ipc_params *params)
@@ -994,8 +994,8 @@ static int count_semzcnt (struct sem_array * sma, ushort semnum)
        return semzcnt;
 }
 
-/* Free a semaphore set. freeary() is called with sem_ids.rw_mutex locked
- * as a writer and the spinlock for this semaphore set hold. sem_ids.rw_mutex
+/* Free a semaphore set. freeary() is called with sem_ids.rwsem locked
+ * as a writer and the spinlock for this semaphore set held. sem_ids.rwsem
  * remains locked on exit.
  */
 static void freeary(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
@@ -1116,7 +1116,7 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid,
                seminfo.semmnu = SEMMNU;
                seminfo.semmap = SEMMAP;
                seminfo.semume = SEMUME;
-               down_read(&sem_ids(ns).rw_mutex);
+               down_read(&sem_ids(ns).rwsem);
                if (cmd == SEM_INFO) {
                        seminfo.semusz = sem_ids(ns).in_use;
                        seminfo.semaem = ns->used_sems;
@@ -1125,7 +1125,7 @@ static int semctl_nolock(struct ipc_namespace *ns, int semid,
                        seminfo.semaem = SEMAEM;
                }
                max_id = ipc_get_maxid(&sem_ids(ns));
-               up_read(&sem_ids(ns).rw_mutex);
+               up_read(&sem_ids(ns).rwsem);
                if (copy_to_user(p, &seminfo, sizeof(struct seminfo))) 
                        return -EFAULT;
                return (max_id < 0) ? 0: max_id;
@@ -1431,9 +1431,9 @@ copy_semid_from_user(struct semid64_ds *out, void __user *buf, int version)
 }
 
 /*
- * This function handles some semctl commands which require the rw_mutex
+ * This function handles some semctl commands which require the rwsem
  * to be held in write mode.
- * NOTE: no locks must be held, the rw_mutex is taken inside this function.
+ * NOTE: no locks must be held, the rwsem is taken inside this function.
  */
 static int semctl_down(struct ipc_namespace *ns, int semid,
                       int cmd, int version, void __user *p)
@@ -1448,7 +1448,7 @@ static int semctl_down(struct ipc_namespace *ns, int semid,
                        return -EFAULT;
        }
 
-       down_write(&sem_ids(ns).rw_mutex);
+       down_write(&sem_ids(ns).rwsem);
        rcu_read_lock();
 
        ipcp = ipcctl_pre_down_nolock(ns, &sem_ids(ns), semid, cmd,
@@ -1487,7 +1487,7 @@ out_unlock0:
 out_unlock1:
        rcu_read_unlock();
 out_up:
-       up_write(&sem_ids(ns).rw_mutex);
+       up_write(&sem_ids(ns).rwsem);
        return err;
 }
 
index c6b4ad5..2821cdf 100644 (file)
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -19,6 +19,9 @@
  * namespaces support
  * OpenVZ, SWsoft Inc.
  * Pavel Emelianov <xemul@openvz.org>
+ *
+ * Better ipc lock (kern_ipc_perm.lock) handling
+ * Davidlohr Bueso <davidlohr.bueso@hp.com>, June 2013.
  */
 
 #include <linux/slab.h>
@@ -80,8 +83,8 @@ void shm_init_ns(struct ipc_namespace *ns)
 }
 
 /*
- * Called with shm_ids.rw_mutex (writer) and the shp structure locked.
- * Only shm_ids.rw_mutex remains locked on exit.
+ * Called with shm_ids.rwsem (writer) and the shp structure locked.
+ * Only shm_ids.rwsem remains locked on exit.
  */
 static void do_shm_rmid(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
 {
@@ -124,8 +127,28 @@ void __init shm_init (void)
                                IPC_SHM_IDS, sysvipc_shm_proc_show);
 }
 
+static inline struct shmid_kernel *shm_obtain_object(struct ipc_namespace *ns, int id)
+{
+       struct kern_ipc_perm *ipcp = ipc_obtain_object(&shm_ids(ns), id);
+
+       if (IS_ERR(ipcp))
+               return ERR_CAST(ipcp);
+
+       return container_of(ipcp, struct shmid_kernel, shm_perm);
+}
+
+static inline struct shmid_kernel *shm_obtain_object_check(struct ipc_namespace *ns, int id)
+{
+       struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&shm_ids(ns), id);
+
+       if (IS_ERR(ipcp))
+               return ERR_CAST(ipcp);
+
+       return container_of(ipcp, struct shmid_kernel, shm_perm);
+}
+
 /*
- * shm_lock_(check_) routines are called in the paths where the rw_mutex
+ * shm_lock_(check_) routines are called in the paths where the rwsem
  * is not necessarily held.
  */
 static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id)
@@ -144,17 +167,6 @@ static inline void shm_lock_by_ptr(struct shmid_kernel *ipcp)
        ipc_lock_object(&ipcp->shm_perm);
 }
 
-static inline struct shmid_kernel *shm_lock_check(struct ipc_namespace *ns,
-                                               int id)
-{
-       struct kern_ipc_perm *ipcp = ipc_lock_check(&shm_ids(ns), id);
-
-       if (IS_ERR(ipcp))
-               return (struct shmid_kernel *)ipcp;
-
-       return container_of(ipcp, struct shmid_kernel, shm_perm);
-}
-
 static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s)
 {
        ipc_rmid(&shm_ids(ns), &s->shm_perm);
@@ -182,7 +194,7 @@ static void shm_open(struct vm_area_struct *vma)
  * @ns: namespace
  * @shp: struct to free
  *
- * It has to be called with shp and shm_ids.rw_mutex (writer) locked,
+ * It has to be called with shp and shm_ids.rwsem (writer) locked,
  * but returns with shp unlocked and freed.
  */
 static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
@@ -230,7 +242,7 @@ static void shm_close(struct vm_area_struct *vma)
        struct shmid_kernel *shp;
        struct ipc_namespace *ns = sfd->ns;
 
-       down_write(&shm_ids(ns).rw_mutex);
+       down_write(&shm_ids(ns).rwsem);
        /* remove from the list of attaches of the shm segment */
        shp = shm_lock(ns, sfd->id);
        BUG_ON(IS_ERR(shp));
@@ -241,10 +253,10 @@ static void shm_close(struct vm_area_struct *vma)
                shm_destroy(ns, shp);
        else
                shm_unlock(shp);
-       up_write(&shm_ids(ns).rw_mutex);
+       up_write(&shm_ids(ns).rwsem);
 }
 
-/* Called with ns->shm_ids(ns).rw_mutex locked */
+/* Called with ns->shm_ids(ns).rwsem locked */
 static int shm_try_destroy_current(int id, void *p, void *data)
 {
        struct ipc_namespace *ns = data;
@@ -275,7 +287,7 @@ static int shm_try_destroy_current(int id, void *p, void *data)
        return 0;
 }
 
-/* Called with ns->shm_ids(ns).rw_mutex locked */
+/* Called with ns->shm_ids(ns).rwsem locked */
 static int shm_try_destroy_orphaned(int id, void *p, void *data)
 {
        struct ipc_namespace *ns = data;
@@ -286,7 +298,7 @@ static int shm_try_destroy_orphaned(int id, void *p, void *data)
         * We want to destroy segments without users and with already
         * exit'ed originating process.
         *
-        * As shp->* are changed under rw_mutex, it's safe to skip shp locking.
+        * As shp->* are changed under rwsem, it's safe to skip shp locking.
         */
        if (shp->shm_creator != NULL)
                return 0;
@@ -300,10 +312,10 @@ static int shm_try_destroy_orphaned(int id, void *p, void *data)
 
 void shm_destroy_orphaned(struct ipc_namespace *ns)
 {
-       down_write(&shm_ids(ns).rw_mutex);
+       down_write(&shm_ids(ns).rwsem);
        if (shm_ids(ns).in_use)
                idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_orphaned, ns);
-       up_write(&shm_ids(ns).rw_mutex);
+       up_write(&shm_ids(ns).rwsem);
 }
 
 
@@ -315,10 +327,10 @@ void exit_shm(struct task_struct *task)
                return;
 
        /* Destroy all already created segments, but not mapped yet */
-       down_write(&shm_ids(ns).rw_mutex);
+       down_write(&shm_ids(ns).rwsem);
        if (shm_ids(ns).in_use)
                idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_current, ns);
-       up_write(&shm_ids(ns).rw_mutex);
+       up_write(&shm_ids(ns).rwsem);
 }
 
 static int shm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
@@ -452,7 +464,7 @@ static const struct vm_operations_struct shm_vm_ops = {
  * @ns: namespace
  * @params: ptr to the structure that contains key, size and shmflg
  *
- * Called with shm_ids.rw_mutex held as a writer.
+ * Called with shm_ids.rwsem held as a writer.
  */
 
 static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
@@ -560,7 +572,7 @@ no_file:
 }
 
 /*
- * Called with shm_ids.rw_mutex and ipcp locked.
+ * Called with shm_ids.rwsem and ipcp locked.
  */
 static inline int shm_security(struct kern_ipc_perm *ipcp, int shmflg)
 {
@@ -571,7 +583,7 @@ static inline int shm_security(struct kern_ipc_perm *ipcp, int shmflg)
 }
 
 /*
- * Called with shm_ids.rw_mutex and ipcp locked.
+ * Called with shm_ids.rwsem and ipcp locked.
  */
 static inline int shm_more_checks(struct kern_ipc_perm *ipcp,
                                struct ipc_params *params)
@@ -684,7 +696,7 @@ static inline unsigned long copy_shminfo_to_user(void __user *buf, struct shminf
 
 /*
  * Calculate and add used RSS and swap pages of a shm.
- * Called with shm_ids.rw_mutex held as a reader
+ * Called with shm_ids.rwsem held as a reader
  */
 static void shm_add_rss_swap(struct shmid_kernel *shp,
        unsigned long *rss_add, unsigned long *swp_add)
@@ -711,7 +723,7 @@ static void shm_add_rss_swap(struct shmid_kernel *shp,
 }
 
 /*
- * Called with shm_ids.rw_mutex held as a reader
+ * Called with shm_ids.rwsem held as a reader
  */
 static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss,
                unsigned long *swp)
@@ -740,9 +752,9 @@ static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss,
 }
 
 /*
- * This function handles some shmctl commands which require the rw_mutex
+ * This function handles some shmctl commands which require the rwsem
  * to be held in write mode.
- * NOTE: no locks must be held, the rw_mutex is taken inside this function.
+ * NOTE: no locks must be held, the rwsem is taken inside this function.
  */
 static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd,
                       struct shmid_ds __user *buf, int version)
@@ -757,14 +769,13 @@ static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd,
                        return -EFAULT;
        }
 
-       down_write(&shm_ids(ns).rw_mutex);
+       down_write(&shm_ids(ns).rwsem);
        rcu_read_lock();
 
-       ipcp = ipcctl_pre_down(ns, &shm_ids(ns), shmid, cmd,
-                              &shmid64.shm_perm, 0);
+       ipcp = ipcctl_pre_down_nolock(ns, &shm_ids(ns), shmid, cmd,
+                                     &shmid64.shm_perm, 0);
        if (IS_ERR(ipcp)) {
                err = PTR_ERR(ipcp);
-               /* the ipc lock is not held upon failure */
                goto out_unlock1;
        }
 
@@ -772,14 +783,16 @@ static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd,
 
        err = security_shm_shmctl(shp, cmd);
        if (err)
-               goto out_unlock0;
+               goto out_unlock1;
 
        switch (cmd) {
        case IPC_RMID:
+               ipc_lock_object(&shp->shm_perm);
                /* do_shm_rmid unlocks the ipc object and rcu */
                do_shm_rmid(ns, ipcp);
                goto out_up;
        case IPC_SET:
+               ipc_lock_object(&shp->shm_perm);
                err = ipc_update_perm(&shmid64.shm_perm, ipcp);
                if (err)
                        goto out_unlock0;
@@ -787,6 +800,7 @@ static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd,
                break;
        default:
                err = -EINVAL;
+               goto out_unlock1;
        }
 
 out_unlock0:
@@ -794,33 +808,28 @@ out_unlock0:
 out_unlock1:
        rcu_read_unlock();
 out_up:
-       up_write(&shm_ids(ns).rw_mutex);
+       up_write(&shm_ids(ns).rwsem);
        return err;
 }
 
-SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
+static int shmctl_nolock(struct ipc_namespace *ns, int shmid,
+                        int cmd, int version, void __user *buf)
 {
+       int err;
        struct shmid_kernel *shp;
-       int err, version;
-       struct ipc_namespace *ns;
 
-       if (cmd < 0 || shmid < 0) {
-               err = -EINVAL;
-               goto out;
+       /* preliminary security checks for *_INFO */
+       if (cmd == IPC_INFO || cmd == SHM_INFO) {
+               err = security_shm_shmctl(NULL, cmd);
+               if (err)
+                       return err;
        }
 
-       version = ipc_parse_version(&cmd);
-       ns = current->nsproxy->ipc_ns;
-
-       switch (cmd) { /* replace with proc interface ? */
+       switch (cmd) {
        case IPC_INFO:
        {
                struct shminfo64 shminfo;
 
-               err = security_shm_shmctl(NULL, cmd);
-               if (err)
-                       return err;
-
                memset(&shminfo, 0, sizeof(shminfo));
                shminfo.shmmni = shminfo.shmseg = ns->shm_ctlmni;
                shminfo.shmmax = ns->shm_ctlmax;
@@ -830,9 +839,9 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
                if(copy_shminfo_to_user (buf, &shminfo, version))
                        return -EFAULT;
 
-               down_read(&shm_ids(ns).rw_mutex);
+               down_read(&shm_ids(ns).rwsem);
                err = ipc_get_maxid(&shm_ids(ns));
-               up_read(&shm_ids(ns).rw_mutex);
+               up_read(&shm_ids(ns).rwsem);
 
                if(err<0)
                        err = 0;
@@ -842,19 +851,15 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
        {
                struct shm_info shm_info;
 
-               err = security_shm_shmctl(NULL, cmd);
-               if (err)
-                       return err;
-
                memset(&shm_info, 0, sizeof(shm_info));
-               down_read(&shm_ids(ns).rw_mutex);
+               down_read(&shm_ids(ns).rwsem);
                shm_info.used_ids = shm_ids(ns).in_use;
                shm_get_stat (ns, &shm_info.shm_rss, &shm_info.shm_swp);
                shm_info.shm_tot = ns->shm_tot;
                shm_info.swap_attempts = 0;
                shm_info.swap_successes = 0;
                err = ipc_get_maxid(&shm_ids(ns));
-               up_read(&shm_ids(ns).rw_mutex);
+               up_read(&shm_ids(ns).rwsem);
                if (copy_to_user(buf, &shm_info, sizeof(shm_info))) {
                        err = -EFAULT;
                        goto out;
@@ -869,27 +874,31 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
                struct shmid64_ds tbuf;
                int result;
 
+               rcu_read_lock();
                if (cmd == SHM_STAT) {
-                       shp = shm_lock(ns, shmid);
+                       shp = shm_obtain_object(ns, shmid);
                        if (IS_ERR(shp)) {
                                err = PTR_ERR(shp);
-                               goto out;
+                               goto out_unlock;
                        }
                        result = shp->shm_perm.id;
                } else {
-                       shp = shm_lock_check(ns, shmid);
+                       shp = shm_obtain_object_check(ns, shmid);
                        if (IS_ERR(shp)) {
                                err = PTR_ERR(shp);
-                               goto out;
+                               goto out_unlock;
                        }
                        result = 0;
                }
+
                err = -EACCES;
                if (ipcperms(ns, &shp->shm_perm, S_IRUGO))
                        goto out_unlock;
+
                err = security_shm_shmctl(shp, cmd);
                if (err)
                        goto out_unlock;
+
                memset(&tbuf, 0, sizeof(tbuf));
                kernel_to_ipc64_perm(&shp->shm_perm, &tbuf.shm_perm);
                tbuf.shm_segsz  = shp->shm_segsz;
@@ -899,43 +908,76 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
                tbuf.shm_cpid   = shp->shm_cprid;
                tbuf.shm_lpid   = shp->shm_lprid;
                tbuf.shm_nattch = shp->shm_nattch;
-               shm_unlock(shp);
-               if(copy_shmid_to_user (buf, &tbuf, version))
+               rcu_read_unlock();
+
+               if (copy_shmid_to_user(buf, &tbuf, version))
                        err = -EFAULT;
                else
                        err = result;
                goto out;
        }
+       default:
+               return -EINVAL;
+       }
+
+out_unlock:
+       rcu_read_unlock();
+out:
+       return err;
+}
+
+SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
+{
+       struct shmid_kernel *shp;
+       int err, version;
+       struct ipc_namespace *ns;
+
+       if (cmd < 0 || shmid < 0)
+               return -EINVAL;
+
+       version = ipc_parse_version(&cmd);
+       ns = current->nsproxy->ipc_ns;
+
+       switch (cmd) {
+       case IPC_INFO:
+       case SHM_INFO:
+       case SHM_STAT:
+       case IPC_STAT:
+               return shmctl_nolock(ns, shmid, cmd, version, buf);
+       case IPC_RMID:
+       case IPC_SET:
+               return shmctl_down(ns, shmid, cmd, buf, version);
        case SHM_LOCK:
        case SHM_UNLOCK:
        {
                struct file *shm_file;
 
-               shp = shm_lock_check(ns, shmid);
+               rcu_read_lock();
+               shp = shm_obtain_object_check(ns, shmid);
                if (IS_ERR(shp)) {
                        err = PTR_ERR(shp);
-                       goto out;
+                       goto out_unlock1;
                }
 
                audit_ipc_obj(&(shp->shm_perm));
+               err = security_shm_shmctl(shp, cmd);
+               if (err)
+                       goto out_unlock1;
 
+               ipc_lock_object(&shp->shm_perm);
                if (!ns_capable(ns->user_ns, CAP_IPC_LOCK)) {
                        kuid_t euid = current_euid();
                        err = -EPERM;
                        if (!uid_eq(euid, shp->shm_perm.uid) &&
                            !uid_eq(euid, shp->shm_perm.cuid))
-                               goto out_unlock;
+                               goto out_unlock0;
                        if (cmd == SHM_LOCK && !rlimit(RLIMIT_MEMLOCK))
-                               goto out_unlock;
+                               goto out_unlock0;
                }
 
-               err = security_shm_shmctl(shp, cmd);
-               if (err)
-                       goto out_unlock;
-
                shm_file = shp->shm_file;
                if (is_file_hugepages(shm_file))
-                       goto out_unlock;
+                       goto out_unlock0;
 
                if (cmd == SHM_LOCK) {
                        struct user_struct *user = current_user();
@@ -944,32 +986,31 @@ SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
                                shp->shm_perm.mode |= SHM_LOCKED;
                                shp->mlock_user = user;
                        }
-                       goto out_unlock;
+                       goto out_unlock0;
                }
 
                /* SHM_UNLOCK */
                if (!(shp->shm_perm.mode & SHM_LOCKED))
-                       goto out_unlock;
+                       goto out_unlock0;
                shmem_lock(shm_file, 0, shp->mlock_user);
                shp->shm_perm.mode &= ~SHM_LOCKED;
                shp->mlock_user = NULL;
                get_file(shm_file);
-               shm_unlock(shp);
+               ipc_unlock_object(&shp->shm_perm);
+               rcu_read_unlock();
                shmem_unlock_mapping(shm_file->f_mapping);
+
                fput(shm_file);
-               goto out;
-       }
-       case IPC_RMID:
-       case IPC_SET:
-               err = shmctl_down(ns, shmid, cmd, buf, version);
                return err;
+       }
        default:
                return -EINVAL;
        }
 
-out_unlock:
-       shm_unlock(shp);
-out:
+out_unlock0:
+       ipc_unlock_object(&shp->shm_perm);
+out_unlock1:
+       rcu_read_unlock();
        return err;
 }
 
@@ -1037,10 +1078,11 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr,
         * additional creator id...
         */
        ns = current->nsproxy->ipc_ns;
-       shp = shm_lock_check(ns, shmid);
+       rcu_read_lock();
+       shp = shm_obtain_object_check(ns, shmid);
        if (IS_ERR(shp)) {
                err = PTR_ERR(shp);
-               goto out;
+               goto out_unlock;
        }
 
        err = -EACCES;
@@ -1051,24 +1093,31 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr,
        if (err)
                goto out_unlock;
 
+       ipc_lock_object(&shp->shm_perm);
        path = shp->shm_file->f_path;
        path_get(&path);
        shp->shm_nattch++;
        size = i_size_read(path.dentry->d_inode);
-       shm_unlock(shp);
+       ipc_unlock_object(&shp->shm_perm);
+       rcu_read_unlock();
 
        err = -ENOMEM;
        sfd = kzalloc(sizeof(*sfd), GFP_KERNEL);
-       if (!sfd)
-               goto out_put_dentry;
+       if (!sfd) {
+               path_put(&path);
+               goto out_nattch;
+       }
 
        file = alloc_file(&path, f_mode,
                          is_file_hugepages(shp->shm_file) ?
                                &shm_file_operations_huge :
                                &shm_file_operations);
        err = PTR_ERR(file);
-       if (IS_ERR(file))
-               goto out_free;
+       if (IS_ERR(file)) {
+               kfree(sfd);
+               path_put(&path);
+               goto out_nattch;
+       }
 
        file->private_data = sfd;
        file->f_mapping = shp->shm_file->f_mapping;
@@ -1094,7 +1143,7 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr,
                    addr > current->mm->start_stack - size - PAGE_SIZE * 5)
                        goto invalid;
        }
-               
+
        addr = do_mmap_pgoff(file, addr, size, prot, flags, 0, &populate);
        *raddr = addr;
        err = 0;
@@ -1109,7 +1158,7 @@ out_fput:
        fput(file);
 
 out_nattch:
-       down_write(&shm_ids(ns).rw_mutex);
+       down_write(&shm_ids(ns).rwsem);
        shp = shm_lock(ns, shmid);
        BUG_ON(IS_ERR(shp));
        shp->shm_nattch--;
@@ -1117,20 +1166,13 @@ out_nattch:
                shm_destroy(ns, shp);
        else
                shm_unlock(shp);
-       up_write(&shm_ids(ns).rw_mutex);
-
-out:
+       up_write(&shm_ids(ns).rwsem);
        return err;
 
 out_unlock:
-       shm_unlock(shp);
-       goto out;
-
-out_free:
-       kfree(sfd);
-out_put_dentry:
-       path_put(&path);
-       goto out_nattch;
+       rcu_read_unlock();
+out:
+       return err;
 }
 
 SYSCALL_DEFINE3(shmat, int, shmid, char __user *, shmaddr, int, shmflg)
@@ -1235,8 +1277,7 @@ SYSCALL_DEFINE1(shmdt, char __user *, shmaddr)
 #else /* CONFIG_MMU */
        /* under NOMMU conditions, the exact address to be destroyed must be
         * given */
-       retval = -EINVAL;
-       if (vma->vm_start == addr && vma->vm_ops == &shm_vm_ops) {
+       if (vma && vma->vm_start == addr && vma->vm_ops == &shm_vm_ops) {
                do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start);
                retval = 0;
        }
index 4704223..e829da9 100644 (file)
  * Jun 2006 - namespaces ssupport
  *            OpenVZ, SWsoft Inc.
  *            Pavel Emelianov <xemul@openvz.org>
+ *
+ * General sysv ipc locking scheme:
+ *  when doing ipc id lookups, take the ids->rwsem
+ *      rcu_read_lock()
+ *          obtain the ipc object (kern_ipc_perm)
+ *          perform security, capabilities, auditing and permission checks, etc.
+ *          acquire the ipc lock (kern_ipc_perm.lock) through ipc_lock_object()
+ *             perform data updates (ie: SET, RMID, LOCK/UNLOCK commands)
  */
 
 #include <linux/mm.h>
@@ -119,7 +127,7 @@ __initcall(ipc_init);
  
 void ipc_init_ids(struct ipc_ids *ids)
 {
-       init_rwsem(&ids->rw_mutex);
+       init_rwsem(&ids->rwsem);
 
        ids->in_use = 0;
        ids->seq = 0;
@@ -174,7 +182,7 @@ void __init ipc_init_proc_interface(const char *path, const char *header,
  *     @ids: Identifier set
  *     @key: The key to find
  *     
- *     Requires ipc_ids.rw_mutex locked.
+ *     Requires ipc_ids.rwsem locked.
  *     Returns the LOCKED pointer to the ipc structure if found or NULL
  *     if not.
  *     If key is found ipc points to the owning ipc structure
@@ -197,7 +205,8 @@ static struct kern_ipc_perm *ipc_findkey(struct ipc_ids *ids, key_t key)
                        continue;
                }
 
-               ipc_lock_by_ptr(ipc);
+               rcu_read_lock();
+               ipc_lock_object(ipc);
                return ipc;
        }
 
@@ -208,7 +217,7 @@ static struct kern_ipc_perm *ipc_findkey(struct ipc_ids *ids, key_t key)
  *     ipc_get_maxid   -       get the last assigned id
  *     @ids: IPC identifier set
  *
- *     Called with ipc_ids.rw_mutex held.
+ *     Called with ipc_ids.rwsem held.
  */
 
 int ipc_get_maxid(struct ipc_ids *ids)
@@ -246,7 +255,7 @@ int ipc_get_maxid(struct ipc_ids *ids)
  *     is returned. The 'new' entry is returned in a locked state on success.
  *     On failure the entry is not locked and a negative err-code is returned.
  *
- *     Called with writer ipc_ids.rw_mutex held.
+ *     Called with writer ipc_ids.rwsem held.
  */
 int ipc_addid(struct ipc_ids* ids, struct kern_ipc_perm* new, int size)
 {
@@ -312,9 +321,9 @@ static int ipcget_new(struct ipc_namespace *ns, struct ipc_ids *ids,
 {
        int err;
 
-       down_write(&ids->rw_mutex);
+       down_write(&ids->rwsem);
        err = ops->getnew(ns, params);
-       up_write(&ids->rw_mutex);
+       up_write(&ids->rwsem);
        return err;
 }
 
@@ -331,7 +340,7 @@ static int ipcget_new(struct ipc_namespace *ns, struct ipc_ids *ids,
  *
  *     On success, the IPC id is returned.
  *
- *     It is called with ipc_ids.rw_mutex and ipcp->lock held.
+ *     It is called with ipc_ids.rwsem and ipcp->lock held.
  */
 static int ipc_check_perms(struct ipc_namespace *ns,
                           struct kern_ipc_perm *ipcp,
@@ -376,7 +385,7 @@ static int ipcget_public(struct ipc_namespace *ns, struct ipc_ids *ids,
         * Take the lock as a writer since we are potentially going to add
         * a new entry + read locks are not "upgradable"
         */
-       down_write(&ids->rw_mutex);
+       down_write(&ids->rwsem);
        ipcp = ipc_findkey(ids, params->key);
        if (ipcp == NULL) {
                /* key not used */
@@ -402,7 +411,7 @@ static int ipcget_public(struct ipc_namespace *ns, struct ipc_ids *ids,
                }
                ipc_unlock(ipcp);
        }
-       up_write(&ids->rw_mutex);
+       up_write(&ids->rwsem);
 
        return err;
 }
@@ -413,7 +422,7 @@ static int ipcget_public(struct ipc_namespace *ns, struct ipc_ids *ids,
  *     @ids: IPC identifier set
  *     @ipcp: ipc perm structure containing the identifier to remove
  *
- *     ipc_ids.rw_mutex (as a writer) and the spinlock for this ID are held
+ *     ipc_ids.rwsem (as a writer) and the spinlock for this ID are held
  *     before this function is called, and remain locked on the exit.
  */
  
@@ -621,7 +630,7 @@ struct kern_ipc_perm *ipc_obtain_object(struct ipc_ids *ids, int id)
 }
 
 /**
- * ipc_lock - Lock an ipc structure without rw_mutex held
+ * ipc_lock - Lock an ipc structure without rwsem held
  * @ids: IPC identifier set
  * @id: ipc id to look for
  *
@@ -677,22 +686,6 @@ out:
        return out;
 }
 
-struct kern_ipc_perm *ipc_lock_check(struct ipc_ids *ids, int id)
-{
-       struct kern_ipc_perm *out;
-
-       out = ipc_lock(ids, id);
-       if (IS_ERR(out))
-               return out;
-
-       if (ipc_checkid(out, id)) {
-               ipc_unlock(out);
-               return ERR_PTR(-EIDRM);
-       }
-
-       return out;
-}
-
 /**
  * ipcget - Common sys_*get() code
  * @ns : namsepace
@@ -733,7 +726,7 @@ int ipc_update_perm(struct ipc64_perm *in, struct kern_ipc_perm *out)
 }
 
 /**
- * ipcctl_pre_down - retrieve an ipc and check permissions for some IPC_XXX cmd
+ * ipcctl_pre_down_nolock - retrieve an ipc and check permissions for some IPC_XXX cmd
  * @ns:  the ipc namespace
  * @ids:  the table of ids where to look for the ipc
  * @id:   the id of the ipc to retrieve
@@ -746,29 +739,13 @@ int ipc_update_perm(struct ipc64_perm *in, struct kern_ipc_perm *out)
  * It must be called without any lock held and
  *  - retrieves the ipc with the given id in the given table.
  *  - performs some audit and permission check, depending on the given cmd
- *  - returns the ipc with the ipc lock held in case of success
- *    or an err-code without any lock held otherwise.
+ *  - returns a pointer to the ipc object or otherwise, the corresponding error.
  *
- * Call holding the both the rw_mutex and the rcu read lock.
+ * Call holding the both the rwsem and the rcu read lock.
  */
-struct kern_ipc_perm *ipcctl_pre_down(struct ipc_namespace *ns,
-                                     struct ipc_ids *ids, int id, int cmd,
-                                     struct ipc64_perm *perm, int extra_perm)
-{
-       struct kern_ipc_perm *ipcp;
-
-       ipcp = ipcctl_pre_down_nolock(ns, ids, id, cmd, perm, extra_perm);
-       if (IS_ERR(ipcp))
-               goto out;
-
-       spin_lock(&ipcp->lock);
-out:
-       return ipcp;
-}
-
 struct kern_ipc_perm *ipcctl_pre_down_nolock(struct ipc_namespace *ns,
-                                            struct ipc_ids *ids, int id, int cmd,
-                                            struct ipc64_perm *perm, int extra_perm)
+                                       struct ipc_ids *ids, int id, int cmd,
+                                       struct ipc64_perm *perm, int extra_perm)
 {
        kuid_t euid;
        int err = -EPERM;
@@ -846,7 +823,8 @@ static struct kern_ipc_perm *sysvipc_find_ipc(struct ipc_ids *ids, loff_t pos,
                ipc = idr_find(&ids->ipcs_idr, pos);
                if (ipc != NULL) {
                        *new_pos = pos + 1;
-                       ipc_lock_by_ptr(ipc);
+                       rcu_read_lock();
+                       ipc_lock_object(ipc);
                        return ipc;
                }
        }
@@ -884,7 +862,7 @@ static void *sysvipc_proc_start(struct seq_file *s, loff_t *pos)
         * Take the lock - this will be released by the corresponding
         * call to stop().
         */
-       down_read(&ids->rw_mutex);
+       down_read(&ids->rwsem);
 
        /* pos < 0 is invalid */
        if (*pos < 0)
@@ -911,7 +889,7 @@ static void sysvipc_proc_stop(struct seq_file *s, void *it)
 
        ids = &iter->ns->ids[iface->ids];
        /* Release the lock we took in start() */
-       up_read(&ids->rw_mutex);
+       up_read(&ids->rwsem);
 }
 
 static int sysvipc_proc_show(struct seq_file *s, void *it)
index b6a6a88..c5f3338 100644 (file)
@@ -94,10 +94,10 @@ void __init ipc_init_proc_interface(const char *path, const char *header,
 #define ipcid_to_idx(id) ((id) % SEQ_MULTIPLIER)
 #define ipcid_to_seqx(id) ((id) / SEQ_MULTIPLIER)
 
-/* must be called with ids->rw_mutex acquired for writing */
+/* must be called with ids->rwsem acquired for writing */
 int ipc_addid(struct ipc_ids *, struct kern_ipc_perm *, int);
 
-/* must be called with ids->rw_mutex acquired for reading */
+/* must be called with ids->rwsem acquired for reading */
 int ipc_get_maxid(struct ipc_ids *);
 
 /* must be called with both locks acquired. */
@@ -131,9 +131,6 @@ int ipc_update_perm(struct ipc64_perm *in, struct kern_ipc_perm *out);
 struct kern_ipc_perm *ipcctl_pre_down_nolock(struct ipc_namespace *ns,
                                             struct ipc_ids *ids, int id, int cmd,
                                             struct ipc64_perm *perm, int extra_perm);
-struct kern_ipc_perm *ipcctl_pre_down(struct ipc_namespace *ns,
-                                     struct ipc_ids *ids, int id, int cmd,
-                                     struct ipc64_perm *perm, int extra_perm);
 
 #ifndef CONFIG_ARCH_WANT_IPC_PARSE_VERSION
   /* On IA-64, we always use the "64-bit version" of the IPC structures.  */ 
@@ -174,19 +171,12 @@ static inline void ipc_assert_locked_object(struct kern_ipc_perm *perm)
        assert_spin_locked(&perm->lock);
 }
 
-static inline void ipc_lock_by_ptr(struct kern_ipc_perm *perm)
-{
-       rcu_read_lock();
-       ipc_lock_object(perm);
-}
-
 static inline void ipc_unlock(struct kern_ipc_perm *perm)
 {
        ipc_unlock_object(perm);
        rcu_read_unlock();
 }
 
-struct kern_ipc_perm *ipc_lock_check(struct ipc_ids *ids, int id);
 struct kern_ipc_perm *ipc_obtain_object_check(struct ipc_ids *ids, int id);
 int ipcget(struct ipc_namespace *ns, struct ipc_ids *ids,
                        struct ipc_ops *ops, struct ipc_params *params);
index 35ef118..1ce4755 100644 (file)
@@ -26,6 +26,7 @@ obj-y += sched/
 obj-y += power/
 obj-y += printk/
 obj-y += cpu/
+obj-y += irq/
 
 obj-$(CONFIG_CHECKPOINT_RESTORE) += kcmp.o
 obj-$(CONFIG_FREEZER) += freezer.o
@@ -79,7 +80,6 @@ obj-$(CONFIG_KPROBES) += kprobes.o
 obj-$(CONFIG_KGDB) += debug/
 obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o
 obj-$(CONFIG_LOCKUP_DETECTOR) += watchdog.o
-obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
 obj-$(CONFIG_SECCOMP) += seccomp.o
 obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
 obj-$(CONFIG_TREE_RCU) += rcutree.o
index 2207efc..dd236b6 100644 (file)
@@ -5039,6 +5039,7 @@ static void perf_event_mmap_output(struct perf_event *event,
                mmap_event->event_id.header.size += sizeof(mmap_event->maj);
                mmap_event->event_id.header.size += sizeof(mmap_event->min);
                mmap_event->event_id.header.size += sizeof(mmap_event->ino);
+               mmap_event->event_id.header.size += sizeof(mmap_event->ino_generation);
        }
 
        perf_event_header__init_id(&mmap_event->event_id.header, &sample, event);
index f356974..ad8e1bd 100644 (file)
@@ -1682,12 +1682,10 @@ static bool handle_trampoline(struct pt_regs *regs)
                tmp = ri;
                ri = ri->next;
                kfree(tmp);
+               utask->depth--;
 
                if (!chained)
                        break;
-
-               utask->depth--;
-
                BUG_ON(!ri);
        }
 
index 67460b9..832cb28 100644 (file)
@@ -41,7 +41,7 @@ u32 __initdata main_extable_sort_needed = 1;
 /* Sort the kernel's built-in exception table */
 void __init sort_main_extable(void)
 {
-       if (main_extable_sort_needed) {
+       if (main_extable_sort_needed && __stop___ex_table > __start___ex_table) {
                pr_notice("Sorting __ex_table...\n");
                sort_extable(__start___ex_table, __stop___ex_table);
        }
index c9eaf20..086fe73 100644 (file)
@@ -351,7 +351,6 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
        struct rb_node **rb_link, *rb_parent;
        int retval;
        unsigned long charge;
-       struct mempolicy *pol;
 
        uprobe_start_dup_mmap();
        down_write(&oldmm->mmap_sem);
@@ -400,11 +399,9 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
                        goto fail_nomem;
                *tmp = *mpnt;
                INIT_LIST_HEAD(&tmp->anon_vma_chain);
-               pol = mpol_dup(vma_policy(mpnt));
-               retval = PTR_ERR(pol);
-               if (IS_ERR(pol))
+               retval = vma_dup_policy(mpnt, tmp);
+               if (retval)
                        goto fail_nomem_policy;
-               vma_set_policy(tmp, pol);
                tmp->vm_mm = mm;
                if (anon_vma_fork(tmp, mpnt))
                        goto fail_nomem_anon_vma_fork;
@@ -472,7 +469,7 @@ out:
        uprobe_end_dup_mmap();
        return retval;
 fail_nomem_anon_vma_fork:
-       mpol_put(pol);
+       mpol_put(vma_policy(tmp));
 fail_nomem_policy:
        kmem_cache_free(vm_area_cachep, tmp);
 fail_nomem:
@@ -522,7 +519,7 @@ static void mm_init_aio(struct mm_struct *mm)
 {
 #ifdef CONFIG_AIO
        spin_lock_init(&mm->ioctx_lock);
-       INIT_HLIST_HEAD(&mm->ioctx_list);
+       mm->ioctx_table = NULL;
 #endif
 }
 
@@ -1173,13 +1170,16 @@ static struct task_struct *copy_process(unsigned long clone_flags,
                return ERR_PTR(-EINVAL);
 
        /*
-        * If the new process will be in a different pid namespace
-        * don't allow the creation of threads.
+        * If the new process will be in a different pid or user namespace
+        * do not allow it to share a thread group or signal handlers or
+        * parent with the forking task.
         */
-       if ((clone_flags & (CLONE_VM|CLONE_NEWPID)) &&
-           (task_active_pid_ns(current) !=
-            current->nsproxy->pid_ns_for_children))
-               return ERR_PTR(-EINVAL);
+       if (clone_flags & (CLONE_SIGHAND | CLONE_PARENT)) {
+               if ((clone_flags & (CLONE_NEWUSER | CLONE_NEWPID)) ||
+                   (task_active_pid_ns(current) !=
+                               current->nsproxy->pid_ns_for_children))
+                       return ERR_PTR(-EINVAL);
+       }
 
        retval = security_task_create(clone_flags);
        if (retval)
@@ -1575,15 +1575,6 @@ long do_fork(unsigned long clone_flags,
        int trace = 0;
        long nr;
 
-       /*
-        * Do some preliminary argument and permissions checking before we
-        * actually start allocating stuff
-        */
-       if (clone_flags & (CLONE_NEWUSER | CLONE_NEWPID)) {
-               if (clone_flags & (CLONE_THREAD|CLONE_PARENT))
-                       return -EINVAL;
-       }
-
        /*
         * Determine whether and which event to report to ptracer.  When
         * called from kernel_thread or CLONE_UNTRACED is explicitly
index 9bd0934..7a7d2ee 100644 (file)
@@ -74,7 +74,7 @@ static int __init gcov_persist_setup(char *str)
 {
        unsigned long val;
 
-       if (strict_strtoul(str, 0, &val)) {
+       if (kstrtoul(str, 0, &val)) {
                pr_warning("invalid gcov_persist parameter '%s'\n", str);
                return 0;
        }
index d1a758b..4a1fef0 100644 (file)
@@ -1,15 +1,4 @@
-# Select this to activate the generic irq options below
-config HAVE_GENERIC_HARDIRQS
-       bool
-
-if HAVE_GENERIC_HARDIRQS
 menu "IRQ subsystem"
-#
-# Interrupt subsystem related configuration options
-#
-config GENERIC_HARDIRQS
-       def_bool y
-
 # Options selectable by the architecture code
 
 # Make sparse irq Kconfig switch below available
@@ -84,4 +73,3 @@ config SPARSE_IRQ
          If you don't know what to do here, say N.
 
 endmenu
-endif
index 59f7b55..2a74f30 100644 (file)
@@ -1474,11 +1474,8 @@ static int __init __parse_crashkernel(char *cmdline,
        if (first_colon && (!first_space || first_colon < first_space))
                return parse_crashkernel_mem(ck_cmdline, system_ram,
                                crash_size, crash_base);
-       else
-               return parse_crashkernel_simple(ck_cmdline, crash_size,
-                               crash_base);
 
-       return 0;
+       return parse_crashkernel_simple(ck_cmdline, crash_size, crash_base);
 }
 
 /*
index 6e33498..a0d367a 100644 (file)
@@ -112,6 +112,7 @@ static struct kprobe_blackpoint kprobe_blacklist[] = {
 struct kprobe_insn_page {
        struct list_head list;
        kprobe_opcode_t *insns;         /* Page of instruction slots */
+       struct kprobe_insn_cache *cache;
        int nused;
        int ngarbage;
        char slot_used[];
@@ -121,12 +122,6 @@ struct kprobe_insn_page {
        (offsetof(struct kprobe_insn_page, slot_used) + \
         (sizeof(char) * (slots)))
 
-struct kprobe_insn_cache {
-       struct list_head pages; /* list of kprobe_insn_page */
-       size_t insn_size;       /* size of instruction slot */
-       int nr_garbage;
-};
-
 static int slots_per_page(struct kprobe_insn_cache *c)
 {
        return PAGE_SIZE/(c->insn_size * sizeof(kprobe_opcode_t));
@@ -138,8 +133,20 @@ enum kprobe_slot_state {
        SLOT_USED = 2,
 };
 
-static DEFINE_MUTEX(kprobe_insn_mutex);        /* Protects kprobe_insn_slots */
-static struct kprobe_insn_cache kprobe_insn_slots = {
+static void *alloc_insn_page(void)
+{
+       return module_alloc(PAGE_SIZE);
+}
+
+static void free_insn_page(void *page)
+{
+       module_free(NULL, page);
+}
+
+struct kprobe_insn_cache kprobe_insn_slots = {
+       .mutex = __MUTEX_INITIALIZER(kprobe_insn_slots.mutex),
+       .alloc = alloc_insn_page,
+       .free = free_insn_page,
        .pages = LIST_HEAD_INIT(kprobe_insn_slots.pages),
        .insn_size = MAX_INSN_SIZE,
        .nr_garbage = 0,
@@ -150,10 +157,12 @@ static int __kprobes collect_garbage_slots(struct kprobe_insn_cache *c);
  * __get_insn_slot() - Find a slot on an executable page for an instruction.
  * We allocate an executable page if there's no room on existing ones.
  */
-static kprobe_opcode_t __kprobes *__get_insn_slot(struct kprobe_insn_cache *c)
+kprobe_opcode_t __kprobes *__get_insn_slot(struct kprobe_insn_cache *c)
 {
        struct kprobe_insn_page *kip;
+       kprobe_opcode_t *slot = NULL;
 
+       mutex_lock(&c->mutex);
  retry:
        list_for_each_entry(kip, &c->pages, list) {
                if (kip->nused < slots_per_page(c)) {
@@ -162,7 +171,8 @@ static kprobe_opcode_t __kprobes *__get_insn_slot(struct kprobe_insn_cache *c)
                                if (kip->slot_used[i] == SLOT_CLEAN) {
                                        kip->slot_used[i] = SLOT_USED;
                                        kip->nused++;
-                                       return kip->insns + (i * c->insn_size);
+                                       slot = kip->insns + (i * c->insn_size);
+                                       goto out;
                                }
                        }
                        /* kip->nused is broken. Fix it. */
@@ -178,37 +188,29 @@ static kprobe_opcode_t __kprobes *__get_insn_slot(struct kprobe_insn_cache *c)
        /* All out of space.  Need to allocate a new page. */
        kip = kmalloc(KPROBE_INSN_PAGE_SIZE(slots_per_page(c)), GFP_KERNEL);
        if (!kip)
-               return NULL;
+               goto out;
 
        /*
         * Use module_alloc so this page is within +/- 2GB of where the
         * kernel image and loaded module images reside. This is required
         * so x86_64 can correctly handle the %rip-relative fixups.
         */
-       kip->insns = module_alloc(PAGE_SIZE);
+       kip->insns = c->alloc();
        if (!kip->insns) {
                kfree(kip);
-               return NULL;
+               goto out;
        }
        INIT_LIST_HEAD(&kip->list);
        memset(kip->slot_used, SLOT_CLEAN, slots_per_page(c));
        kip->slot_used[0] = SLOT_USED;
        kip->nused = 1;
        kip->ngarbage = 0;
+       kip->cache = c;
        list_add(&kip->list, &c->pages);
-       return kip->insns;
-}
-
-
-kprobe_opcode_t __kprobes *get_insn_slot(void)
-{
-       kprobe_opcode_t *ret = NULL;
-
-       mutex_lock(&kprobe_insn_mutex);
-       ret = __get_insn_slot(&kprobe_insn_slots);
-       mutex_unlock(&kprobe_insn_mutex);
-
-       return ret;
+       slot = kip->insns;
+out:
+       mutex_unlock(&c->mutex);
+       return slot;
 }
 
 /* Return 1 if all garbages are collected, otherwise 0. */
@@ -225,7 +227,7 @@ static int __kprobes collect_one_slot(struct kprobe_insn_page *kip, int idx)
                 */
                if (!list_is_singular(&kip->list)) {
                        list_del(&kip->list);
-                       module_free(NULL, kip->insns);
+                       kip->cache->free(kip->insns);
                        kfree(kip);
                }
                return 1;
@@ -255,11 +257,12 @@ static int __kprobes collect_garbage_slots(struct kprobe_insn_cache *c)
        return 0;
 }
 
-static void __kprobes __free_insn_slot(struct kprobe_insn_cache *c,
-                                      kprobe_opcode_t *slot, int dirty)
+void __kprobes __free_insn_slot(struct kprobe_insn_cache *c,
+                               kprobe_opcode_t *slot, int dirty)
 {
        struct kprobe_insn_page *kip;
 
+       mutex_lock(&c->mutex);
        list_for_each_entry(kip, &c->pages, list) {
                long idx = ((long)slot - (long)kip->insns) /
                                (c->insn_size * sizeof(kprobe_opcode_t));
@@ -272,45 +275,25 @@ static void __kprobes __free_insn_slot(struct kprobe_insn_cache *c,
                                        collect_garbage_slots(c);
                        } else
                                collect_one_slot(kip, idx);
-                       return;
+                       goto out;
                }
        }
        /* Could not free this slot. */
        WARN_ON(1);
+out:
+       mutex_unlock(&c->mutex);
 }
 
-void __kprobes free_insn_slot(kprobe_opcode_t * slot, int dirty)
-{
-       mutex_lock(&kprobe_insn_mutex);
-       __free_insn_slot(&kprobe_insn_slots, slot, dirty);
-       mutex_unlock(&kprobe_insn_mutex);
-}
 #ifdef CONFIG_OPTPROBES
 /* For optimized_kprobe buffer */
-static DEFINE_MUTEX(kprobe_optinsn_mutex); /* Protects kprobe_optinsn_slots */
-static struct kprobe_insn_cache kprobe_optinsn_slots = {
+struct kprobe_insn_cache kprobe_optinsn_slots = {
+       .mutex = __MUTEX_INITIALIZER(kprobe_optinsn_slots.mutex),
+       .alloc = alloc_insn_page,
+       .free = free_insn_page,
        .pages = LIST_HEAD_INIT(kprobe_optinsn_slots.pages),
        /* .insn_size is initialized later */
        .nr_garbage = 0,
 };
-/* Get a slot for optimized_kprobe buffer */
-kprobe_opcode_t __kprobes *get_optinsn_slot(void)
-{
-       kprobe_opcode_t *ret = NULL;
-
-       mutex_lock(&kprobe_optinsn_mutex);
-       ret = __get_insn_slot(&kprobe_optinsn_slots);
-       mutex_unlock(&kprobe_optinsn_mutex);
-
-       return ret;
-}
-
-void __kprobes free_optinsn_slot(kprobe_opcode_t * slot, int dirty)
-{
-       mutex_lock(&kprobe_optinsn_mutex);
-       __free_insn_slot(&kprobe_optinsn_slots, slot, dirty);
-       mutex_unlock(&kprobe_optinsn_mutex);
-}
 #endif
 #endif
 
index 6ada93c..9659d38 100644 (file)
@@ -113,7 +113,7 @@ static ssize_t kexec_crash_size_store(struct kobject *kobj,
        unsigned long cnt;
        int ret;
 
-       if (strict_strtoul(buf, 0, &cnt))
+       if (kstrtoul(buf, 0, &cnt))
                return -EINVAL;
 
        ret = crash_shrink_memory(cnt);
index 2b6e699..7cbd450 100644 (file)
 
 struct key *modsign_keyring;
 
-extern __initdata const u8 modsign_certificate_list[];
-extern __initdata const u8 modsign_certificate_list_end[];
+extern __initconst const u8 modsign_certificate_list[];
+extern __initconst const u8 modsign_certificate_list_end[];
 
 /*
  * We need to make sure ccache doesn't cache the .o file as it doesn't notice
  * if modsign.pub changes.
  */
-static __initdata const char annoy_ccache[] = __TIME__ "foo";
+static __initconst const char annoy_ccache[] = __TIME__ "foo";
 
 /*
  * Load the compiled-in keys
index 8018646..b6c482c 100644 (file)
@@ -123,10 +123,14 @@ void panic(const char *fmt, ...)
         */
        smp_send_stop();
 
-       kmsg_dump(KMSG_DUMP_PANIC);
-
+       /*
+        * Run any panic handlers, including those that might need to
+        * add information to the kmsg dump output.
+        */
        atomic_notifier_call_chain(&panic_notifier_list, 0, buf);
 
+       kmsg_dump(KMSG_DUMP_PANIC);
+
        bust_spinlocks(0);
 
        if (!panic_blink)
index 501bde4..81c4e78 100644 (file)
@@ -253,13 +253,13 @@ int parse_args(const char *doing,
        EXPORT_SYMBOL(param_ops_##name)
 
 
-STANDARD_PARAM_DEF(byte, unsigned char, "%hhu", unsigned long, strict_strtoul);
-STANDARD_PARAM_DEF(short, short, "%hi", long, strict_strtol);
-STANDARD_PARAM_DEF(ushort, unsigned short, "%hu", unsigned long, strict_strtoul);
-STANDARD_PARAM_DEF(int, int, "%i", long, strict_strtol);
-STANDARD_PARAM_DEF(uint, unsigned int, "%u", unsigned long, strict_strtoul);
-STANDARD_PARAM_DEF(long, long, "%li", long, strict_strtol);
-STANDARD_PARAM_DEF(ulong, unsigned long, "%lu", unsigned long, strict_strtoul);
+STANDARD_PARAM_DEF(byte, unsigned char, "%hhu", unsigned long, kstrtoul);
+STANDARD_PARAM_DEF(short, short, "%hi", long, kstrtoul);
+STANDARD_PARAM_DEF(ushort, unsigned short, "%hu", unsigned long, kstrtoul);
+STANDARD_PARAM_DEF(int, int, "%i", long, kstrtoul);
+STANDARD_PARAM_DEF(uint, unsigned int, "%u", unsigned long, kstrtoul);
+STANDARD_PARAM_DEF(long, long, "%li", long, kstrtoul);
+STANDARD_PARAM_DEF(ulong, unsigned long, "%lu", unsigned long, kstrtoul);
 
 int param_set_charp(const char *val, const struct kernel_param *kp)
 {
index 3085e62..c9c759d 100644 (file)
@@ -644,22 +644,23 @@ int hibernate(void)
        if (error)
                goto Exit;
 
-       /* Allocate memory management structures */
-       error = create_basic_memory_bitmaps();
-       if (error)
-               goto Exit;
-
        printk(KERN_INFO "PM: Syncing filesystems ... ");
        sys_sync();
        printk("done.\n");
 
        error = freeze_processes();
        if (error)
-               goto Free_bitmaps;
+               goto Exit;
+
+       lock_device_hotplug();
+       /* Allocate memory management structures */
+       error = create_basic_memory_bitmaps();
+       if (error)
+               goto Thaw;
 
        error = hibernation_snapshot(hibernation_mode == HIBERNATION_PLATFORM);
        if (error || freezer_test_done)
-               goto Thaw;
+               goto Free_bitmaps;
 
        if (in_suspend) {
                unsigned int flags = 0;
@@ -682,14 +683,14 @@ int hibernate(void)
                pr_debug("PM: Image restored successfully.\n");
        }
 
+ Free_bitmaps:
+       free_basic_memory_bitmaps();
  Thaw:
+       unlock_device_hotplug();
        thaw_processes();
 
        /* Don't bother checking whether freezer_test_done is true */
        freezer_test_done = false;
-
- Free_bitmaps:
-       free_basic_memory_bitmaps();
  Exit:
        pm_notifier_call_chain(PM_POST_HIBERNATION);
        pm_restore_console();
@@ -806,21 +807,20 @@ static int software_resume(void)
        pm_prepare_console();
        error = pm_notifier_call_chain(PM_RESTORE_PREPARE);
        if (error)
-               goto close_finish;
-
-       error = create_basic_memory_bitmaps();
-       if (error)
-               goto close_finish;
+               goto Close_Finish;
 
        pr_debug("PM: Preparing processes for restore.\n");
        error = freeze_processes();
-       if (error) {
-               swsusp_close(FMODE_READ);
-               goto Done;
-       }
+       if (error)
+               goto Close_Finish;
 
        pr_debug("PM: Loading hibernation image.\n");
 
+       lock_device_hotplug();
+       error = create_basic_memory_bitmaps();
+       if (error)
+               goto Thaw;
+
        error = swsusp_read(&flags);
        swsusp_close(FMODE_READ);
        if (!error)
@@ -828,9 +828,10 @@ static int software_resume(void)
 
        printk(KERN_ERR "PM: Failed to load hibernation image, recovering.\n");
        swsusp_free();
-       thaw_processes();
- Done:
        free_basic_memory_bitmaps();
+ Thaw:
+       unlock_device_hotplug();
+       thaw_processes();
  Finish:
        pm_notifier_call_chain(PM_POST_RESTORE);
        pm_restore_console();
@@ -840,7 +841,7 @@ static int software_resume(void)
        mutex_unlock(&pm_mutex);
        pr_debug("PM: Hibernation image not present or could not be loaded.\n");
        return error;
-close_finish:
+ Close_Finish:
        swsusp_close(FMODE_READ);
        goto Finish;
 }
index 349587b..358a146 100644 (file)
@@ -352,7 +352,7 @@ static int create_mem_extents(struct list_head *list, gfp_t gfp_mask)
                struct mem_extent *ext, *cur, *aux;
 
                zone_start = zone->zone_start_pfn;
-               zone_end = zone->zone_start_pfn + zone->spanned_pages;
+               zone_end = zone_end_pfn(zone);
 
                list_for_each_entry(ext, list, hook)
                        if (zone_start <= ext->end)
@@ -884,7 +884,7 @@ static unsigned int count_highmem_pages(void)
                        continue;
 
                mark_free_pages(zone);
-               max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
+               max_zone_pfn = zone_end_pfn(zone);
                for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
                        if (saveable_highmem_page(zone, pfn))
                                n++;
@@ -948,7 +948,7 @@ static unsigned int count_data_pages(void)
                        continue;
 
                mark_free_pages(zone);
-               max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
+               max_zone_pfn = zone_end_pfn(zone);
                for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
                        if (saveable_page(zone, pfn))
                                n++;
@@ -1041,7 +1041,7 @@ copy_data_pages(struct memory_bitmap *copy_bm, struct memory_bitmap *orig_bm)
                unsigned long max_zone_pfn;
 
                mark_free_pages(zone);
-               max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
+               max_zone_pfn = zone_end_pfn(zone);
                for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
                        if (page_is_saveable(zone, pfn))
                                memory_bm_set_bit(orig_bm, pfn);
@@ -1093,7 +1093,7 @@ void swsusp_free(void)
        unsigned long pfn, max_zone_pfn;
 
        for_each_populated_zone(zone) {
-               max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
+               max_zone_pfn = zone_end_pfn(zone);
                for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
                        if (pfn_valid(pfn)) {
                                struct page *page = pfn_to_page(pfn);
@@ -1755,7 +1755,7 @@ static int mark_unsafe_pages(struct memory_bitmap *bm)
 
        /* Clear page flags */
        for_each_populated_zone(zone) {
-               max_zone_pfn = zone->zone_start_pfn + zone->spanned_pages;
+               max_zone_pfn = zone_end_pfn(zone);
                for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
                        if (pfn_valid(pfn))
                                swsusp_unset_page_free(pfn_to_page(pfn));
index 4ed81e7..72e8f4f 100644 (file)
@@ -60,11 +60,6 @@ static int snapshot_open(struct inode *inode, struct file *filp)
                error = -ENOSYS;
                goto Unlock;
        }
-       if(create_basic_memory_bitmaps()) {
-               atomic_inc(&snapshot_device_available);
-               error = -ENOMEM;
-               goto Unlock;
-       }
        nonseekable_open(inode, filp);
        data = &snapshot_state;
        filp->private_data = data;
@@ -90,10 +85,9 @@ static int snapshot_open(struct inode *inode, struct file *filp)
                if (error)
                        pm_notifier_call_chain(PM_POST_RESTORE);
        }
-       if (error) {
-               free_basic_memory_bitmaps();
+       if (error)
                atomic_inc(&snapshot_device_available);
-       }
+
        data->frozen = 0;
        data->ready = 0;
        data->platform_support = 0;
@@ -111,11 +105,11 @@ static int snapshot_release(struct inode *inode, struct file *filp)
        lock_system_sleep();
 
        swsusp_free();
-       free_basic_memory_bitmaps();
        data = filp->private_data;
        free_all_swap_pages(data->swap);
        if (data->frozen) {
                pm_restore_gfp_mask();
+               free_basic_memory_bitmaps();
                thaw_processes();
        }
        pm_notifier_call_chain(data->mode == O_RDONLY ?
@@ -207,6 +201,7 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
        if (!mutex_trylock(&pm_mutex))
                return -EBUSY;
 
+       lock_device_hotplug();
        data = filp->private_data;
 
        switch (cmd) {
@@ -220,14 +215,22 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
                printk("done.\n");
 
                error = freeze_processes();
-               if (!error)
+               if (error)
+                       break;
+
+               error = create_basic_memory_bitmaps();
+               if (error)
+                       thaw_processes();
+               else
                        data->frozen = 1;
+
                break;
 
        case SNAPSHOT_UNFREEZE:
                if (!data->frozen || data->ready)
                        break;
                pm_restore_gfp_mask();
+               free_basic_memory_bitmaps();
                thaw_processes();
                data->frozen = 0;
                break;
@@ -371,6 +374,7 @@ static long snapshot_ioctl(struct file *filp, unsigned int cmd,
 
        }
 
+       unlock_device_hotplug();
        mutex_unlock(&pm_mutex);
 
        return error;
index a146ee3..dd562e9 100644 (file)
@@ -236,7 +236,7 @@ static int __ptrace_may_access(struct task_struct *task, unsigned int mode)
         */
        int dumpable = 0;
        /* Don't let security modules deny introspection */
-       if (task == current)
+       if (same_thread_group(task, current))
                return 0;
        rcu_read_lock();
        tcred = __task_cred(task);
index ff55247..4aa8a30 100644 (file)
@@ -17,8 +17,8 @@
 void res_counter_init(struct res_counter *counter, struct res_counter *parent)
 {
        spin_lock_init(&counter->lock);
-       counter->limit = RESOURCE_MAX;
-       counter->soft_limit = RESOURCE_MAX;
+       counter->limit = RES_COUNTER_MAX;
+       counter->soft_limit = RES_COUNTER_MAX;
        counter->parent = parent;
 }
 
@@ -178,23 +178,30 @@ u64 res_counter_read_u64(struct res_counter *counter, int member)
 #endif
 
 int res_counter_memparse_write_strategy(const char *buf,
-                                       unsigned long long *res)
+                                       unsigned long long *resp)
 {
        char *end;
+       unsigned long long res;
 
-       /* return RESOURCE_MAX(unlimited) if "-1" is specified */
+       /* return RES_COUNTER_MAX(unlimited) if "-1" is specified */
        if (*buf == '-') {
-               *res = simple_strtoull(buf + 1, &end, 10);
-               if (*res != 1 || *end != '\0')
+               res = simple_strtoull(buf + 1, &end, 10);
+               if (res != 1 || *end != '\0')
                        return -EINVAL;
-               *res = RESOURCE_MAX;
+               *resp = RES_COUNTER_MAX;
                return 0;
        }
 
-       *res = memparse(buf, &end);
+       res = memparse(buf, &end);
        if (*end != '\0')
                return -EINVAL;
 
-       *res = PAGE_ALIGN(*res);
+       if (PAGE_ALIGN(res) >= res)
+               res = PAGE_ALIGN(res);
+       else
+               res = RES_COUNTER_MAX;
+
+       *resp = res;
+
        return 0;
 }
index e076bdd..1965599 100644 (file)
@@ -124,7 +124,7 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
                SEQ_printf(m, " ");
 
        SEQ_printf(m, "%15s %5d %9Ld.%06ld %9Ld %5d ",
-               p->comm, p->pid,
+               p->comm, task_pid_nr(p),
                SPLIT_NS(p->se.vruntime),
                (long long)(p->nvcsw + p->nivcsw),
                p->prio);
@@ -289,7 +289,7 @@ do {                                                                        \
        P(nr_load_updates);
        P(nr_uninterruptible);
        PN(next_balance);
-       P(curr->pid);
+       SEQ_printf(m, "  .%-30s: %ld\n", "curr->pid", (long)(task_pid_nr(rq->curr)));
        PN(clock);
        P(cpu_load[0]);
        P(cpu_load[1]);
@@ -492,7 +492,7 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
 {
        unsigned long nr_switches;
 
-       SEQ_printf(m, "%s (%d, #threads: %d)\n", p->comm, p->pid,
+       SEQ_printf(m, "%s (%d, #threads: %d)\n", p->comm, task_pid_nr(p),
                                                get_nr_threads(p));
        SEQ_printf(m,
                "---------------------------------------------------------"
index 7f0a5e6..11cd136 100644 (file)
@@ -5151,7 +5151,7 @@ static int should_we_balance(struct lb_env *env)
         * First idle cpu or the first cpu(busiest) in this sched group
         * is eligible for doing load balancing at this and above domains.
         */
-       return balance_cpu != env->dst_cpu;
+       return balance_cpu == env->dst_cpu;
 }
 
 /*
@@ -5928,11 +5928,15 @@ static void task_fork_fair(struct task_struct *p)
        cfs_rq = task_cfs_rq(current);
        curr = cfs_rq->curr;
 
-       if (unlikely(task_cpu(p) != this_cpu)) {
-               rcu_read_lock();
-               __set_task_cpu(p, this_cpu);
-               rcu_read_unlock();
-       }
+       /*
+        * Not only the cpu but also the task_group of the parent might have
+        * been changed after parent->se.parent,cfs_rq were copied to
+        * child->se.parent,cfs_rq. So call __set_task_cpu() to make those
+        * of child point to valid ones.
+        */
+       rcu_read_lock();
+       __set_task_cpu(p, this_cpu);
+       rcu_read_unlock();
 
        update_curr(cfs_rq);
 
index 5aef494..c7edee7 100644 (file)
@@ -104,8 +104,9 @@ static inline void sched_info_queued(struct task_struct *t)
 }
 
 /*
- * Called when a process ceases being the active-running process, either
- * voluntarily or involuntarily.  Now we can calculate how long we ran.
+ * Called when a process ceases being the active-running process involuntarily
+ * due, typically, to expiring its time slice (this may also be called when
+ * switching to the idle task).  Now we can calculate how long we ran.
  * Also, if the process is still in the TASK_RUNNING state, call
  * sched_info_queued() to mark that it has now again started waiting on
  * the runqueue.
index 50e4107..ded28b9 100644 (file)
@@ -3394,7 +3394,7 @@ COMPAT_SYSCALL_DEFINE4(rt_sigaction, int, sig,
                new_ka.sa.sa_restorer = compat_ptr(restorer);
 #endif
                ret |= copy_from_user(&mask, &act->sa_mask, sizeof(mask));
-               ret |= __get_user(new_ka.sa.sa_flags, &act->sa_flags);
+               ret |= get_user(new_ka.sa.sa_flags, &act->sa_flags);
                if (ret)
                        return -EFAULT;
                sigset_from_compat(&new_ka.sa.sa_mask, &mask);
@@ -3406,7 +3406,7 @@ COMPAT_SYSCALL_DEFINE4(rt_sigaction, int, sig,
                ret = put_user(ptr_to_compat(old_ka.sa.sa_handler), 
                               &oact->sa_handler);
                ret |= copy_to_user(&oact->sa_mask, &mask, sizeof(mask));
-               ret |= __put_user(old_ka.sa.sa_flags, &oact->sa_flags);
+               ret |= put_user(old_ka.sa.sa_flags, &oact->sa_flags);
 #ifdef __ARCH_HAS_SA_RESTORER
                ret |= put_user(ptr_to_compat(old_ka.sa.sa_restorer),
                                &oact->sa_restorer);
index 449b707..0564571 100644 (file)
@@ -48,10 +48,13 @@ hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu)
                                cpu_to_node(cpu)))
                        return notifier_from_errno(-ENOMEM);
                if (!zalloc_cpumask_var_node(&cfd->cpumask_ipi, GFP_KERNEL,
-                               cpu_to_node(cpu)))
+                               cpu_to_node(cpu))) {
+                       free_cpumask_var(cfd->cpumask);
                        return notifier_from_errno(-ENOMEM);
+               }
                cfd->csd = alloc_percpu(struct call_single_data);
                if (!cfd->csd) {
+                       free_cpumask_var(cfd->cpumask_ipi);
                        free_cpumask_var(cfd->cpumask);
                        return notifier_from_errno(-ENOMEM);
                }
@@ -572,8 +575,10 @@ EXPORT_SYMBOL(on_each_cpu);
  *
  * If @wait is true, then returns once @func has returned.
  *
- * You must not call this function with disabled interrupts or
- * from a hardware interrupt handler or from a bottom half handler.
+ * You must not call this function with disabled interrupts or from a
+ * hardware interrupt handler or from a bottom half handler.  The
+ * exception is that it may be used during early boot while
+ * early_boot_irqs_disabled is set.
  */
 void on_each_cpu_mask(const struct cpumask *mask, smp_call_func_t func,
                        void *info, bool wait)
@@ -582,9 +587,10 @@ void on_each_cpu_mask(const struct cpumask *mask, smp_call_func_t func,
 
        smp_call_function_many(mask, func, info, wait);
        if (cpumask_test_cpu(cpu, mask)) {
-               local_irq_disable();
+               unsigned long flags;
+               local_irq_save(flags);
                func(info);
-               local_irq_enable();
+               local_irq_restore(flags);
        }
        put_cpu();
 }
index be3d351..53cc09c 100644 (file)
@@ -876,7 +876,6 @@ int __init __weak early_irq_init(void)
        return 0;
 }
 
-#ifdef CONFIG_GENERIC_HARDIRQS
 int __init __weak arch_probe_nr_irqs(void)
 {
        return NR_IRQS_LEGACY;
@@ -886,4 +885,3 @@ int __init __weak arch_early_irq_init(void)
 {
        return 0;
 }
-#endif
index 5cdd806..4b082b5 100644 (file)
 #else
 #define raw_read_can_lock(l)   read_can_lock(l)
 #define raw_write_can_lock(l)  write_can_lock(l)
+
+/*
+ * Some architectures can relax in favour of the CPU owning the lock.
+ */
+#ifndef arch_read_relax
+# define arch_read_relax(l)    cpu_relax()
+#endif
+#ifndef arch_write_relax
+# define arch_write_relax(l)   cpu_relax()
+#endif
+#ifndef arch_spin_relax
+# define arch_spin_relax(l)    cpu_relax()
+#endif
+
 /*
  * We build the __lock_function inlines here. They are too large for
  * inlining all over the place, but here is only one user per function
index 07f6fc4..b2f06f3 100644 (file)
@@ -1225,7 +1225,7 @@ static struct ctl_table vm_table[] = {
                .data           = &hugepages_treat_as_movable,
                .maxlen         = sizeof(int),
                .mode           = 0644,
-               .proc_handler   = hugetlb_treat_movable_handler,
+               .proc_handler   = proc_dointvec,
        },
        {
                .procname       = "nr_overcommit_hugepages",
@@ -1471,14 +1471,14 @@ static struct ctl_table fs_table[] = {
        {
                .procname       = "inode-nr",
                .data           = &inodes_stat,
-               .maxlen         = 2*sizeof(int),
+               .maxlen         = 2*sizeof(long),
                .mode           = 0444,
                .proc_handler   = proc_nr_inodes,
        },
        {
                .procname       = "inode-state",
                .data           = &inodes_stat,
-               .maxlen         = 7*sizeof(int),
+               .maxlen         = 7*sizeof(long),
                .mode           = 0444,
                .proc_handler   = proc_nr_inodes,
        },
@@ -1508,7 +1508,7 @@ static struct ctl_table fs_table[] = {
        {
                .procname       = "dentry-state",
                .data           = &dentry_stat,
-               .maxlen         = 6*sizeof(int),
+               .maxlen         = 6*sizeof(long),
                .mode           = 0444,
                .proc_handler   = proc_nr_dentry,
        },
index 65bd3c9..8727032 100644 (file)
@@ -4,6 +4,23 @@
 
 static struct callback_head work_exited; /* all we need is ->next == NULL */
 
+/**
+ * task_work_add - ask the @task to execute @work->func()
+ * @task: the task which should run the callback
+ * @work: the callback to run
+ * @notify: send the notification if true
+ *
+ * Queue @work for task_work_run() below and notify the @task if @notify.
+ * Fails if the @task is exiting/exited and thus it can't process this @work.
+ * Otherwise @work->func() will be called when the @task returns from kernel
+ * mode or exits.
+ *
+ * This is like the signal handler which runs in kernel mode, but it doesn't
+ * try to wake up the @task.
+ *
+ * RETURNS:
+ * 0 if succeeds or -ESRCH.
+ */
 int
 task_work_add(struct task_struct *task, struct callback_head *work, bool notify)
 {
@@ -21,11 +38,22 @@ task_work_add(struct task_struct *task, struct callback_head *work, bool notify)
        return 0;
 }
 
+/**
+ * task_work_cancel - cancel a pending work added by task_work_add()
+ * @task: the task which should execute the work
+ * @func: identifies the work to remove
+ *
+ * Find the last queued pending work with ->func == @func and remove
+ * it from queue.
+ *
+ * RETURNS:
+ * The found work or NULL if not found.
+ */
 struct callback_head *
 task_work_cancel(struct task_struct *task, task_work_func_t func)
 {
        struct callback_head **pprev = &task->task_works;
-       struct callback_head *work = NULL;
+       struct callback_head *work;
        unsigned long flags;
        /*
         * If cmpxchg() fails we continue without updating pprev.
@@ -35,7 +63,7 @@ task_work_cancel(struct task_struct *task, task_work_func_t func)
         */
        raw_spin_lock_irqsave(&task->pi_lock, flags);
        while ((work = ACCESS_ONCE(*pprev))) {
-               read_barrier_depends();
+               smp_read_barrier_depends();
                if (work->func != func)
                        pprev = &work->next;
                else if (cmpxchg(pprev, work, work->next) == work)
@@ -46,6 +74,14 @@ task_work_cancel(struct task_struct *task, task_work_func_t func)
        return work;
 }
 
+/**
+ * task_work_run - execute the works added by task_work_add()
+ *
+ * Flush the pending works. Should be used by the core kernel code.
+ * Called before the task returns to the user-mode or stops, or when
+ * it exits. In the latter case task_work_add() can no longer add the
+ * new work after task_work_run() returns.
+ */
 void task_work_run(void)
 {
        struct task_struct *task = current;
index 8f5b3b9..bb22151 100644 (file)
@@ -516,13 +516,13 @@ static void sync_cmos_clock(struct work_struct *work)
        schedule_delayed_work(&sync_cmos_work, timespec_to_jiffies(&next));
 }
 
-static void notify_cmos_timer(void)
+void ntp_notify_cmos_timer(void)
 {
        schedule_delayed_work(&sync_cmos_work, 0);
 }
 
 #else
-static inline void notify_cmos_timer(void) { }
+void ntp_notify_cmos_timer(void) { }
 #endif
 
 
@@ -687,8 +687,6 @@ int __do_adjtimex(struct timex *txc, struct timespec *ts, s32 *time_tai)
        if (!(time_status & STA_NANO))
                txc->time.tv_usec /= NSEC_PER_USEC;
 
-       notify_cmos_timer();
-
        return result;
 }
 
index 48b9fff..947ba25 100644 (file)
@@ -1703,6 +1703,8 @@ int do_adjtimex(struct timex *txc)
        write_seqcount_end(&timekeeper_seq);
        raw_spin_unlock_irqrestore(&timekeeper_lock, flags);
 
+       ntp_notify_cmos_timer();
+
        return ret;
 }
 
index c54c75e..630d72b 100644 (file)
 int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
                                int wait)
 {
+       unsigned long flags;
+
        WARN_ON(cpu != 0);
 
-       local_irq_disable();
-       (func)(info);
-       local_irq_enable();
+       local_irq_save(flags);
+       func(info);
+       local_irq_restore(flags);
 
        return 0;
 }
 EXPORT_SYMBOL(smp_call_function_single);
+
+int on_each_cpu(smp_call_func_t func, void *info, int wait)
+{
+       unsigned long flags;
+
+       local_irq_save(flags);
+       func(info);
+       local_irq_restore(flags);
+       return 0;
+}
+EXPORT_SYMBOL(on_each_cpu);
+
+/*
+ * Note we still need to test the mask even for UP
+ * because we actually can get an empty mask from
+ * code that on SMP might call us without the local
+ * CPU in the mask.
+ */
+void on_each_cpu_mask(const struct cpumask *mask,
+                     smp_call_func_t func, void *info, bool wait)
+{
+       unsigned long flags;
+
+       if (cpumask_test_cpu(0, mask)) {
+               local_irq_save(flags);
+               func(info);
+               local_irq_restore(flags);
+       }
+}
+EXPORT_SYMBOL(on_each_cpu_mask);
+
+/*
+ * Preemption is disabled here to make sure the cond_func is called under the
+ * same condtions in UP and SMP.
+ */
+void on_each_cpu_cond(bool (*cond_func)(int cpu, void *info),
+                     smp_call_func_t func, void *info, bool wait,
+                     gfp_t gfp_flags)
+{
+       unsigned long flags;
+
+       preempt_disable();
+       if (cond_func(0, info)) {
+               local_irq_save(flags);
+               func(info);
+               local_irq_restore(flags);
+       }
+       preempt_enable();
+}
+EXPORT_SYMBOL(on_each_cpu_cond);
index 652bea9..06344d9 100644 (file)
@@ -597,7 +597,7 @@ endmenu # "Memory Debugging"
 
 config DEBUG_SHIRQ
        bool "Debug shared IRQ handlers"
-       depends on DEBUG_KERNEL && GENERIC_HARDIRQS
+       depends on DEBUG_KERNEL
        help
          Enable this to generate a spurious interrupt as soon as a shared
          interrupt handler is registered, and just before one is deregistered.
@@ -1461,7 +1461,7 @@ config BACKTRACE_SELF_TEST
 
 config RBTREE_TEST
        tristate "Red-Black tree test"
-       depends on m && DEBUG_KERNEL
+       depends on DEBUG_KERNEL
        help
          A benchmark measuring the performance of the rbtree library.
          Also includes rbtree invariant checks.
index f2cb308..f3bb2cb 100644 (file)
@@ -13,7 +13,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \
         sha1.o md5.o irq_regs.o reciprocal_div.o argv_split.o \
         proportions.o flex_proportions.o prio_heap.o ratelimit.o show_mem.o \
         is_single_threaded.o plist.o decompress.o kobject_uevent.o \
-        earlycpio.o percpu-refcount.o
+        earlycpio.o percpu-refcount.o percpu_ida.o
 
 obj-$(CONFIG_ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS) += usercopy.o
 lib-$(CONFIG_MMU) += ioremap.o
@@ -25,7 +25,8 @@ obj-y += lockref.o
 obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
         bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \
         gcd.o lcm.o list_sort.o uuid.o flex_array.o iovec.o clz_ctz.o \
-        bsearch.o find_last_bit.o find_next_bit.o llist.o memweight.o kfifo.o
+        bsearch.o find_last_bit.o find_next_bit.o llist.o memweight.o kfifo.o \
+        percpu_ida.o
 obj-y += string_helpers.o
 obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o
 obj-y += kstrtox.o
index 5fbed5c..4f134d8 100644 (file)
@@ -8,9 +8,7 @@
  */
 
 #include <linux/cpu_rmap.h>
-#ifdef CONFIG_GENERIC_HARDIRQS
 #include <linux/interrupt.h>
-#endif
 #include <linux/export.h>
 
 /*
@@ -213,8 +211,6 @@ int cpu_rmap_update(struct cpu_rmap *rmap, u16 index,
 }
 EXPORT_SYMBOL(cpu_rmap_update);
 
-#ifdef CONFIG_GENERIC_HARDIRQS
-
 /* Glue between IRQ affinity notifiers and CPU rmaps */
 
 struct irq_glue {
@@ -309,5 +305,3 @@ int irq_cpu_rmap_add(struct cpu_rmap *rmap, int irq)
        return rc;
 }
 EXPORT_SYMBOL(irq_cpu_rmap_add);
-
-#endif /* CONFIG_GENERIC_HARDIRQS */
index 43bc5b0..dfe6ec1 100644 (file)
 #include <linux/err.h>
 #include <linux/init.h>
 #include <crypto/hash.h>
+#include <linux/static_key.h>
 
 static struct crypto_shash *crct10dif_tfm;
+static struct static_key crct10dif_fallback __read_mostly;
 
 __u16 crc_t10dif(const unsigned char *buffer, size_t len)
 {
@@ -25,6 +27,9 @@ __u16 crc_t10dif(const unsigned char *buffer, size_t len)
        } desc;
        int err;
 
+       if (static_key_false(&crct10dif_fallback))
+               return crc_t10dif_generic(0, buffer, len);
+
        desc.shash.tfm = crct10dif_tfm;
        desc.shash.flags = 0;
        *(__u16 *)desc.ctx = 0;
@@ -39,7 +44,11 @@ EXPORT_SYMBOL(crc_t10dif);
 static int __init crc_t10dif_mod_init(void)
 {
        crct10dif_tfm = crypto_alloc_shash("crct10dif", 0, 0);
-       return PTR_RET(crct10dif_tfm);
+       if (IS_ERR(crct10dif_tfm)) {
+               static_key_slow_inc(&crct10dif_fallback);
+               crct10dif_tfm = NULL;
+       }
+       return 0;
 }
 
 static void __exit crc_t10dif_mod_fini(void)
index 072fbd8..410093d 100644 (file)
@@ -131,11 +131,14 @@ crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 (*tab)[256])
 #endif
 
 /**
- * crc32_le() - Calculate bitwise little-endian Ethernet AUTODIN II CRC32
- * @crc: seed value for computation.  ~0 for Ethernet, sometimes 0 for
- *     other uses, or the previous crc32 value if computing incrementally.
- * @p: pointer to buffer over which CRC is run
+ * crc32_le_generic() - Calculate bitwise little-endian Ethernet AUTODIN II
+ *                     CRC32/CRC32C
+ * @crc: seed value for computation.  ~0 for Ethernet, sometimes 0 for other
+ *      uses, or the previous crc32/crc32c value if computing incrementally.
+ * @p: pointer to buffer over which CRC32/CRC32C is run
  * @len: length of buffer @p
+ * @tab: little-endian Ethernet table
+ * @polynomial: CRC32/CRC32c LE polynomial
  */
 static inline u32 __pure crc32_le_generic(u32 crc, unsigned char const *p,
                                          size_t len, const u32 (*tab)[256],
@@ -201,11 +204,13 @@ EXPORT_SYMBOL(crc32_le);
 EXPORT_SYMBOL(__crc32c_le);
 
 /**
- * crc32_be() - Calculate bitwise big-endian Ethernet AUTODIN II CRC32
+ * crc32_be_generic() - Calculate bitwise big-endian Ethernet AUTODIN II CRC32
  * @crc: seed value for computation.  ~0 for Ethernet, sometimes 0 for
  *     other uses, or the previous crc32 value if computing incrementally.
- * @p: pointer to buffer over which CRC is run
+ * @p: pointer to buffer over which CRC32 is run
  * @len: length of buffer @p
+ * @tab: big-endian Ethernet table
+ * @polynomial: CRC32 BE polynomial
  */
 static inline u32 __pure crc32_be_generic(u32 crc, unsigned char const *p,
                                          size_t len, const u32 (*tab)[256],
index 19ff89e..d619b28 100644 (file)
@@ -48,7 +48,7 @@ STATIC int INIT gunzip(unsigned char *buf, int len,
                out_len = 0x8000; /* 32 K */
                out_buf = malloc(out_len);
        } else {
-               out_len = 0x7fffffff; /* no limit */
+               out_len = ((size_t)~0) - (size_t)out_buf; /* no limit */
        }
        if (!out_buf) {
                error("Out of memory while allocating output buffer");
index b35cfa9..26cf20b 100644 (file)
 #include <linux/of_address.h>
 #include <linux/of_device.h>
 
+static inline size_t chunk_size(const struct gen_pool_chunk *chunk)
+{
+       return chunk->end_addr - chunk->start_addr + 1;
+}
+
 static int set_bits_ll(unsigned long *addr, unsigned long mask_to_set)
 {
        unsigned long val, nval;
@@ -182,13 +187,13 @@ int gen_pool_add_virt(struct gen_pool *pool, unsigned long virt, phys_addr_t phy
        int nbytes = sizeof(struct gen_pool_chunk) +
                                BITS_TO_LONGS(nbits) * sizeof(long);
 
-       chunk = kmalloc_node(nbytes, GFP_KERNEL | __GFP_ZERO, nid);
+       chunk = kzalloc_node(nbytes, GFP_KERNEL, nid);
        if (unlikely(chunk == NULL))
                return -ENOMEM;
 
        chunk->phys_addr = phys;
        chunk->start_addr = virt;
-       chunk->end_addr = virt + size;
+       chunk->end_addr = virt + size - 1;
        atomic_set(&chunk->avail, size);
 
        spin_lock(&pool->lock);
@@ -213,7 +218,7 @@ phys_addr_t gen_pool_virt_to_phys(struct gen_pool *pool, unsigned long addr)
 
        rcu_read_lock();
        list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) {
-               if (addr >= chunk->start_addr && addr < chunk->end_addr) {
+               if (addr >= chunk->start_addr && addr <= chunk->end_addr) {
                        paddr = chunk->phys_addr + (addr - chunk->start_addr);
                        break;
                }
@@ -242,7 +247,7 @@ void gen_pool_destroy(struct gen_pool *pool)
                chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk);
                list_del(&chunk->next_chunk);
 
-               end_bit = (chunk->end_addr - chunk->start_addr) >> order;
+               end_bit = chunk_size(chunk) >> order;
                bit = find_next_bit(chunk->bits, end_bit, 0);
                BUG_ON(bit < end_bit);
 
@@ -283,7 +288,7 @@ unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size)
                if (size > atomic_read(&chunk->avail))
                        continue;
 
-               end_bit = (chunk->end_addr - chunk->start_addr) >> order;
+               end_bit = chunk_size(chunk) >> order;
 retry:
                start_bit = pool->algo(chunk->bits, end_bit, start_bit, nbits,
                                pool->data);
@@ -330,8 +335,8 @@ void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size)
        nbits = (size + (1UL << order) - 1) >> order;
        rcu_read_lock();
        list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) {
-               if (addr >= chunk->start_addr && addr < chunk->end_addr) {
-                       BUG_ON(addr + size > chunk->end_addr);
+               if (addr >= chunk->start_addr && addr <= chunk->end_addr) {
+                       BUG_ON(addr + size - 1 > chunk->end_addr);
                        start_bit = (addr - chunk->start_addr) >> order;
                        remain = bitmap_clear_ll(chunk->bits, start_bit, nbits);
                        BUG_ON(remain);
@@ -400,7 +405,7 @@ size_t gen_pool_size(struct gen_pool *pool)
 
        rcu_read_lock();
        list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk)
-               size += chunk->end_addr - chunk->start_addr;
+               size += chunk_size(chunk);
        rcu_read_unlock();
        return size;
 }
@@ -519,7 +524,6 @@ struct gen_pool *devm_gen_pool_create(struct device *dev, int min_alloc_order,
 /**
  * dev_get_gen_pool - Obtain the gen_pool (if any) for a device
  * @dev: device to retrieve the gen_pool from
- * @name: Optional name for the gen_pool, usually NULL
  *
  * Returns the gen_pool for the device if one is present, or NULL.
  */
index 411be80..df6839e 100644 (file)
@@ -283,8 +283,8 @@ _output_error:
        return (int) (-(((char *) ip) - source));
 }
 
-int lz4_decompress(const char *src, size_t *src_len, char *dest,
-               size_t actual_dest_len)
+int lz4_decompress(const unsigned char *src, size_t *src_len,
+               unsigned char *dest, size_t actual_dest_len)
 {
        int ret = -1;
        int input_len = 0;
@@ -302,8 +302,8 @@ exit_0:
 EXPORT_SYMBOL(lz4_decompress);
 #endif
 
-int lz4_decompress_unknownoutputsize(const char *src, size_t src_len,
-               char *dest, size_t *dest_len)
+int lz4_decompress_unknownoutputsize(const unsigned char *src, size_t src_len,
+               unsigned char *dest, size_t *dest_len)
 {
        int ret = -1;
        int out_len = 0;
diff --git a/lib/percpu_ida.c b/lib/percpu_ida.c
new file mode 100644 (file)
index 0000000..bab1ba2
--- /dev/null
@@ -0,0 +1,335 @@
+/*
+ * Percpu IDA library
+ *
+ * Copyright (C) 2013 Datera, Inc. Kent Overstreet
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2, or (at
+ * your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ */
+
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
+#include <linux/bug.h>
+#include <linux/err.h>
+#include <linux/export.h>
+#include <linux/hardirq.h>
+#include <linux/idr.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/percpu.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/spinlock.h>
+#include <linux/percpu_ida.h>
+
+/*
+ * Number of tags we move between the percpu freelist and the global freelist at
+ * a time
+ */
+#define IDA_PCPU_BATCH_MOVE    32U
+
+/* Max size of percpu freelist, */
+#define IDA_PCPU_SIZE          ((IDA_PCPU_BATCH_MOVE * 3) / 2)
+
+struct percpu_ida_cpu {
+       /*
+        * Even though this is percpu, we need a lock for tag stealing by remote
+        * CPUs:
+        */
+       spinlock_t                      lock;
+
+       /* nr_free/freelist form a stack of free IDs */
+       unsigned                        nr_free;
+       unsigned                        freelist[];
+};
+
+static inline void move_tags(unsigned *dst, unsigned *dst_nr,
+                            unsigned *src, unsigned *src_nr,
+                            unsigned nr)
+{
+       *src_nr -= nr;
+       memcpy(dst + *dst_nr, src + *src_nr, sizeof(unsigned) * nr);
+       *dst_nr += nr;
+}
+
+/*
+ * Try to steal tags from a remote cpu's percpu freelist.
+ *
+ * We first check how many percpu freelists have tags - we don't steal tags
+ * unless enough percpu freelists have tags on them that it's possible more than
+ * half the total tags could be stuck on remote percpu freelists.
+ *
+ * Then we iterate through the cpus until we find some tags - we don't attempt
+ * to find the "best" cpu to steal from, to keep cacheline bouncing to a
+ * minimum.
+ */
+static inline void steal_tags(struct percpu_ida *pool,
+                             struct percpu_ida_cpu *tags)
+{
+       unsigned cpus_have_tags, cpu = pool->cpu_last_stolen;
+       struct percpu_ida_cpu *remote;
+
+       for (cpus_have_tags = cpumask_weight(&pool->cpus_have_tags);
+            cpus_have_tags * IDA_PCPU_SIZE > pool->nr_tags / 2;
+            cpus_have_tags--) {
+               cpu = cpumask_next(cpu, &pool->cpus_have_tags);
+
+               if (cpu >= nr_cpu_ids) {
+                       cpu = cpumask_first(&pool->cpus_have_tags);
+                       if (cpu >= nr_cpu_ids)
+                               BUG();
+               }
+
+               pool->cpu_last_stolen = cpu;
+               remote = per_cpu_ptr(pool->tag_cpu, cpu);
+
+               cpumask_clear_cpu(cpu, &pool->cpus_have_tags);
+
+               if (remote == tags)
+                       continue;
+
+               spin_lock(&remote->lock);
+
+               if (remote->nr_free) {
+                       memcpy(tags->freelist,
+                              remote->freelist,
+                              sizeof(unsigned) * remote->nr_free);
+
+                       tags->nr_free = remote->nr_free;
+                       remote->nr_free = 0;
+               }
+
+               spin_unlock(&remote->lock);
+
+               if (tags->nr_free)
+                       break;
+       }
+}
+
+/*
+ * Pop up to IDA_PCPU_BATCH_MOVE IDs off the global freelist, and push them onto
+ * our percpu freelist:
+ */
+static inline void alloc_global_tags(struct percpu_ida *pool,
+                                    struct percpu_ida_cpu *tags)
+{
+       move_tags(tags->freelist, &tags->nr_free,
+                 pool->freelist, &pool->nr_free,
+                 min(pool->nr_free, IDA_PCPU_BATCH_MOVE));
+}
+
+static inline unsigned alloc_local_tag(struct percpu_ida *pool,
+                                      struct percpu_ida_cpu *tags)
+{
+       int tag = -ENOSPC;
+
+       spin_lock(&tags->lock);
+       if (tags->nr_free)
+               tag = tags->freelist[--tags->nr_free];
+       spin_unlock(&tags->lock);
+
+       return tag;
+}
+
+/**
+ * percpu_ida_alloc - allocate a tag
+ * @pool: pool to allocate from
+ * @gfp: gfp flags
+ *
+ * Returns a tag - an integer in the range [0..nr_tags) (passed to
+ * tag_pool_init()), or otherwise -ENOSPC on allocation failure.
+ *
+ * Safe to be called from interrupt context (assuming it isn't passed
+ * __GFP_WAIT, of course).
+ *
+ * @gfp indicates whether or not to wait until a free id is available (it's not
+ * used for internal memory allocations); thus if passed __GFP_WAIT we may sleep
+ * however long it takes until another thread frees an id (same semantics as a
+ * mempool).
+ *
+ * Will not fail if passed __GFP_WAIT.
+ */
+int percpu_ida_alloc(struct percpu_ida *pool, gfp_t gfp)
+{
+       DEFINE_WAIT(wait);
+       struct percpu_ida_cpu *tags;
+       unsigned long flags;
+       int tag;
+
+       local_irq_save(flags);
+       tags = this_cpu_ptr(pool->tag_cpu);
+
+       /* Fastpath */
+       tag = alloc_local_tag(pool, tags);
+       if (likely(tag >= 0)) {
+               local_irq_restore(flags);
+               return tag;
+       }
+
+       while (1) {
+               spin_lock(&pool->lock);
+
+               /*
+                * prepare_to_wait() must come before steal_tags(), in case
+                * percpu_ida_free() on another cpu flips a bit in
+                * cpus_have_tags
+                *
+                * global lock held and irqs disabled, don't need percpu lock
+                */
+               prepare_to_wait(&pool->wait, &wait, TASK_UNINTERRUPTIBLE);
+
+               if (!tags->nr_free)
+                       alloc_global_tags(pool, tags);
+               if (!tags->nr_free)
+                       steal_tags(pool, tags);
+
+               if (tags->nr_free) {
+                       tag = tags->freelist[--tags->nr_free];
+                       if (tags->nr_free)
+                               cpumask_set_cpu(smp_processor_id(),
+                                               &pool->cpus_have_tags);
+               }
+
+               spin_unlock(&pool->lock);
+               local_irq_restore(flags);
+
+               if (tag >= 0 || !(gfp & __GFP_WAIT))
+                       break;
+
+               schedule();
+
+               local_irq_save(flags);
+               tags = this_cpu_ptr(pool->tag_cpu);
+       }
+
+       finish_wait(&pool->wait, &wait);
+       return tag;
+}
+EXPORT_SYMBOL_GPL(percpu_ida_alloc);
+
+/**
+ * percpu_ida_free - free a tag
+ * @pool: pool @tag was allocated from
+ * @tag: a tag previously allocated with percpu_ida_alloc()
+ *
+ * Safe to be called from interrupt context.
+ */
+void percpu_ida_free(struct percpu_ida *pool, unsigned tag)
+{
+       struct percpu_ida_cpu *tags;
+       unsigned long flags;
+       unsigned nr_free;
+
+       BUG_ON(tag >= pool->nr_tags);
+
+       local_irq_save(flags);
+       tags = this_cpu_ptr(pool->tag_cpu);
+
+       spin_lock(&tags->lock);
+       tags->freelist[tags->nr_free++] = tag;
+
+       nr_free = tags->nr_free;
+       spin_unlock(&tags->lock);
+
+       if (nr_free == 1) {
+               cpumask_set_cpu(smp_processor_id(),
+                               &pool->cpus_have_tags);
+               wake_up(&pool->wait);
+       }
+
+       if (nr_free == IDA_PCPU_SIZE) {
+               spin_lock(&pool->lock);
+
+               /*
+                * Global lock held and irqs disabled, don't need percpu
+                * lock
+                */
+               if (tags->nr_free == IDA_PCPU_SIZE) {
+                       move_tags(pool->freelist, &pool->nr_free,
+                                 tags->freelist, &tags->nr_free,
+                                 IDA_PCPU_BATCH_MOVE);
+
+                       wake_up(&pool->wait);
+               }
+               spin_unlock(&pool->lock);
+       }
+
+       local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(percpu_ida_free);
+
+/**
+ * percpu_ida_destroy - release a tag pool's resources
+ * @pool: pool to free
+ *
+ * Frees the resources allocated by percpu_ida_init().
+ */
+void percpu_ida_destroy(struct percpu_ida *pool)
+{
+       free_percpu(pool->tag_cpu);
+       free_pages((unsigned long) pool->freelist,
+                  get_order(pool->nr_tags * sizeof(unsigned)));
+}
+EXPORT_SYMBOL_GPL(percpu_ida_destroy);
+
+/**
+ * percpu_ida_init - initialize a percpu tag pool
+ * @pool: pool to initialize
+ * @nr_tags: number of tags that will be available for allocation
+ *
+ * Initializes @pool so that it can be used to allocate tags - integers in the
+ * range [0, nr_tags). Typically, they'll be used by driver code to refer to a
+ * preallocated array of tag structures.
+ *
+ * Allocation is percpu, but sharding is limited by nr_tags - for best
+ * performance, the workload should not span more cpus than nr_tags / 128.
+ */
+int percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags)
+{
+       unsigned i, cpu, order;
+
+       memset(pool, 0, sizeof(*pool));
+
+       init_waitqueue_head(&pool->wait);
+       spin_lock_init(&pool->lock);
+       pool->nr_tags = nr_tags;
+
+       /* Guard against overflow */
+       if (nr_tags > (unsigned) INT_MAX + 1) {
+               pr_err("percpu_ida_init(): nr_tags too large\n");
+               return -EINVAL;
+       }
+
+       order = get_order(nr_tags * sizeof(unsigned));
+       pool->freelist = (void *) __get_free_pages(GFP_KERNEL, order);
+       if (!pool->freelist)
+               return -ENOMEM;
+
+       for (i = 0; i < nr_tags; i++)
+               pool->freelist[i] = i;
+
+       pool->nr_free = nr_tags;
+
+       pool->tag_cpu = __alloc_percpu(sizeof(struct percpu_ida_cpu) +
+                                      IDA_PCPU_SIZE * sizeof(unsigned),
+                                      sizeof(unsigned));
+       if (!pool->tag_cpu)
+               goto err;
+
+       for_each_possible_cpu(cpu)
+               spin_lock_init(&per_cpu_ptr(pool->tag_cpu, cpu)->lock);
+
+       return 0;
+err:
+       percpu_ida_destroy(pool);
+       return -ENOMEM;
+}
+EXPORT_SYMBOL_GPL(percpu_ida_init);
index e796429..7811ed3 100644 (file)
@@ -32,6 +32,7 @@
 #include <linux/string.h>
 #include <linux/bitops.h>
 #include <linux/rcupdate.h>
+#include <linux/hardirq.h>             /* in_interrupt() */
 
 
 #ifdef __KERNEL__
@@ -207,7 +208,12 @@ radix_tree_node_alloc(struct radix_tree_root *root)
        struct radix_tree_node *ret = NULL;
        gfp_t gfp_mask = root_gfp_mask(root);
 
-       if (!(gfp_mask & __GFP_WAIT)) {
+       /*
+        * Preload code isn't irq safe and it doesn't make sense to use
+        * preloading in the interrupt anyway as all the allocations have to
+        * be atomic. So just do normal allocation when in interrupt.
+        */
+       if (!(gfp_mask & __GFP_WAIT) && !in_interrupt()) {
                struct radix_tree_preload *rtp;
 
                /*
@@ -264,7 +270,7 @@ radix_tree_node_free(struct radix_tree_node *node)
  * To make use of this facility, the radix tree must be initialised without
  * __GFP_WAIT being passed to INIT_RADIX_TREE().
  */
-int radix_tree_preload(gfp_t gfp_mask)
+static int __radix_tree_preload(gfp_t gfp_mask)
 {
        struct radix_tree_preload *rtp;
        struct radix_tree_node *node;
@@ -288,8 +294,39 @@ int radix_tree_preload(gfp_t gfp_mask)
 out:
        return ret;
 }
+
+/*
+ * Load up this CPU's radix_tree_node buffer with sufficient objects to
+ * ensure that the addition of a single element in the tree cannot fail.  On
+ * success, return zero, with preemption disabled.  On error, return -ENOMEM
+ * with preemption not disabled.
+ *
+ * To make use of this facility, the radix tree must be initialised without
+ * __GFP_WAIT being passed to INIT_RADIX_TREE().
+ */
+int radix_tree_preload(gfp_t gfp_mask)
+{
+       /* Warn on non-sensical use... */
+       WARN_ON_ONCE(!(gfp_mask & __GFP_WAIT));
+       return __radix_tree_preload(gfp_mask);
+}
 EXPORT_SYMBOL(radix_tree_preload);
 
+/*
+ * Same as the function above, except we don't guarantee preloading happens.
+ * We do it, if we decide it helps. On success, return zero with preemption
+ * disabled. On error, return -ENOMEM with preemption not disabled.
+ */
+int radix_tree_maybe_preload(gfp_t gfp_mask)
+{
+       if (gfp_mask & __GFP_WAIT)
+               return __radix_tree_preload(gfp_mask);
+       /* Preloading doesn't help anything with this gfp mask, skip it */
+       preempt_disable();
+       return 0;
+}
+EXPORT_SYMBOL(radix_tree_maybe_preload);
+
 /*
  *     Return the maximum key which can be store into a
  *     radix tree with height HEIGHT.
index c0e31fe..65f4eff 100644 (file)
@@ -518,3 +518,43 @@ void rb_replace_node(struct rb_node *victim, struct rb_node *new,
        *new = *victim;
 }
 EXPORT_SYMBOL(rb_replace_node);
+
+static struct rb_node *rb_left_deepest_node(const struct rb_node *node)
+{
+       for (;;) {
+               if (node->rb_left)
+                       node = node->rb_left;
+               else if (node->rb_right)
+                       node = node->rb_right;
+               else
+                       return (struct rb_node *)node;
+       }
+}
+
+struct rb_node *rb_next_postorder(const struct rb_node *node)
+{
+       const struct rb_node *parent;
+       if (!node)
+               return NULL;
+       parent = rb_parent(node);
+
+       /* If we're sitting on node, we've already seen our children */
+       if (parent && node == parent->rb_left && parent->rb_right) {
+               /* If we are the parent's left node, go to the parent's right
+                * node then all the way down to the left */
+               return rb_left_deepest_node(parent->rb_right);
+       } else
+               /* Otherwise we are the parent's right node, and the parent
+                * should be next */
+               return (struct rb_node *)parent;
+}
+EXPORT_SYMBOL(rb_next_postorder);
+
+struct rb_node *rb_first_postorder(const struct rb_root *root)
+{
+       if (!root->rb_node)
+               return NULL;
+
+       return rb_left_deepest_node(root->rb_node);
+}
+EXPORT_SYMBOL(rb_first_postorder);
index 122f02f..31dd4cc 100644 (file)
@@ -114,6 +114,16 @@ static int black_path_count(struct rb_node *rb)
        return count;
 }
 
+static void check_postorder(int nr_nodes)
+{
+       struct rb_node *rb;
+       int count = 0;
+       for (rb = rb_first_postorder(&root); rb; rb = rb_next_postorder(rb))
+               count++;
+
+       WARN_ON_ONCE(count != nr_nodes);
+}
+
 static void check(int nr_nodes)
 {
        struct rb_node *rb;
@@ -136,6 +146,8 @@ static void check(int nr_nodes)
 
        WARN_ON_ONCE(count != nr_nodes);
        WARN_ON_ONCE(count < (1 << black_path_count(rb_last(&root))) - 1);
+
+       check_postorder(nr_nodes);
 }
 
 static void check_augmented(int nr_nodes)
index 6cdd270..026771a 100644 (file)
@@ -245,7 +245,7 @@ config COMPACTION
 config MIGRATION
        bool "Page migration"
        def_bool y
-       depends on NUMA || ARCH_ENABLE_MEMORY_HOTREMOVE || COMPACTION || CMA
+       depends on (NUMA || ARCH_ENABLE_MEMORY_HOTREMOVE || COMPACTION || CMA) && MMU
        help
          Allows the migration of the physical location of pages of processes
          while the virtual addresses are not changed. This is useful in
@@ -480,7 +480,7 @@ config FRONTSWAP
 
 config CMA
        bool "Contiguous Memory Allocator"
-       depends on HAVE_MEMBLOCK
+       depends on HAVE_MEMBLOCK && MMU
        select MIGRATION
        select MEMORY_ISOLATION
        help
index f008033..305d10a 100644 (file)
@@ -17,7 +17,7 @@ obj-y                 := filemap.o mempool.o oom_kill.o fadvise.o \
                           util.o mmzone.o vmstat.o backing-dev.o \
                           mm_init.o mmu_context.o percpu.o slab_common.o \
                           compaction.o balloon_compaction.o \
-                          interval_tree.o $(mmu-y)
+                          interval_tree.o list_lru.o $(mmu-y)
 
 obj-y += init-mm.o
 
index 37d9edc..ce682f7 100644 (file)
@@ -652,7 +652,7 @@ int pdflush_proc_obsolete(struct ctl_table *table, int write,
 {
        char kbuf[] = "0\n";
 
-       if (*ppos) {
+       if (*ppos || *lenp < sizeof(kbuf)) {
                *lenp = 0;
                return 0;
        }
index 05ccb4c..c437893 100644 (file)
@@ -1131,6 +1131,9 @@ void compact_pgdat(pg_data_t *pgdat, int order)
                .sync = false,
        };
 
+       if (!order)
+               return;
+
        __compact_pgdat(pgdat, &cc);
 }
 
index 731a2c2..1e6aec4 100644 (file)
@@ -467,32 +467,34 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
        error = mem_cgroup_cache_charge(page, current->mm,
                                        gfp_mask & GFP_RECLAIM_MASK);
        if (error)
-               goto out;
-
-       error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM);
-       if (error == 0) {
-               page_cache_get(page);
-               page->mapping = mapping;
-               page->index = offset;
+               return error;
 
-               spin_lock_irq(&mapping->tree_lock);
-               error = radix_tree_insert(&mapping->page_tree, offset, page);
-               if (likely(!error)) {
-                       mapping->nrpages++;
-                       __inc_zone_page_state(page, NR_FILE_PAGES);
-                       spin_unlock_irq(&mapping->tree_lock);
-                       trace_mm_filemap_add_to_page_cache(page);
-               } else {
-                       page->mapping = NULL;
-                       /* Leave page->index set: truncation relies upon it */
-                       spin_unlock_irq(&mapping->tree_lock);
-                       mem_cgroup_uncharge_cache_page(page);
-                       page_cache_release(page);
-               }
-               radix_tree_preload_end();
-       } else
+       error = radix_tree_maybe_preload(gfp_mask & ~__GFP_HIGHMEM);
+       if (error) {
                mem_cgroup_uncharge_cache_page(page);
-out:
+               return error;
+       }
+
+       page_cache_get(page);
+       page->mapping = mapping;
+       page->index = offset;
+
+       spin_lock_irq(&mapping->tree_lock);
+       error = radix_tree_insert(&mapping->page_tree, offset, page);
+       radix_tree_preload_end();
+       if (unlikely(error))
+               goto err_insert;
+       mapping->nrpages++;
+       __inc_zone_page_state(page, NR_FILE_PAGES);
+       spin_unlock_irq(&mapping->tree_lock);
+       trace_mm_filemap_add_to_page_cache(page);
+       return 0;
+err_insert:
+       page->mapping = NULL;
+       /* Leave page->index set: truncation relies upon it */
+       spin_unlock_irq(&mapping->tree_lock);
+       mem_cgroup_uncharge_cache_page(page);
+       page_cache_release(page);
        return error;
 }
 EXPORT_SYMBOL(add_to_page_cache_locked);
@@ -1614,6 +1616,7 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
        struct inode *inode = mapping->host;
        pgoff_t offset = vmf->pgoff;
        struct page *page;
+       bool memcg_oom;
        pgoff_t size;
        int ret = 0;
 
@@ -1622,7 +1625,11 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
                return VM_FAULT_SIGBUS;
 
        /*
-        * Do we have something in the page cache already?
+        * Do we have something in the page cache already?  Either
+        * way, try readahead, but disable the memcg OOM killer for it
+        * as readahead is optional and no errors are propagated up
+        * the fault stack.  The OOM killer is enabled while trying to
+        * instantiate the faulting page individually below.
         */
        page = find_get_page(mapping, offset);
        if (likely(page) && !(vmf->flags & FAULT_FLAG_TRIED)) {
@@ -1630,10 +1637,14 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
                 * We found the page, so try async readahead before
                 * waiting for the lock.
                 */
+               memcg_oom = mem_cgroup_toggle_oom(false);
                do_async_mmap_readahead(vma, ra, file, page, offset);
+               mem_cgroup_toggle_oom(memcg_oom);
        } else if (!page) {
                /* No page in the page cache at all */
+               memcg_oom = mem_cgroup_toggle_oom(false);
                do_sync_mmap_readahead(vma, ra, file, offset);
+               mem_cgroup_toggle_oom(memcg_oom);
                count_vm_event(PGMAJFAULT);
                mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
                ret = VM_FAULT_MAJOR;
index a92012a..7489884 100644 (file)
@@ -211,24 +211,29 @@ static void put_huge_zero_page(void)
        BUG_ON(atomic_dec_and_test(&huge_zero_refcount));
 }
 
-static int shrink_huge_zero_page(struct shrinker *shrink,
-               struct shrink_control *sc)
+static unsigned long shrink_huge_zero_page_count(struct shrinker *shrink,
+                                       struct shrink_control *sc)
 {
-       if (!sc->nr_to_scan)
-               /* we can free zero page only if last reference remains */
-               return atomic_read(&huge_zero_refcount) == 1 ? HPAGE_PMD_NR : 0;
+       /* we can free zero page only if last reference remains */
+       return atomic_read(&huge_zero_refcount) == 1 ? HPAGE_PMD_NR : 0;
+}
 
+static unsigned long shrink_huge_zero_page_scan(struct shrinker *shrink,
+                                      struct shrink_control *sc)
+{
        if (atomic_cmpxchg(&huge_zero_refcount, 1, 0) == 1) {
                struct page *zero_page = xchg(&huge_zero_page, NULL);
                BUG_ON(zero_page == NULL);
                __free_page(zero_page);
+               return HPAGE_PMD_NR;
        }
 
        return 0;
 }
 
 static struct shrinker huge_zero_page_shrinker = {
-       .shrink = shrink_huge_zero_page,
+       .count_objects = shrink_huge_zero_page_count,
+       .scan_objects = shrink_huge_zero_page_scan,
        .seeks = DEFAULT_SEEKS,
 };
 
@@ -417,7 +422,7 @@ static ssize_t scan_sleep_millisecs_store(struct kobject *kobj,
        unsigned long msecs;
        int err;
 
-       err = strict_strtoul(buf, 10, &msecs);
+       err = kstrtoul(buf, 10, &msecs);
        if (err || msecs > UINT_MAX)
                return -EINVAL;
 
@@ -444,7 +449,7 @@ static ssize_t alloc_sleep_millisecs_store(struct kobject *kobj,
        unsigned long msecs;
        int err;
 
-       err = strict_strtoul(buf, 10, &msecs);
+       err = kstrtoul(buf, 10, &msecs);
        if (err || msecs > UINT_MAX)
                return -EINVAL;
 
@@ -470,7 +475,7 @@ static ssize_t pages_to_scan_store(struct kobject *kobj,
        int err;
        unsigned long pages;
 
-       err = strict_strtoul(buf, 10, &pages);
+       err = kstrtoul(buf, 10, &pages);
        if (err || !pages || pages > UINT_MAX)
                return -EINVAL;
 
@@ -538,7 +543,7 @@ static ssize_t khugepaged_max_ptes_none_store(struct kobject *kobj,
        int err;
        unsigned long max_ptes_none;
 
-       err = strict_strtoul(buf, 10, &max_ptes_none);
+       err = kstrtoul(buf, 10, &max_ptes_none);
        if (err || max_ptes_none > HPAGE_PMD_NR-1)
                return -EINVAL;
 
@@ -690,11 +695,10 @@ pmd_t maybe_pmd_mkwrite(pmd_t pmd, struct vm_area_struct *vma)
        return pmd;
 }
 
-static inline pmd_t mk_huge_pmd(struct page *page, struct vm_area_struct *vma)
+static inline pmd_t mk_huge_pmd(struct page *page, pgprot_t prot)
 {
        pmd_t entry;
-       entry = mk_pmd(page, vma->vm_page_prot);
-       entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
+       entry = mk_pmd(page, prot);
        entry = pmd_mkhuge(entry);
        return entry;
 }
@@ -727,7 +731,8 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm,
                pte_free(mm, pgtable);
        } else {
                pmd_t entry;
-               entry = mk_huge_pmd(page, vma);
+               entry = mk_huge_pmd(page, vma->vm_page_prot);
+               entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
                page_add_new_anon_rmap(page, vma, haddr);
                pgtable_trans_huge_deposit(mm, pmd, pgtable);
                set_pmd_at(mm, haddr, pmd, entry);
@@ -783,77 +788,57 @@ int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 {
        struct page *page;
        unsigned long haddr = address & HPAGE_PMD_MASK;
-       pte_t *pte;
 
-       if (haddr >= vma->vm_start && haddr + HPAGE_PMD_SIZE <= vma->vm_end) {
-               if (unlikely(anon_vma_prepare(vma)))
-                       return VM_FAULT_OOM;
-               if (unlikely(khugepaged_enter(vma)))
+       if (haddr < vma->vm_start || haddr + HPAGE_PMD_SIZE > vma->vm_end)
+               return VM_FAULT_FALLBACK;
+       if (unlikely(anon_vma_prepare(vma)))
+               return VM_FAULT_OOM;
+       if (unlikely(khugepaged_enter(vma)))
+               return VM_FAULT_OOM;
+       if (!(flags & FAULT_FLAG_WRITE) &&
+                       transparent_hugepage_use_zero_page()) {
+               pgtable_t pgtable;
+               struct page *zero_page;
+               bool set;
+               pgtable = pte_alloc_one(mm, haddr);
+               if (unlikely(!pgtable))
                        return VM_FAULT_OOM;
-               if (!(flags & FAULT_FLAG_WRITE) &&
-                               transparent_hugepage_use_zero_page()) {
-                       pgtable_t pgtable;
-                       struct page *zero_page;
-                       bool set;
-                       pgtable = pte_alloc_one(mm, haddr);
-                       if (unlikely(!pgtable))
-                               return VM_FAULT_OOM;
-                       zero_page = get_huge_zero_page();
-                       if (unlikely(!zero_page)) {
-                               pte_free(mm, pgtable);
-                               count_vm_event(THP_FAULT_FALLBACK);
-                               goto out;
-                       }
-                       spin_lock(&mm->page_table_lock);
-                       set = set_huge_zero_page(pgtable, mm, vma, haddr, pmd,
-                                       zero_page);
-                       spin_unlock(&mm->page_table_lock);
-                       if (!set) {
-                               pte_free(mm, pgtable);
-                               put_huge_zero_page();
-                       }
-                       return 0;
-               }
-               page = alloc_hugepage_vma(transparent_hugepage_defrag(vma),
-                                         vma, haddr, numa_node_id(), 0);
-               if (unlikely(!page)) {
+               zero_page = get_huge_zero_page();
+               if (unlikely(!zero_page)) {
+                       pte_free(mm, pgtable);
                        count_vm_event(THP_FAULT_FALLBACK);
-                       goto out;
+                       return VM_FAULT_FALLBACK;
                }
-               count_vm_event(THP_FAULT_ALLOC);
-               if (unlikely(mem_cgroup_newpage_charge(page, mm, GFP_KERNEL))) {
-                       put_page(page);
-                       goto out;
-               }
-               if (unlikely(__do_huge_pmd_anonymous_page(mm, vma, haddr, pmd,
-                                                         page))) {
-                       mem_cgroup_uncharge_page(page);
-                       put_page(page);
-                       goto out;
+               spin_lock(&mm->page_table_lock);
+               set = set_huge_zero_page(pgtable, mm, vma, haddr, pmd,
+                               zero_page);
+               spin_unlock(&mm->page_table_lock);
+               if (!set) {
+                       pte_free(mm, pgtable);
+                       put_huge_zero_page();
                }
-
                return 0;
        }
-out:
-       /*
-        * Use __pte_alloc instead of pte_alloc_map, because we can't
-        * run pte_offset_map on the pmd, if an huge pmd could
-        * materialize from under us from a different thread.
-        */
-       if (unlikely(pmd_none(*pmd)) &&
-           unlikely(__pte_alloc(mm, vma, pmd, address)))
-               return VM_FAULT_OOM;
-       /* if an huge pmd materialized from under us just retry later */
-       if (unlikely(pmd_trans_huge(*pmd)))
-               return 0;
-       /*
-        * A regular pmd is established and it can't morph into a huge pmd
-        * from under us anymore at this point because we hold the mmap_sem
-        * read mode and khugepaged takes it in write mode. So now it's
-        * safe to run pte_offset_map().
-        */
-       pte = pte_offset_map(pmd, address);
-       return handle_pte_fault(mm, vma, address, pte, pmd, flags);
+       page = alloc_hugepage_vma(transparent_hugepage_defrag(vma),
+                       vma, haddr, numa_node_id(), 0);
+       if (unlikely(!page)) {
+               count_vm_event(THP_FAULT_FALLBACK);
+               return VM_FAULT_FALLBACK;
+       }
+       if (unlikely(mem_cgroup_newpage_charge(page, mm, GFP_KERNEL))) {
+               put_page(page);
+               count_vm_event(THP_FAULT_FALLBACK);
+               return VM_FAULT_FALLBACK;
+       }
+       if (unlikely(__do_huge_pmd_anonymous_page(mm, vma, haddr, pmd, page))) {
+               mem_cgroup_uncharge_page(page);
+               put_page(page);
+               count_vm_event(THP_FAULT_FALLBACK);
+               return VM_FAULT_FALLBACK;
+       }
+
+       count_vm_event(THP_FAULT_ALLOC);
+       return 0;
 }
 
 int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
@@ -1165,7 +1150,6 @@ alloc:
                new_page = NULL;
 
        if (unlikely(!new_page)) {
-               count_vm_event(THP_FAULT_FALLBACK);
                if (is_huge_zero_pmd(orig_pmd)) {
                        ret = do_huge_pmd_wp_zero_page_fallback(mm, vma,
                                        address, pmd, orig_pmd, haddr);
@@ -1176,9 +1160,9 @@ alloc:
                                split_huge_page(page);
                        put_page(page);
                }
+               count_vm_event(THP_FAULT_FALLBACK);
                goto out;
        }
-       count_vm_event(THP_FAULT_ALLOC);
 
        if (unlikely(mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))) {
                put_page(new_page);
@@ -1186,10 +1170,13 @@ alloc:
                        split_huge_page(page);
                        put_page(page);
                }
+               count_vm_event(THP_FAULT_FALLBACK);
                ret |= VM_FAULT_OOM;
                goto out;
        }
 
+       count_vm_event(THP_FAULT_ALLOC);
+
        if (is_huge_zero_pmd(orig_pmd))
                clear_huge_page(new_page, haddr, HPAGE_PMD_NR);
        else
@@ -1210,7 +1197,8 @@ alloc:
                goto out_mn;
        } else {
                pmd_t entry;
-               entry = mk_huge_pmd(new_page, vma);
+               entry = mk_huge_pmd(new_page, vma->vm_page_prot);
+               entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
                pmdp_clear_flush(vma, haddr, pmd);
                page_add_new_anon_rmap(new_page, vma, haddr);
                set_pmd_at(mm, haddr, pmd, entry);
@@ -1661,7 +1649,6 @@ static void __split_huge_page_refcount(struct page *page,
        BUG_ON(atomic_read(&page->_count) <= 0);
 
        __mod_zone_page_state(zone, NR_ANON_TRANSPARENT_HUGEPAGES, -1);
-       __mod_zone_page_state(zone, NR_ANON_PAGES, HPAGE_PMD_NR);
 
        ClearPageCompound(page);
        compound_unlock(page);
@@ -2296,6 +2283,8 @@ static void collapse_huge_page(struct mm_struct *mm,
                goto out;
 
        vma = find_vma(mm, address);
+       if (!vma)
+               goto out;
        hstart = (vma->vm_start + ~HPAGE_PMD_MASK) & HPAGE_PMD_MASK;
        hend = vma->vm_end & HPAGE_PMD_MASK;
        if (address < hstart || address + HPAGE_PMD_SIZE > hend)
@@ -2357,7 +2346,8 @@ static void collapse_huge_page(struct mm_struct *mm,
        __SetPageUptodate(new_page);
        pgtable = pmd_pgtable(_pmd);
 
-       _pmd = mk_huge_pmd(new_page, vma);
+       _pmd = mk_huge_pmd(new_page, vma->vm_page_prot);
+       _pmd = maybe_pmd_mkwrite(pmd_mkdirty(_pmd), vma);
 
        /*
         * spin_lock() below is not the equivalent of smp_wmb(), so
index b60f330..b49579c 100644 (file)
@@ -21,6 +21,7 @@
 #include <linux/rmap.h>
 #include <linux/swap.h>
 #include <linux/swapops.h>
+#include <linux/page-isolation.h>
 
 #include <asm/page.h>
 #include <asm/pgtable.h>
@@ -33,7 +34,6 @@
 #include "internal.h"
 
 const unsigned long hugetlb_zero = 0, hugetlb_infinity = ~0UL;
-static gfp_t htlb_alloc_mask = GFP_HIGHUSER;
 unsigned long hugepages_treat_as_movable;
 
 int hugetlb_max_hstate __read_mostly;
@@ -48,7 +48,8 @@ static unsigned long __initdata default_hstate_max_huge_pages;
 static unsigned long __initdata default_hstate_size;
 
 /*
- * Protects updates to hugepage_freelists, nr_huge_pages, and free_huge_pages
+ * Protects updates to hugepage_freelists, hugepage_activelist, nr_huge_pages,
+ * free_huge_pages, and surplus_huge_pages.
  */
 DEFINE_SPINLOCK(hugetlb_lock);
 
@@ -135,9 +136,9 @@ static inline struct hugepage_subpool *subpool_vma(struct vm_area_struct *vma)
  *                    across the pages in a mapping.
  *
  * The region data structures are protected by a combination of the mmap_sem
- * and the hugetlb_instantion_mutex.  To access or modify a region the caller
+ * and the hugetlb_instantiation_mutex.  To access or modify a region the caller
  * must either hold the mmap_sem for write, or the mmap_sem for read and
- * the hugetlb_instantiation mutex:
+ * the hugetlb_instantiation_mutex:
  *
  *     down_write(&mm->mmap_sem);
  * or
@@ -434,25 +435,6 @@ static int is_vma_resv_set(struct vm_area_struct *vma, unsigned long flag)
        return (get_vma_private_data(vma) & flag) != 0;
 }
 
-/* Decrement the reserved pages in the hugepage pool by one */
-static void decrement_hugepage_resv_vma(struct hstate *h,
-                       struct vm_area_struct *vma)
-{
-       if (vma->vm_flags & VM_NORESERVE)
-               return;
-
-       if (vma->vm_flags & VM_MAYSHARE) {
-               /* Shared mappings always use reserves */
-               h->resv_huge_pages--;
-       } else if (is_vma_resv_set(vma, HPAGE_RESV_OWNER)) {
-               /*
-                * Only the process that called mmap() has reserves for
-                * private mappings.
-                */
-               h->resv_huge_pages--;
-       }
-}
-
 /* Reset counters to 0 and clear all HPAGE_RESV_* flags */
 void reset_vma_resv_huge_pages(struct vm_area_struct *vma)
 {
@@ -462,12 +444,35 @@ void reset_vma_resv_huge_pages(struct vm_area_struct *vma)
 }
 
 /* Returns true if the VMA has associated reserve pages */
-static int vma_has_reserves(struct vm_area_struct *vma)
+static int vma_has_reserves(struct vm_area_struct *vma, long chg)
 {
+       if (vma->vm_flags & VM_NORESERVE) {
+               /*
+                * This address is already reserved by another process (chg == 0),
+                * so, we should decrement reserved count. Without decrementing,
+                * reserve count remains after releasing inode, because this
+                * allocated page will go into page cache and is regarded as
+                * coming from reserved pool in releasing step.  Currently, we
+                * don't have any other solution to deal with this situation
+                * properly, so add work-around here.
+                */
+               if (vma->vm_flags & VM_MAYSHARE && chg == 0)
+                       return 1;
+               else
+                       return 0;
+       }
+
+       /* Shared mappings always use reserves */
        if (vma->vm_flags & VM_MAYSHARE)
                return 1;
+
+       /*
+        * Only the process that called mmap() has reserves for
+        * private mappings.
+        */
        if (is_vma_resv_set(vma, HPAGE_RESV_OWNER))
                return 1;
+
        return 0;
 }
 
@@ -517,9 +522,15 @@ static struct page *dequeue_huge_page_node(struct hstate *h, int nid)
 {
        struct page *page;
 
-       if (list_empty(&h->hugepage_freelists[nid]))
+       list_for_each_entry(page, &h->hugepage_freelists[nid], lru)
+               if (!is_migrate_isolate_page(page))
+                       break;
+       /*
+        * if 'non-isolated free hugepage' not found on the list,
+        * the allocation fails.
+        */
+       if (&h->hugepage_freelists[nid] == &page->lru)
                return NULL;
-       page = list_entry(h->hugepage_freelists[nid].next, struct page, lru);
        list_move(&page->lru, &h->hugepage_activelist);
        set_page_refcounted(page);
        h->free_huge_pages--;
@@ -527,9 +538,19 @@ static struct page *dequeue_huge_page_node(struct hstate *h, int nid)
        return page;
 }
 
+/* Movability of hugepages depends on migration support. */
+static inline gfp_t htlb_alloc_mask(struct hstate *h)
+{
+       if (hugepages_treat_as_movable || hugepage_migration_support(h))
+               return GFP_HIGHUSER_MOVABLE;
+       else
+               return GFP_HIGHUSER;
+}
+
 static struct page *dequeue_huge_page_vma(struct hstate *h,
                                struct vm_area_struct *vma,
-                               unsigned long address, int avoid_reserve)
+                               unsigned long address, int avoid_reserve,
+                               long chg)
 {
        struct page *page = NULL;
        struct mempolicy *mpol;
@@ -539,16 +560,12 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
        struct zoneref *z;
        unsigned int cpuset_mems_cookie;
 
-retry_cpuset:
-       cpuset_mems_cookie = get_mems_allowed();
-       zonelist = huge_zonelist(vma, address,
-                                       htlb_alloc_mask, &mpol, &nodemask);
        /*
         * A child process with MAP_PRIVATE mappings created by their parent
         * have no page reserves. This check ensures that reservations are
         * not "stolen". The child may still get SIGKILLed
         */
-       if (!vma_has_reserves(vma) &&
+       if (!vma_has_reserves(vma, chg) &&
                        h->free_huge_pages - h->resv_huge_pages == 0)
                goto err;
 
@@ -556,13 +573,23 @@ retry_cpuset:
        if (avoid_reserve && h->free_huge_pages - h->resv_huge_pages == 0)
                goto err;
 
+retry_cpuset:
+       cpuset_mems_cookie = get_mems_allowed();
+       zonelist = huge_zonelist(vma, address,
+                                       htlb_alloc_mask(h), &mpol, &nodemask);
+
        for_each_zone_zonelist_nodemask(zone, z, zonelist,
                                                MAX_NR_ZONES - 1, nodemask) {
-               if (cpuset_zone_allowed_softwall(zone, htlb_alloc_mask)) {
+               if (cpuset_zone_allowed_softwall(zone, htlb_alloc_mask(h))) {
                        page = dequeue_huge_page_node(h, zone_to_nid(zone));
                        if (page) {
-                               if (!avoid_reserve)
-                                       decrement_hugepage_resv_vma(h, vma);
+                               if (avoid_reserve)
+                                       break;
+                               if (!vma_has_reserves(vma, chg))
+                                       break;
+
+                               SetPagePrivate(page);
+                               h->resv_huge_pages--;
                                break;
                        }
                }
@@ -574,7 +601,6 @@ retry_cpuset:
        return page;
 
 err:
-       mpol_cond_put(mpol);
        return NULL;
 }
 
@@ -620,15 +646,20 @@ static void free_huge_page(struct page *page)
        int nid = page_to_nid(page);
        struct hugepage_subpool *spool =
                (struct hugepage_subpool *)page_private(page);
+       bool restore_reserve;
 
        set_page_private(page, 0);
        page->mapping = NULL;
        BUG_ON(page_count(page));
        BUG_ON(page_mapcount(page));
+       restore_reserve = PagePrivate(page);
 
        spin_lock(&hugetlb_lock);
        hugetlb_cgroup_uncharge_page(hstate_index(h),
                                     pages_per_huge_page(h), page);
+       if (restore_reserve)
+               h->resv_huge_pages++;
+
        if (h->surplus_huge_pages_node[nid] && huge_page_order(h) < MAX_ORDER) {
                /* remove the page from active list */
                list_del(&page->lru);
@@ -715,7 +746,7 @@ static struct page *alloc_fresh_huge_page_node(struct hstate *h, int nid)
                return NULL;
 
        page = alloc_pages_exact_node(nid,
-               htlb_alloc_mask|__GFP_COMP|__GFP_THISNODE|
+               htlb_alloc_mask(h)|__GFP_COMP|__GFP_THISNODE|
                                                __GFP_REPEAT|__GFP_NOWARN,
                huge_page_order(h));
        if (page) {
@@ -772,33 +803,6 @@ static int hstate_next_node_to_alloc(struct hstate *h,
        return nid;
 }
 
-static int alloc_fresh_huge_page(struct hstate *h, nodemask_t *nodes_allowed)
-{
-       struct page *page;
-       int start_nid;
-       int next_nid;
-       int ret = 0;
-
-       start_nid = hstate_next_node_to_alloc(h, nodes_allowed);
-       next_nid = start_nid;
-
-       do {
-               page = alloc_fresh_huge_page_node(h, next_nid);
-               if (page) {
-                       ret = 1;
-                       break;
-               }
-               next_nid = hstate_next_node_to_alloc(h, nodes_allowed);
-       } while (next_nid != start_nid);
-
-       if (ret)
-               count_vm_event(HTLB_BUDDY_PGALLOC);
-       else
-               count_vm_event(HTLB_BUDDY_PGALLOC_FAIL);
-
-       return ret;
-}
-
 /*
  * helper for free_pool_huge_page() - return the previously saved
  * node ["this node"] from which to free a huge page.  Advance the
@@ -817,6 +821,40 @@ static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed)
        return nid;
 }
 
+#define for_each_node_mask_to_alloc(hs, nr_nodes, node, mask)          \
+       for (nr_nodes = nodes_weight(*mask);                            \
+               nr_nodes > 0 &&                                         \
+               ((node = hstate_next_node_to_alloc(hs, mask)) || 1);    \
+               nr_nodes--)
+
+#define for_each_node_mask_to_free(hs, nr_nodes, node, mask)           \
+       for (nr_nodes = nodes_weight(*mask);                            \
+               nr_nodes > 0 &&                                         \
+               ((node = hstate_next_node_to_free(hs, mask)) || 1);     \
+               nr_nodes--)
+
+static int alloc_fresh_huge_page(struct hstate *h, nodemask_t *nodes_allowed)
+{
+       struct page *page;
+       int nr_nodes, node;
+       int ret = 0;
+
+       for_each_node_mask_to_alloc(h, nr_nodes, node, nodes_allowed) {
+               page = alloc_fresh_huge_page_node(h, node);
+               if (page) {
+                       ret = 1;
+                       break;
+               }
+       }
+
+       if (ret)
+               count_vm_event(HTLB_BUDDY_PGALLOC);
+       else
+               count_vm_event(HTLB_BUDDY_PGALLOC_FAIL);
+
+       return ret;
+}
+
 /*
  * Free huge page from pool from next node to free.
  * Attempt to keep persistent huge pages more or less
@@ -826,40 +864,73 @@ static int hstate_next_node_to_free(struct hstate *h, nodemask_t *nodes_allowed)
 static int free_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed,
                                                         bool acct_surplus)
 {
-       int start_nid;
-       int next_nid;
+       int nr_nodes, node;
        int ret = 0;
 
-       start_nid = hstate_next_node_to_free(h, nodes_allowed);
-       next_nid = start_nid;
-
-       do {
+       for_each_node_mask_to_free(h, nr_nodes, node, nodes_allowed) {
                /*
                 * If we're returning unused surplus pages, only examine
                 * nodes with surplus pages.
                 */
-               if ((!acct_surplus || h->surplus_huge_pages_node[next_nid]) &&
-                   !list_empty(&h->hugepage_freelists[next_nid])) {
+               if ((!acct_surplus || h->surplus_huge_pages_node[node]) &&
+                   !list_empty(&h->hugepage_freelists[node])) {
                        struct page *page =
-                               list_entry(h->hugepage_freelists[next_nid].next,
+                               list_entry(h->hugepage_freelists[node].next,
                                          struct page, lru);
                        list_del(&page->lru);
                        h->free_huge_pages--;
-                       h->free_huge_pages_node[next_nid]--;
+                       h->free_huge_pages_node[node]--;
                        if (acct_surplus) {
                                h->surplus_huge_pages--;
-                               h->surplus_huge_pages_node[next_nid]--;
+                               h->surplus_huge_pages_node[node]--;
                        }
                        update_and_free_page(h, page);
                        ret = 1;
                        break;
                }
-               next_nid = hstate_next_node_to_free(h, nodes_allowed);
-       } while (next_nid != start_nid);
+       }
 
        return ret;
 }
 
+/*
+ * Dissolve a given free hugepage into free buddy pages. This function does
+ * nothing for in-use (including surplus) hugepages.
+ */
+static void dissolve_free_huge_page(struct page *page)
+{
+       spin_lock(&hugetlb_lock);
+       if (PageHuge(page) && !page_count(page)) {
+               struct hstate *h = page_hstate(page);
+               int nid = page_to_nid(page);
+               list_del(&page->lru);
+               h->free_huge_pages--;
+               h->free_huge_pages_node[nid]--;
+               update_and_free_page(h, page);
+       }
+       spin_unlock(&hugetlb_lock);
+}
+
+/*
+ * Dissolve free hugepages in a given pfn range. Used by memory hotplug to
+ * make specified memory blocks removable from the system.
+ * Note that start_pfn should aligned with (minimum) hugepage size.
+ */
+void dissolve_free_huge_pages(unsigned long start_pfn, unsigned long end_pfn)
+{
+       unsigned int order = 8 * sizeof(void *);
+       unsigned long pfn;
+       struct hstate *h;
+
+       /* Set scan step to minimum hugepage size */
+       for_each_hstate(h)
+               if (order > huge_page_order(h))
+                       order = huge_page_order(h);
+       VM_BUG_ON(!IS_ALIGNED(start_pfn, 1 << order));
+       for (pfn = start_pfn; pfn < end_pfn; pfn += 1 << order)
+               dissolve_free_huge_page(pfn_to_page(pfn));
+}
+
 static struct page *alloc_buddy_huge_page(struct hstate *h, int nid)
 {
        struct page *page;
@@ -902,12 +973,12 @@ static struct page *alloc_buddy_huge_page(struct hstate *h, int nid)
        spin_unlock(&hugetlb_lock);
 
        if (nid == NUMA_NO_NODE)
-               page = alloc_pages(htlb_alloc_mask|__GFP_COMP|
+               page = alloc_pages(htlb_alloc_mask(h)|__GFP_COMP|
                                   __GFP_REPEAT|__GFP_NOWARN,
                                   huge_page_order(h));
        else
                page = alloc_pages_exact_node(nid,
-                       htlb_alloc_mask|__GFP_COMP|__GFP_THISNODE|
+                       htlb_alloc_mask(h)|__GFP_COMP|__GFP_THISNODE|
                        __GFP_REPEAT|__GFP_NOWARN, huge_page_order(h));
 
        if (page && arch_prepare_hugepage(page)) {
@@ -944,10 +1015,11 @@ static struct page *alloc_buddy_huge_page(struct hstate *h, int nid)
  */
 struct page *alloc_huge_page_node(struct hstate *h, int nid)
 {
-       struct page *page;
+       struct page *page = NULL;
 
        spin_lock(&hugetlb_lock);
-       page = dequeue_huge_page_node(h, nid);
+       if (h->free_huge_pages - h->resv_huge_pages > 0)
+               page = dequeue_huge_page_node(h, nid);
        spin_unlock(&hugetlb_lock);
 
        if (!page)
@@ -1035,11 +1107,8 @@ free:
        spin_unlock(&hugetlb_lock);
 
        /* Free unnecessary surplus pages to the buddy allocator */
-       if (!list_empty(&surplus_list)) {
-               list_for_each_entry_safe(page, tmp, &surplus_list, lru) {
-                       put_page(page);
-               }
-       }
+       list_for_each_entry_safe(page, tmp, &surplus_list, lru)
+               put_page(page);
        spin_lock(&hugetlb_lock);
 
        return ret;
@@ -1106,9 +1175,9 @@ static long vma_needs_reservation(struct hstate *h,
        } else  {
                long err;
                pgoff_t idx = vma_hugecache_offset(h, vma, addr);
-               struct resv_map *reservations = vma_resv_map(vma);
+               struct resv_map *resv = vma_resv_map(vma);
 
-               err = region_chg(&reservations->regions, idx, idx + 1);
+               err = region_chg(&resv->regions, idx, idx + 1);
                if (err < 0)
                        return err;
                return 0;
@@ -1126,10 +1195,10 @@ static void vma_commit_reservation(struct hstate *h,
 
        } else if (is_vma_resv_set(vma, HPAGE_RESV_OWNER)) {
                pgoff_t idx = vma_hugecache_offset(h, vma, addr);
-               struct resv_map *reservations = vma_resv_map(vma);
+               struct resv_map *resv = vma_resv_map(vma);
 
                /* Mark this page used in the map. */
-               region_add(&reservations->regions, idx, idx + 1);
+               region_add(&resv->regions, idx, idx + 1);
        }
 }
 
@@ -1155,38 +1224,35 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
        chg = vma_needs_reservation(h, vma, addr);
        if (chg < 0)
                return ERR_PTR(-ENOMEM);
-       if (chg)
-               if (hugepage_subpool_get_pages(spool, chg))
+       if (chg || avoid_reserve)
+               if (hugepage_subpool_get_pages(spool, 1))
                        return ERR_PTR(-ENOSPC);
 
        ret = hugetlb_cgroup_charge_cgroup(idx, pages_per_huge_page(h), &h_cg);
        if (ret) {
-               hugepage_subpool_put_pages(spool, chg);
+               if (chg || avoid_reserve)
+                       hugepage_subpool_put_pages(spool, 1);
                return ERR_PTR(-ENOSPC);
        }
        spin_lock(&hugetlb_lock);
-       page = dequeue_huge_page_vma(h, vma, addr, avoid_reserve);
-       if (page) {
-               /* update page cgroup details */
-               hugetlb_cgroup_commit_charge(idx, pages_per_huge_page(h),
-                                            h_cg, page);
-               spin_unlock(&hugetlb_lock);
-       } else {
+       page = dequeue_huge_page_vma(h, vma, addr, avoid_reserve, chg);
+       if (!page) {
                spin_unlock(&hugetlb_lock);
                page = alloc_buddy_huge_page(h, NUMA_NO_NODE);
                if (!page) {
                        hugetlb_cgroup_uncharge_cgroup(idx,
                                                       pages_per_huge_page(h),
                                                       h_cg);
-                       hugepage_subpool_put_pages(spool, chg);
+                       if (chg || avoid_reserve)
+                               hugepage_subpool_put_pages(spool, 1);
                        return ERR_PTR(-ENOSPC);
                }
                spin_lock(&hugetlb_lock);
-               hugetlb_cgroup_commit_charge(idx, pages_per_huge_page(h),
-                                            h_cg, page);
                list_move(&page->lru, &h->hugepage_activelist);
-               spin_unlock(&hugetlb_lock);
+               /* Fall through */
        }
+       hugetlb_cgroup_commit_charge(idx, pages_per_huge_page(h), h_cg, page);
+       spin_unlock(&hugetlb_lock);
 
        set_page_private(page, (unsigned long)spool);
 
@@ -1194,17 +1260,29 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
        return page;
 }
 
+/*
+ * alloc_huge_page()'s wrapper which simply returns the page if allocation
+ * succeeds, otherwise NULL. This function is called from new_vma_page(),
+ * where no ERR_VALUE is expected to be returned.
+ */
+struct page *alloc_huge_page_noerr(struct vm_area_struct *vma,
+                               unsigned long addr, int avoid_reserve)
+{
+       struct page *page = alloc_huge_page(vma, addr, avoid_reserve);
+       if (IS_ERR(page))
+               page = NULL;
+       return page;
+}
+
 int __weak alloc_bootmem_huge_page(struct hstate *h)
 {
        struct huge_bootmem_page *m;
-       int nr_nodes = nodes_weight(node_states[N_MEMORY]);
+       int nr_nodes, node;
 
-       while (nr_nodes) {
+       for_each_node_mask_to_alloc(h, nr_nodes, node, &node_states[N_MEMORY]) {
                void *addr;
 
-               addr = __alloc_bootmem_node_nopanic(
-                               NODE_DATA(hstate_next_node_to_alloc(h,
-                                               &node_states[N_MEMORY])),
+               addr = __alloc_bootmem_node_nopanic(NODE_DATA(node),
                                huge_page_size(h), huge_page_size(h), 0);
 
                if (addr) {
@@ -1216,7 +1294,6 @@ int __weak alloc_bootmem_huge_page(struct hstate *h)
                        m = addr;
                        goto found;
                }
-               nr_nodes--;
        }
        return 0;
 
@@ -1355,48 +1432,28 @@ static inline void try_to_free_low(struct hstate *h, unsigned long count,
 static int adjust_pool_surplus(struct hstate *h, nodemask_t *nodes_allowed,
                                int delta)
 {
-       int start_nid, next_nid;
-       int ret = 0;
+       int nr_nodes, node;
 
        VM_BUG_ON(delta != -1 && delta != 1);
 
-       if (delta < 0)
-               start_nid = hstate_next_node_to_alloc(h, nodes_allowed);
-       else
-               start_nid = hstate_next_node_to_free(h, nodes_allowed);
-       next_nid = start_nid;
-
-       do {
-               int nid = next_nid;
-               if (delta < 0)  {
-                       /*
-                        * To shrink on this node, there must be a surplus page
-                        */
-                       if (!h->surplus_huge_pages_node[nid]) {
-                               next_nid = hstate_next_node_to_alloc(h,
-                                                               nodes_allowed);
-                               continue;
-                       }
+       if (delta < 0) {
+               for_each_node_mask_to_alloc(h, nr_nodes, node, nodes_allowed) {
+                       if (h->surplus_huge_pages_node[node])
+                               goto found;
                }
-               if (delta > 0) {
-                       /*
-                        * Surplus cannot exceed the total number of pages
-                        */
-                       if (h->surplus_huge_pages_node[nid] >=
-                                               h->nr_huge_pages_node[nid]) {
-                               next_nid = hstate_next_node_to_free(h,
-                                                               nodes_allowed);
-                               continue;
-                       }
+       } else {
+               for_each_node_mask_to_free(h, nr_nodes, node, nodes_allowed) {
+                       if (h->surplus_huge_pages_node[node] <
+                                       h->nr_huge_pages_node[node])
+                               goto found;
                }
+       }
+       return 0;
 
-               h->surplus_huge_pages += delta;
-               h->surplus_huge_pages_node[nid] += delta;
-               ret = 1;
-               break;
-       } while (next_nid != start_nid);
-
-       return ret;
+found:
+       h->surplus_huge_pages += delta;
+       h->surplus_huge_pages_node[node] += delta;
+       return 1;
 }
 
 #define persistent_huge_pages(h) (h->nr_huge_pages - h->surplus_huge_pages)
@@ -1526,7 +1583,7 @@ static ssize_t nr_hugepages_store_common(bool obey_mempolicy,
        struct hstate *h;
        NODEMASK_ALLOC(nodemask_t, nodes_allowed, GFP_KERNEL | __GFP_NORETRY);
 
-       err = strict_strtoul(buf, 10, &count);
+       err = kstrtoul(buf, 10, &count);
        if (err)
                goto out;
 
@@ -1617,7 +1674,7 @@ static ssize_t nr_overcommit_hugepages_store(struct kobject *kobj,
        if (h->order >= MAX_ORDER)
                return -EINVAL;
 
-       err = strict_strtoul(buf, 10, &input);
+       err = kstrtoul(buf, 10, &input);
        if (err)
                return err;
 
@@ -2068,18 +2125,6 @@ int hugetlb_mempolicy_sysctl_handler(struct ctl_table *table, int write,
 }
 #endif /* CONFIG_NUMA */
 
-int hugetlb_treat_movable_handler(struct ctl_table *table, int write,
-                       void __user *buffer,
-                       size_t *length, loff_t *ppos)
-{
-       proc_dointvec(table, write, buffer, length, ppos);
-       if (hugepages_treat_as_movable)
-               htlb_alloc_mask = GFP_HIGHUSER_MOVABLE;
-       else
-               htlb_alloc_mask = GFP_HIGHUSER;
-       return 0;
-}
-
 int hugetlb_overcommit_handler(struct ctl_table *table, int write,
                        void __user *buffer,
                        size_t *length, loff_t *ppos)
@@ -2207,7 +2252,7 @@ out:
 
 static void hugetlb_vm_op_open(struct vm_area_struct *vma)
 {
-       struct resv_map *reservations = vma_resv_map(vma);
+       struct resv_map *resv = vma_resv_map(vma);
 
        /*
         * This new VMA should share its siblings reservation map if present.
@@ -2217,34 +2262,34 @@ static void hugetlb_vm_op_open(struct vm_area_struct *vma)
         * after this open call completes.  It is therefore safe to take a
         * new reference here without additional locking.
         */
-       if (reservations)
-               kref_get(&reservations->refs);
+       if (resv)
+               kref_get(&resv->refs);
 }
 
 static void resv_map_put(struct vm_area_struct *vma)
 {
-       struct resv_map *reservations = vma_resv_map(vma);
+       struct resv_map *resv = vma_resv_map(vma);
 
-       if (!reservations)
+       if (!resv)
                return;
-       kref_put(&reservations->refs, resv_map_release);
+       kref_put(&resv->refs, resv_map_release);
 }
 
 static void hugetlb_vm_op_close(struct vm_area_struct *vma)
 {
        struct hstate *h = hstate_vma(vma);
-       struct resv_map *reservations = vma_resv_map(vma);
+       struct resv_map *resv = vma_resv_map(vma);
        struct hugepage_subpool *spool = subpool_vma(vma);
        unsigned long reserve;
        unsigned long start;
        unsigned long end;
 
-       if (reservations) {
+       if (resv) {
                start = vma_hugecache_offset(h, vma, vma->vm_start);
                end = vma_hugecache_offset(h, vma, vma->vm_end);
 
                reserve = (end - start) -
-                       region_count(&reservations->regions, start, end);
+                       region_count(&resv->regions, start, end);
 
                resv_map_put(vma);
 
@@ -2557,7 +2602,6 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
 {
        struct hstate *h = hstate_vma(vma);
        struct page *old_page, *new_page;
-       int avoidcopy;
        int outside_reserve = 0;
        unsigned long mmun_start;       /* For mmu_notifiers */
        unsigned long mmun_end;         /* For mmu_notifiers */
@@ -2567,10 +2611,8 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
 retry_avoidcopy:
        /* If no-one else is actually using this page, avoid the copy
         * and just make the page writable */
-       avoidcopy = (page_mapcount(old_page) == 1);
-       if (avoidcopy) {
-               if (PageAnon(old_page))
-                       page_move_anon_rmap(old_page, vma, address);
+       if (page_mapcount(old_page) == 1 && PageAnon(old_page)) {
+               page_move_anon_rmap(old_page, vma, address);
                set_huge_ptep_writable(vma, address, ptep);
                return 0;
        }
@@ -2584,8 +2626,7 @@ retry_avoidcopy:
         * at the time of fork() could consume its reserves on COW instead
         * of the full address range.
         */
-       if (!(vma->vm_flags & VM_MAYSHARE) &&
-                       is_vma_resv_set(vma, HPAGE_RESV_OWNER) &&
+       if (is_vma_resv_set(vma, HPAGE_RESV_OWNER) &&
                        old_page != pagecache_page)
                outside_reserve = 1;
 
@@ -2657,6 +2698,8 @@ retry_avoidcopy:
        spin_lock(&mm->page_table_lock);
        ptep = huge_pte_offset(mm, address & huge_page_mask(h));
        if (likely(pte_same(huge_ptep_get(ptep), pte))) {
+               ClearPagePrivate(new_page);
+
                /* Break COW */
                huge_ptep_clear_flush(vma, address, ptep);
                set_huge_pte_at(mm, address, ptep,
@@ -2668,10 +2711,11 @@ retry_avoidcopy:
        }
        spin_unlock(&mm->page_table_lock);
        mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
-       /* Caller expects lock to be held */
-       spin_lock(&mm->page_table_lock);
        page_cache_release(new_page);
        page_cache_release(old_page);
+
+       /* Caller expects lock to be held */
+       spin_lock(&mm->page_table_lock);
        return 0;
 }
 
@@ -2767,6 +2811,7 @@ retry:
                                        goto retry;
                                goto out;
                        }
+                       ClearPagePrivate(page);
 
                        spin_lock(&inode->i_lock);
                        inode->i_blocks += blocks_per_huge_page(h);
@@ -2813,8 +2858,10 @@ retry:
        if (!huge_pte_none(huge_ptep_get(ptep)))
                goto backout;
 
-       if (anon_rmap)
+       if (anon_rmap) {
+               ClearPagePrivate(page);
                hugepage_add_new_anon_rmap(page, vma, address);
+       }
        else
                page_dup_rmap(page);
        new_pte = make_huge_pte(vma, page, ((vma->vm_flags & VM_WRITE)
@@ -3431,3 +3478,45 @@ int dequeue_hwpoisoned_huge_page(struct page *hpage)
        return ret;
 }
 #endif
+
+bool isolate_huge_page(struct page *page, struct list_head *list)
+{
+       VM_BUG_ON(!PageHead(page));
+       if (!get_page_unless_zero(page))
+               return false;
+       spin_lock(&hugetlb_lock);
+       list_move_tail(&page->lru, list);
+       spin_unlock(&hugetlb_lock);
+       return true;
+}
+
+void putback_active_hugepage(struct page *page)
+{
+       VM_BUG_ON(!PageHead(page));
+       spin_lock(&hugetlb_lock);
+       list_move_tail(&page->lru, &(page_hstate(page))->hugepage_activelist);
+       spin_unlock(&hugetlb_lock);
+       put_page(page);
+}
+
+bool is_hugepage_active(struct page *page)
+{
+       VM_BUG_ON(!PageHuge(page));
+       /*
+        * This function can be called for a tail page because the caller,
+        * scan_movable_pages, scans through a given pfn-range which typically
+        * covers one memory block. In systems using gigantic hugepage (1GB
+        * for x86_64,) a hugepage is larger than a memory block, and we don't
+        * support migrating such large hugepages for now, so return false
+        * when called for tail pages.
+        */
+       if (PageTail(page))
+               return false;
+       /*
+        * Refcount of a hwpoisoned hugepages is 1, but they are not active,
+        * so we should return false for them.
+        */
+       if (unlikely(PageHWPoison(page)))
+               return false;
+       return page_count(page) > 0;
+}
index 3a61efc..afc2daa 100644 (file)
@@ -88,12 +88,12 @@ static int pfn_inject_init(void)
         * hardware status change, hence do not require hardware support.
         * They are mainly for testing hwpoison in software level.
         */
-       dentry = debugfs_create_file("corrupt-pfn", 0600, hwpoison_dir,
+       dentry = debugfs_create_file("corrupt-pfn", 0200, hwpoison_dir,
                                          NULL, &hwpoison_fops);
        if (!dentry)
                goto fail;
 
-       dentry = debugfs_create_file("unpoison-pfn", 0600, hwpoison_dir,
+       dentry = debugfs_create_file("unpoison-pfn", 0200, hwpoison_dir,
                                     NULL, &unpoison_fops);
        if (!dentry)
                goto fail;
index 4390ac6..684f7aa 100644 (file)
@@ -85,6 +85,8 @@ extern unsigned long highest_memmap_pfn;
  */
 extern int isolate_lru_page(struct page *page);
 extern void putback_lru_page(struct page *page);
+extern unsigned long zone_reclaimable_pages(struct zone *zone);
+extern bool zone_reclaimable(struct zone *zone);
 
 /*
  * in mm/rmap.c:
index c8d7f31..e126b0e 100644 (file)
@@ -1639,7 +1639,7 @@ static ssize_t kmemleak_write(struct file *file, const char __user *user_buf,
        else if (strncmp(buf, "scan=", 5) == 0) {
                unsigned long secs;
 
-               ret = strict_strtoul(buf + 5, 0, &secs);
+               ret = kstrtoul(buf + 5, 0, &secs);
                if (ret < 0)
                        goto out;
                stop_scan_thread();
index b6afe0c..0bea2b2 100644 (file)
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -2194,7 +2194,7 @@ static ssize_t sleep_millisecs_store(struct kobject *kobj,
        unsigned long msecs;
        int err;
 
-       err = strict_strtoul(buf, 10, &msecs);
+       err = kstrtoul(buf, 10, &msecs);
        if (err || msecs > UINT_MAX)
                return -EINVAL;
 
@@ -2217,7 +2217,7 @@ static ssize_t pages_to_scan_store(struct kobject *kobj,
        int err;
        unsigned long nr_pages;
 
-       err = strict_strtoul(buf, 10, &nr_pages);
+       err = kstrtoul(buf, 10, &nr_pages);
        if (err || nr_pages > UINT_MAX)
                return -EINVAL;
 
@@ -2239,7 +2239,7 @@ static ssize_t run_store(struct kobject *kobj, struct kobj_attribute *attr,
        int err;
        unsigned long flags;
 
-       err = strict_strtoul(buf, 10, &flags);
+       err = kstrtoul(buf, 10, &flags);
        if (err || flags > UINT_MAX)
                return -EINVAL;
        if (flags > KSM_RUN_UNMERGE)
diff --git a/mm/list_lru.c b/mm/list_lru.c
new file mode 100644 (file)
index 0000000..7246791
--- /dev/null
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2013 Red Hat, Inc. and Parallels Inc. All rights reserved.
+ * Authors: David Chinner and Glauber Costa
+ *
+ * Generic LRU infrastructure
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/list_lru.h>
+#include <linux/slab.h>
+
+bool list_lru_add(struct list_lru *lru, struct list_head *item)
+{
+       int nid = page_to_nid(virt_to_page(item));
+       struct list_lru_node *nlru = &lru->node[nid];
+
+       spin_lock(&nlru->lock);
+       WARN_ON_ONCE(nlru->nr_items < 0);
+       if (list_empty(item)) {
+               list_add_tail(item, &nlru->list);
+               if (nlru->nr_items++ == 0)
+                       node_set(nid, lru->active_nodes);
+               spin_unlock(&nlru->lock);
+               return true;
+       }
+       spin_unlock(&nlru->lock);
+       return false;
+}
+EXPORT_SYMBOL_GPL(list_lru_add);
+
+bool list_lru_del(struct list_lru *lru, struct list_head *item)
+{
+       int nid = page_to_nid(virt_to_page(item));
+       struct list_lru_node *nlru = &lru->node[nid];
+
+       spin_lock(&nlru->lock);
+       if (!list_empty(item)) {
+               list_del_init(item);
+               if (--nlru->nr_items == 0)
+                       node_clear(nid, lru->active_nodes);
+               WARN_ON_ONCE(nlru->nr_items < 0);
+               spin_unlock(&nlru->lock);
+               return true;
+       }
+       spin_unlock(&nlru->lock);
+       return false;
+}
+EXPORT_SYMBOL_GPL(list_lru_del);
+
+unsigned long
+list_lru_count_node(struct list_lru *lru, int nid)
+{
+       unsigned long count = 0;
+       struct list_lru_node *nlru = &lru->node[nid];
+
+       spin_lock(&nlru->lock);
+       WARN_ON_ONCE(nlru->nr_items < 0);
+       count += nlru->nr_items;
+       spin_unlock(&nlru->lock);
+
+       return count;
+}
+EXPORT_SYMBOL_GPL(list_lru_count_node);
+
+unsigned long
+list_lru_walk_node(struct list_lru *lru, int nid, list_lru_walk_cb isolate,
+                  void *cb_arg, unsigned long *nr_to_walk)
+{
+
+       struct list_lru_node    *nlru = &lru->node[nid];
+       struct list_head *item, *n;
+       unsigned long isolated = 0;
+
+       spin_lock(&nlru->lock);
+restart:
+       list_for_each_safe(item, n, &nlru->list) {
+               enum lru_status ret;
+
+               /*
+                * decrement nr_to_walk first so that we don't livelock if we
+                * get stuck on large numbesr of LRU_RETRY items
+                */
+               if (--(*nr_to_walk) == 0)
+                       break;
+
+               ret = isolate(item, &nlru->lock, cb_arg);
+               switch (ret) {
+               case LRU_REMOVED:
+                       if (--nlru->nr_items == 0)
+                               node_clear(nid, lru->active_nodes);
+                       WARN_ON_ONCE(nlru->nr_items < 0);
+                       isolated++;
+                       break;
+               case LRU_ROTATE:
+                       list_move_tail(item, &nlru->list);
+                       break;
+               case LRU_SKIP:
+                       break;
+               case LRU_RETRY:
+                       /*
+                        * The lru lock has been dropped, our list traversal is
+                        * now invalid and so we have to restart from scratch.
+                        */
+                       goto restart;
+               default:
+                       BUG();
+               }
+       }
+
+       spin_unlock(&nlru->lock);
+       return isolated;
+}
+EXPORT_SYMBOL_GPL(list_lru_walk_node);
+
+int list_lru_init(struct list_lru *lru)
+{
+       int i;
+       size_t size = sizeof(*lru->node) * nr_node_ids;
+
+       lru->node = kzalloc(size, GFP_KERNEL);
+       if (!lru->node)
+               return -ENOMEM;
+
+       nodes_clear(lru->active_nodes);
+       for (i = 0; i < nr_node_ids; i++) {
+               spin_lock_init(&lru->node[i].lock);
+               INIT_LIST_HEAD(&lru->node[i].list);
+               lru->node[i].nr_items = 0;
+       }
+       return 0;
+}
+EXPORT_SYMBOL_GPL(list_lru_init);
+
+void list_lru_destroy(struct list_lru *lru)
+{
+       kfree(lru->node);
+}
+EXPORT_SYMBOL_GPL(list_lru_destroy);
index 7055883..6975bc8 100644 (file)
@@ -42,11 +42,11 @@ static int madvise_need_mmap_write(int behavior)
  * We can potentially split a vm area into separate
  * areas, each area with its own behavior.
  */
-static long madvise_behavior(struct vm_area_struct * vma,
+static long madvise_behavior(struct vm_area_struct *vma,
                     struct vm_area_struct **prev,
                     unsigned long start, unsigned long end, int behavior)
 {
-       struct mm_struct * mm = vma->vm_mm;
+       struct mm_struct *mm = vma->vm_mm;
        int error = 0;
        pgoff_t pgoff;
        unsigned long new_flags = vma->vm_flags;
@@ -215,8 +215,8 @@ static void force_shm_swapin_readahead(struct vm_area_struct *vma,
 /*
  * Schedule all required I/O operations.  Do not wait for completion.
  */
-static long madvise_willneed(struct vm_area_struct * vma,
-                            struct vm_area_struct ** prev,
+static long madvise_willneed(struct vm_area_struct *vma,
+                            struct vm_area_struct **prev,
                             unsigned long start, unsigned long end)
 {
        struct file *file = vma->vm_file;
@@ -270,8 +270,8 @@ static long madvise_willneed(struct vm_area_struct * vma,
  * An interface that causes the system to free clean pages and flush
  * dirty pages is already available as msync(MS_INVALIDATE).
  */
-static long madvise_dontneed(struct vm_area_struct * vma,
-                            struct vm_area_struct ** prev,
+static long madvise_dontneed(struct vm_area_struct *vma,
+                            struct vm_area_struct **prev,
                             unsigned long start, unsigned long end)
 {
        *prev = vma;
@@ -343,29 +343,34 @@ static long madvise_remove(struct vm_area_struct *vma,
  */
 static int madvise_hwpoison(int bhv, unsigned long start, unsigned long end)
 {
-       int ret = 0;
-
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
        for (; start < end; start += PAGE_SIZE) {
                struct page *p;
-               int ret = get_user_pages_fast(start, 1, 0, &p);
+               int ret;
+
+               ret = get_user_pages_fast(start, 1, 0, &p);
                if (ret != 1)
                        return ret;
+
+               if (PageHWPoison(p)) {
+                       put_page(p);
+                       continue;
+               }
                if (bhv == MADV_SOFT_OFFLINE) {
-                       printk(KERN_INFO "Soft offlining page %lx at %lx\n",
+                       pr_info("Soft offlining page %#lx at %#lx\n",
                                page_to_pfn(p), start);
                        ret = soft_offline_page(p, MF_COUNT_INCREASED);
                        if (ret)
-                               break;
+                               return ret;
                        continue;
                }
-               printk(KERN_INFO "Injecting memory failure for page %lx at %lx\n",
+               pr_info("Injecting memory failure for page %#lx at %#lx\n",
                       page_to_pfn(p), start);
                /* Ignore return value for now */
                memory_failure(page_to_pfn(p), 0, MF_COUNT_INCREASED);
        }
-       return ret;
+       return 0;
 }
 #endif
 
@@ -459,7 +464,7 @@ madvise_behavior_valid(int behavior)
 SYSCALL_DEFINE3(madvise, unsigned long, start, size_t, len_in, int, behavior)
 {
        unsigned long end, tmp;
-       struct vm_area_struct * vma, *prev;
+       struct vm_area_struct *vma, *prev;
        int unmapped_error = 0;
        int error = -EINVAL;
        int write;
index a847bfe..0ac412a 100644 (file)
@@ -914,6 +914,24 @@ int __init_memblock memblock_is_memory(phys_addr_t addr)
        return memblock_search(&memblock.memory, addr) != -1;
 }
 
+#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
+int __init_memblock memblock_search_pfn_nid(unsigned long pfn,
+                        unsigned long *start_pfn, unsigned long *end_pfn)
+{
+       struct memblock_type *type = &memblock.memory;
+       int mid = memblock_search(type, (phys_addr_t)pfn << PAGE_SHIFT);
+
+       if (mid == -1)
+               return -1;
+
+       *start_pfn = type->regions[mid].base >> PAGE_SHIFT;
+       *end_pfn = (type->regions[mid].base + type->regions[mid].size)
+                       >> PAGE_SHIFT;
+
+       return type->regions[mid].nid;
+}
+#endif
+
 /**
  * memblock_is_region_memory - check if a region is a subset of memory
  * @base: base of region to check
index 3b83957..d5ff3ce 100644 (file)
@@ -39,7 +39,6 @@
 #include <linux/limits.h>
 #include <linux/export.h>
 #include <linux/mutex.h>
-#include <linux/rbtree.h>
 #include <linux/slab.h>
 #include <linux/swap.h>
 #include <linux/swapops.h>
@@ -85,26 +84,12 @@ static int really_do_swap_account __initdata = 0;
 #endif
 
 
-/*
- * Statistics for memory cgroup.
- */
-enum mem_cgroup_stat_index {
-       /*
-        * For MEM_CONTAINER_TYPE_ALL, usage = pagecache + rss.
-        */
-       MEM_CGROUP_STAT_CACHE,          /* # of pages charged as cache */
-       MEM_CGROUP_STAT_RSS,            /* # of pages charged as anon rss */
-       MEM_CGROUP_STAT_RSS_HUGE,       /* # of pages charged as anon huge */
-       MEM_CGROUP_STAT_FILE_MAPPED,    /* # of pages charged as file rss */
-       MEM_CGROUP_STAT_SWAP,           /* # of pages, swapped out */
-       MEM_CGROUP_STAT_NSTATS,
-};
-
 static const char * const mem_cgroup_stat_names[] = {
        "cache",
        "rss",
        "rss_huge",
        "mapped_file",
+       "writeback",
        "swap",
 };
 
@@ -175,10 +160,6 @@ struct mem_cgroup_per_zone {
 
        struct mem_cgroup_reclaim_iter reclaim_iter[DEF_PRIORITY + 1];
 
-       struct rb_node          tree_node;      /* RB tree node */
-       unsigned long long      usage_in_excess;/* Set to the value by which */
-                                               /* the soft limit is exceeded*/
-       bool                    on_tree;
        struct mem_cgroup       *memcg;         /* Back pointer, we cannot */
                                                /* use container_of        */
 };
@@ -187,26 +168,6 @@ struct mem_cgroup_per_node {
        struct mem_cgroup_per_zone zoneinfo[MAX_NR_ZONES];
 };
 
-/*
- * Cgroups above their limits are maintained in a RB-Tree, independent of
- * their hierarchy representation
- */
-
-struct mem_cgroup_tree_per_zone {
-       struct rb_root rb_root;
-       spinlock_t lock;
-};
-
-struct mem_cgroup_tree_per_node {
-       struct mem_cgroup_tree_per_zone rb_tree_per_zone[MAX_NR_ZONES];
-};
-
-struct mem_cgroup_tree {
-       struct mem_cgroup_tree_per_node *rb_tree_per_node[MAX_NUMNODES];
-};
-
-static struct mem_cgroup_tree soft_limit_tree __read_mostly;
-
 struct mem_cgroup_threshold {
        struct eventfd_ctx *eventfd;
        u64 threshold;
@@ -280,6 +241,7 @@ struct mem_cgroup {
 
        bool            oom_lock;
        atomic_t        under_oom;
+       atomic_t        oom_wakeups;
 
        int     swappiness;
        /* OOM-Killer disable */
@@ -304,7 +266,7 @@ struct mem_cgroup {
         * Should we move charges of a task when a task is moved into this
         * mem_cgroup ? And what type of charges should we move ?
         */
-       unsigned long   move_charge_at_immigrate;
+       unsigned long move_charge_at_immigrate;
        /*
         * set > 0 if pages under this cgroup are moving to other cgroup.
         */
@@ -341,6 +303,22 @@ struct mem_cgroup {
        atomic_t        numainfo_events;
        atomic_t        numainfo_updating;
 #endif
+       /*
+        * Protects soft_contributed transitions.
+        * See mem_cgroup_update_soft_limit
+        */
+       spinlock_t soft_lock;
+
+       /*
+        * If true then this group has increased parents' children_in_excess
+        * when it got over the soft limit.
+        * When a group falls bellow the soft limit, parents' children_in_excess
+        * is decreased and soft_contributed changed to false.
+        */
+       bool soft_contributed;
+
+       /* Number of children that are in soft limit excess */
+       atomic_t children_in_excess;
 
        struct mem_cgroup_per_node *nodeinfo[0];
        /* WARNING: nodeinfo must be the last member here */
@@ -444,7 +422,6 @@ static bool move_file(void)
  * limit reclaim to prevent infinite loops, if they ever occur.
  */
 #define        MEM_CGROUP_MAX_RECLAIM_LOOPS            100
-#define        MEM_CGROUP_MAX_SOFT_LIMIT_RECLAIM_LOOPS 2
 
 enum charge_type {
        MEM_CGROUP_CHARGE_TYPE_CACHE = 0,
@@ -671,164 +648,6 @@ page_cgroup_zoneinfo(struct mem_cgroup *memcg, struct page *page)
        return mem_cgroup_zoneinfo(memcg, nid, zid);
 }
 
-static struct mem_cgroup_tree_per_zone *
-soft_limit_tree_node_zone(int nid, int zid)
-{
-       return &soft_limit_tree.rb_tree_per_node[nid]->rb_tree_per_zone[zid];
-}
-
-static struct mem_cgroup_tree_per_zone *
-soft_limit_tree_from_page(struct page *page)
-{
-       int nid = page_to_nid(page);
-       int zid = page_zonenum(page);
-
-       return &soft_limit_tree.rb_tree_per_node[nid]->rb_tree_per_zone[zid];
-}
-
-static void
-__mem_cgroup_insert_exceeded(struct mem_cgroup *memcg,
-                               struct mem_cgroup_per_zone *mz,
-                               struct mem_cgroup_tree_per_zone *mctz,
-                               unsigned long long new_usage_in_excess)
-{
-       struct rb_node **p = &mctz->rb_root.rb_node;
-       struct rb_node *parent = NULL;
-       struct mem_cgroup_per_zone *mz_node;
-
-       if (mz->on_tree)
-               return;
-
-       mz->usage_in_excess = new_usage_in_excess;
-       if (!mz->usage_in_excess)
-               return;
-       while (*p) {
-               parent = *p;
-               mz_node = rb_entry(parent, struct mem_cgroup_per_zone,
-                                       tree_node);
-               if (mz->usage_in_excess < mz_node->usage_in_excess)
-                       p = &(*p)->rb_left;
-               /*
-                * We can't avoid mem cgroups that are over their soft
-                * limit by the same amount
-                */
-               else if (mz->usage_in_excess >= mz_node->usage_in_excess)
-                       p = &(*p)->rb_right;
-       }
-       rb_link_node(&mz->tree_node, parent, p);
-       rb_insert_color(&mz->tree_node, &mctz->rb_root);
-       mz->on_tree = true;
-}
-
-static void
-__mem_cgroup_remove_exceeded(struct mem_cgroup *memcg,
-                               struct mem_cgroup_per_zone *mz,
-                               struct mem_cgroup_tree_per_zone *mctz)
-{
-       if (!mz->on_tree)
-               return;
-       rb_erase(&mz->tree_node, &mctz->rb_root);
-       mz->on_tree = false;
-}
-
-static void
-mem_cgroup_remove_exceeded(struct mem_cgroup *memcg,
-                               struct mem_cgroup_per_zone *mz,
-                               struct mem_cgroup_tree_per_zone *mctz)
-{
-       spin_lock(&mctz->lock);
-       __mem_cgroup_remove_exceeded(memcg, mz, mctz);
-       spin_unlock(&mctz->lock);
-}
-
-
-static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page)
-{
-       unsigned long long excess;
-       struct mem_cgroup_per_zone *mz;
-       struct mem_cgroup_tree_per_zone *mctz;
-       int nid = page_to_nid(page);
-       int zid = page_zonenum(page);
-       mctz = soft_limit_tree_from_page(page);
-
-       /*
-        * Necessary to update all ancestors when hierarchy is used.
-        * because their event counter is not touched.
-        */
-       for (; memcg; memcg = parent_mem_cgroup(memcg)) {
-               mz = mem_cgroup_zoneinfo(memcg, nid, zid);
-               excess = res_counter_soft_limit_excess(&memcg->res);
-               /*
-                * We have to update the tree if mz is on RB-tree or
-                * mem is over its softlimit.
-                */
-               if (excess || mz->on_tree) {
-                       spin_lock(&mctz->lock);
-                       /* if on-tree, remove it */
-                       if (mz->on_tree)
-                               __mem_cgroup_remove_exceeded(memcg, mz, mctz);
-                       /*
-                        * Insert again. mz->usage_in_excess will be updated.
-                        * If excess is 0, no tree ops.
-                        */
-                       __mem_cgroup_insert_exceeded(memcg, mz, mctz, excess);
-                       spin_unlock(&mctz->lock);
-               }
-       }
-}
-
-static void mem_cgroup_remove_from_trees(struct mem_cgroup *memcg)
-{
-       int node, zone;
-       struct mem_cgroup_per_zone *mz;
-       struct mem_cgroup_tree_per_zone *mctz;
-
-       for_each_node(node) {
-               for (zone = 0; zone < MAX_NR_ZONES; zone++) {
-                       mz = mem_cgroup_zoneinfo(memcg, node, zone);
-                       mctz = soft_limit_tree_node_zone(node, zone);
-                       mem_cgroup_remove_exceeded(memcg, mz, mctz);
-               }
-       }
-}
-
-static struct mem_cgroup_per_zone *
-__mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
-{
-       struct rb_node *rightmost = NULL;
-       struct mem_cgroup_per_zone *mz;
-
-retry:
-       mz = NULL;
-       rightmost = rb_last(&mctz->rb_root);
-       if (!rightmost)
-               goto done;              /* Nothing to reclaim from */
-
-       mz = rb_entry(rightmost, struct mem_cgroup_per_zone, tree_node);
-       /*
-        * Remove the node now but someone else can add it back,
-        * we will to add it back at the end of reclaim to its correct
-        * position in the tree.
-        */
-       __mem_cgroup_remove_exceeded(mz->memcg, mz, mctz);
-       if (!res_counter_soft_limit_excess(&mz->memcg->res) ||
-               !css_tryget(&mz->memcg->css))
-               goto retry;
-done:
-       return mz;
-}
-
-static struct mem_cgroup_per_zone *
-mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
-{
-       struct mem_cgroup_per_zone *mz;
-
-       spin_lock(&mctz->lock);
-       mz = __mem_cgroup_largest_soft_limit_node(mctz);
-       spin_unlock(&mctz->lock);
-       return mz;
-}
-
 /*
  * Implementation Note: reading percpu statistics for memcg.
  *
@@ -1002,6 +821,48 @@ static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg,
        return false;
 }
 
+/*
+ * Called from rate-limited memcg_check_events when enough
+ * MEM_CGROUP_TARGET_SOFTLIMIT events are accumulated and it makes sure
+ * that all the parents up the hierarchy will be notified that this group
+ * is in excess or that it is not in excess anymore. mmecg->soft_contributed
+ * makes the transition a single action whenever the state flips from one to
+ * the other.
+ */
+static void mem_cgroup_update_soft_limit(struct mem_cgroup *memcg)
+{
+       unsigned long long excess = res_counter_soft_limit_excess(&memcg->res);
+       struct mem_cgroup *parent = memcg;
+       int delta = 0;
+
+       spin_lock(&memcg->soft_lock);
+       if (excess) {
+               if (!memcg->soft_contributed) {
+                       delta = 1;
+                       memcg->soft_contributed = true;
+               }
+       } else {
+               if (memcg->soft_contributed) {
+                       delta = -1;
+                       memcg->soft_contributed = false;
+               }
+       }
+
+       /*
+        * Necessary to update all ancestors when hierarchy is used
+        * because their event counter is not touched.
+        * We track children even outside the hierarchy for the root
+        * cgroup because tree walk starting at root should visit
+        * all cgroups and we want to prevent from pointless tree
+        * walk if no children is below the limit.
+        */
+       while (delta && (parent = parent_mem_cgroup(parent)))
+               atomic_add(delta, &parent->children_in_excess);
+       if (memcg != root_mem_cgroup && !root_mem_cgroup->use_hierarchy)
+               atomic_add(delta, &root_mem_cgroup->children_in_excess);
+       spin_unlock(&memcg->soft_lock);
+}
+
 /*
  * Check events in order.
  *
@@ -1025,7 +886,7 @@ static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
 
                mem_cgroup_threshold(memcg);
                if (unlikely(do_softlimit))
-                       mem_cgroup_update_tree(memcg, page);
+                       mem_cgroup_update_soft_limit(memcg);
 #if MAX_NUMNODES > 1
                if (unlikely(do_numainfo))
                        atomic_inc(&memcg->numainfo_events);
@@ -1068,6 +929,15 @@ struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm)
        return memcg;
 }
 
+static enum mem_cgroup_filter_t
+mem_cgroup_filter(struct mem_cgroup *memcg, struct mem_cgroup *root,
+               mem_cgroup_iter_filter cond)
+{
+       if (!cond)
+               return VISIT;
+       return cond(memcg, root);
+}
+
 /*
  * Returns a next (in a pre-order walk) alive memcg (with elevated css
  * ref. count) or NULL if the whole root's subtree has been visited.
@@ -1075,7 +945,7 @@ struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm)
  * helper function to be used by mem_cgroup_iter
  */
 static struct mem_cgroup *__mem_cgroup_iter_next(struct mem_cgroup *root,
-               struct mem_cgroup *last_visited)
+               struct mem_cgroup *last_visited, mem_cgroup_iter_filter cond)
 {
        struct cgroup_subsys_state *prev_css, *next_css;
 
@@ -1093,11 +963,31 @@ skip_node:
        if (next_css) {
                struct mem_cgroup *mem = mem_cgroup_from_css(next_css);
 
-               if (css_tryget(&mem->css))
-                       return mem;
-               else {
+               switch (mem_cgroup_filter(mem, root, cond)) {
+               case SKIP:
                        prev_css = next_css;
                        goto skip_node;
+               case SKIP_TREE:
+                       if (mem == root)
+                               return NULL;
+                       /*
+                        * css_rightmost_descendant is not an optimal way to
+                        * skip through a subtree (especially for imbalanced
+                        * trees leaning to right) but that's what we have right
+                        * now. More effective solution would be traversing
+                        * right-up for first non-NULL without calling
+                        * css_next_descendant_pre afterwards.
+                        */
+                       prev_css = css_rightmost_descendant(next_css);
+                       goto skip_node;
+               case VISIT:
+                       if (css_tryget(&mem->css))
+                               return mem;
+                       else {
+                               prev_css = next_css;
+                               goto skip_node;
+                       }
+                       break;
                }
        }
 
@@ -1161,6 +1051,7 @@ static void mem_cgroup_iter_update(struct mem_cgroup_reclaim_iter *iter,
  * @root: hierarchy root
  * @prev: previously returned memcg, NULL on first invocation
  * @reclaim: cookie for shared reclaim walks, NULL for full walks
+ * @cond: filter for visited nodes, NULL for no filter
  *
  * Returns references to children of the hierarchy below @root, or
  * @root itself, or %NULL after a full round-trip.
@@ -1173,15 +1064,18 @@ static void mem_cgroup_iter_update(struct mem_cgroup_reclaim_iter *iter,
  * divide up the memcgs in the hierarchy among all concurrent
  * reclaimers operating on the same zone and priority.
  */
-struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
+struct mem_cgroup *mem_cgroup_iter_cond(struct mem_cgroup *root,
                                   struct mem_cgroup *prev,
-                                  struct mem_cgroup_reclaim_cookie *reclaim)
+                                  struct mem_cgroup_reclaim_cookie *reclaim,
+                                  mem_cgroup_iter_filter cond)
 {
        struct mem_cgroup *memcg = NULL;
        struct mem_cgroup *last_visited = NULL;
 
-       if (mem_cgroup_disabled())
-               return NULL;
+       if (mem_cgroup_disabled()) {
+               /* first call must return non-NULL, second return NULL */
+               return (struct mem_cgroup *)(unsigned long)!prev;
+       }
 
        if (!root)
                root = root_mem_cgroup;
@@ -1192,7 +1086,9 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
        if (!root->use_hierarchy && root != root_mem_cgroup) {
                if (prev)
                        goto out_css_put;
-               return root;
+               if (mem_cgroup_filter(root, root, cond) == VISIT)
+                       return root;
+               return NULL;
        }
 
        rcu_read_lock();
@@ -1215,7 +1111,7 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
                        last_visited = mem_cgroup_iter_load(iter, root, &seq);
                }
 
-               memcg = __mem_cgroup_iter_next(root, last_visited);
+               memcg = __mem_cgroup_iter_next(root, last_visited, cond);
 
                if (reclaim) {
                        mem_cgroup_iter_update(iter, last_visited, memcg, seq);
@@ -1226,7 +1122,11 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
                                reclaim->generation = iter->generation;
                }
 
-               if (prev && !memcg)
+               /*
+                * We have finished the whole tree walk or no group has been
+                * visited because filter told us to skip the root node.
+                */
+               if (!memcg && (prev || (cond && !last_visited)))
                        goto out_unlock;
        }
 out_unlock:
@@ -1867,6 +1767,7 @@ static unsigned long mem_cgroup_reclaim(struct mem_cgroup *memcg,
        return total;
 }
 
+#if MAX_NUMNODES > 1
 /**
  * test_mem_cgroup_node_reclaimable
  * @memcg: the target memcg
@@ -1889,7 +1790,6 @@ static bool test_mem_cgroup_node_reclaimable(struct mem_cgroup *memcg,
        return false;
 
 }
-#if MAX_NUMNODES > 1
 
 /*
  * Always updating the nodemask is not very good - even if we have an empty
@@ -1957,115 +1857,64 @@ int mem_cgroup_select_victim_node(struct mem_cgroup *memcg)
        return node;
 }
 
-/*
- * Check all nodes whether it contains reclaimable pages or not.
- * For quick scan, we make use of scan_nodes. This will allow us to skip
- * unused nodes. But scan_nodes is lazily updated and may not cotain
- * enough new information. We need to do double check.
- */
-static bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap)
-{
-       int nid;
-
-       /*
-        * quick check...making use of scan_node.
-        * We can skip unused nodes.
-        */
-       if (!nodes_empty(memcg->scan_nodes)) {
-               for (nid = first_node(memcg->scan_nodes);
-                    nid < MAX_NUMNODES;
-                    nid = next_node(nid, memcg->scan_nodes)) {
-
-                       if (test_mem_cgroup_node_reclaimable(memcg, nid, noswap))
-                               return true;
-               }
-       }
-       /*
-        * Check rest of nodes.
-        */
-       for_each_node_state(nid, N_MEMORY) {
-               if (node_isset(nid, memcg->scan_nodes))
-                       continue;
-               if (test_mem_cgroup_node_reclaimable(memcg, nid, noswap))
-                       return true;
-       }
-       return false;
-}
-
 #else
 int mem_cgroup_select_victim_node(struct mem_cgroup *memcg)
 {
        return 0;
 }
 
-static bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap)
-{
-       return test_mem_cgroup_node_reclaimable(memcg, 0, noswap);
-}
 #endif
 
-static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg,
-                                  struct zone *zone,
-                                  gfp_t gfp_mask,
-                                  unsigned long *total_scanned)
-{
-       struct mem_cgroup *victim = NULL;
-       int total = 0;
-       int loop = 0;
-       unsigned long excess;
-       unsigned long nr_scanned;
-       struct mem_cgroup_reclaim_cookie reclaim = {
-               .zone = zone,
-               .priority = 0,
-       };
-
-       excess = res_counter_soft_limit_excess(&root_memcg->res) >> PAGE_SHIFT;
-
-       while (1) {
-               victim = mem_cgroup_iter(root_memcg, victim, &reclaim);
-               if (!victim) {
-                       loop++;
-                       if (loop >= 2) {
-                               /*
-                                * If we have not been able to reclaim
-                                * anything, it might because there are
-                                * no reclaimable pages under this hierarchy
-                                */
-                               if (!total)
-                                       break;
-                               /*
-                                * We want to do more targeted reclaim.
-                                * excess >> 2 is not to excessive so as to
-                                * reclaim too much, nor too less that we keep
-                                * coming back to reclaim from this cgroup
-                                */
-                               if (total >= (excess >> 2) ||
-                                       (loop > MEM_CGROUP_MAX_RECLAIM_LOOPS))
-                                       break;
-                       }
-                       continue;
-               }
-               if (!mem_cgroup_reclaimable(victim, false))
-                       continue;
-               total += mem_cgroup_shrink_node_zone(victim, gfp_mask, false,
-                                                    zone, &nr_scanned);
-               *total_scanned += nr_scanned;
-               if (!res_counter_soft_limit_excess(&root_memcg->res))
+/*
+ * A group is eligible for the soft limit reclaim under the given root
+ * hierarchy if
+ *     a) it is over its soft limit
+ *     b) any parent up the hierarchy is over its soft limit
+ *
+ * If the given group doesn't have any children over the limit then it
+ * doesn't make any sense to iterate its subtree.
+ */
+enum mem_cgroup_filter_t
+mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg,
+               struct mem_cgroup *root)
+{
+       struct mem_cgroup *parent;
+
+       if (!memcg)
+               memcg = root_mem_cgroup;
+       parent = memcg;
+
+       if (res_counter_soft_limit_excess(&memcg->res))
+               return VISIT;
+
+       /*
+        * If any parent up to the root in the hierarchy is over its soft limit
+        * then we have to obey and reclaim from this group as well.
+        */
+       while ((parent = parent_mem_cgroup(parent))) {
+               if (res_counter_soft_limit_excess(&parent->res))
+                       return VISIT;
+               if (parent == root)
                        break;
        }
-       mem_cgroup_iter_break(root_memcg, victim);
-       return total;
+
+       if (!atomic_read(&memcg->children_in_excess))
+               return SKIP_TREE;
+       return SKIP;
 }
 
+static DEFINE_SPINLOCK(memcg_oom_lock);
+
 /*
  * Check OOM-Killer is already running under our hierarchy.
  * If someone is running, return false.
- * Has to be called with memcg_oom_lock
  */
-static bool mem_cgroup_oom_lock(struct mem_cgroup *memcg)
+static bool mem_cgroup_oom_trylock(struct mem_cgroup *memcg)
 {
        struct mem_cgroup *iter, *failed = NULL;
 
+       spin_lock(&memcg_oom_lock);
+
        for_each_mem_cgroup_tree(iter, memcg) {
                if (iter->oom_lock) {
                        /*
@@ -2079,33 +1928,33 @@ static bool mem_cgroup_oom_lock(struct mem_cgroup *memcg)
                        iter->oom_lock = true;
        }
 
-       if (!failed)
-               return true;
-
-       /*
-        * OK, we failed to lock the whole subtree so we have to clean up
-        * what we set up to the failing subtree
-        */
-       for_each_mem_cgroup_tree(iter, memcg) {
-               if (iter == failed) {
-                       mem_cgroup_iter_break(memcg, iter);
-                       break;
+       if (failed) {
+               /*
+                * OK, we failed to lock the whole subtree so we have
+                * to clean up what we set up to the failing subtree
+                */
+               for_each_mem_cgroup_tree(iter, memcg) {
+                       if (iter == failed) {
+                               mem_cgroup_iter_break(memcg, iter);
+                               break;
+                       }
+                       iter->oom_lock = false;
                }
-               iter->oom_lock = false;
        }
-       return false;
+
+       spin_unlock(&memcg_oom_lock);
+
+       return !failed;
 }
 
-/*
- * Has to be called with memcg_oom_lock
- */
-static int mem_cgroup_oom_unlock(struct mem_cgroup *memcg)
+static void mem_cgroup_oom_unlock(struct mem_cgroup *memcg)
 {
        struct mem_cgroup *iter;
 
+       spin_lock(&memcg_oom_lock);
        for_each_mem_cgroup_tree(iter, memcg)
                iter->oom_lock = false;
-       return 0;
+       spin_unlock(&memcg_oom_lock);
 }
 
 static void mem_cgroup_mark_under_oom(struct mem_cgroup *memcg)
@@ -2129,7 +1978,6 @@ static void mem_cgroup_unmark_under_oom(struct mem_cgroup *memcg)
                atomic_add_unless(&iter->under_oom, -1, 0);
 }
 
-static DEFINE_SPINLOCK(memcg_oom_lock);
 static DECLARE_WAIT_QUEUE_HEAD(memcg_oom_waitq);
 
 struct oom_wait_info {
@@ -2159,6 +2007,7 @@ static int memcg_oom_wake_function(wait_queue_t *wait,
 
 static void memcg_wakeup_oom(struct mem_cgroup *memcg)
 {
+       atomic_inc(&memcg->oom_wakeups);
        /* for filtering, pass "memcg" as argument. */
        __wake_up(&memcg_oom_waitq, TASK_NORMAL, 0, memcg);
 }
@@ -2170,56 +2019,136 @@ static void memcg_oom_recover(struct mem_cgroup *memcg)
 }
 
 /*
- * try to call OOM killer. returns false if we should exit memory-reclaim loop.
+ * try to call OOM killer
  */
-static bool mem_cgroup_handle_oom(struct mem_cgroup *memcg, gfp_t mask,
-                                 int order)
+static void mem_cgroup_oom(struct mem_cgroup *memcg, gfp_t mask, int order)
 {
-       struct oom_wait_info owait;
-       bool locked, need_to_kill;
+       bool locked;
+       int wakeups;
 
-       owait.memcg = memcg;
-       owait.wait.flags = 0;
-       owait.wait.func = memcg_oom_wake_function;
-       owait.wait.private = current;
-       INIT_LIST_HEAD(&owait.wait.task_list);
-       need_to_kill = true;
-       mem_cgroup_mark_under_oom(memcg);
+       if (!current->memcg_oom.may_oom)
+               return;
+
+       current->memcg_oom.in_memcg_oom = 1;
 
-       /* At first, try to OOM lock hierarchy under memcg.*/
-       spin_lock(&memcg_oom_lock);
-       locked = mem_cgroup_oom_lock(memcg);
        /*
-        * Even if signal_pending(), we can't quit charge() loop without
-        * accounting. So, UNINTERRUPTIBLE is appropriate. But SIGKILL
-        * under OOM is always welcomed, use TASK_KILLABLE here.
+        * As with any blocking lock, a contender needs to start
+        * listening for wakeups before attempting the trylock,
+        * otherwise it can miss the wakeup from the unlock and sleep
+        * indefinitely.  This is just open-coded because our locking
+        * is so particular to memcg hierarchies.
         */
-       prepare_to_wait(&memcg_oom_waitq, &owait.wait, TASK_KILLABLE);
-       if (!locked || memcg->oom_kill_disable)
-               need_to_kill = false;
+       wakeups = atomic_read(&memcg->oom_wakeups);
+       mem_cgroup_mark_under_oom(memcg);
+
+       locked = mem_cgroup_oom_trylock(memcg);
+
        if (locked)
                mem_cgroup_oom_notify(memcg);
-       spin_unlock(&memcg_oom_lock);
 
-       if (need_to_kill) {
-               finish_wait(&memcg_oom_waitq, &owait.wait);
+       if (locked && !memcg->oom_kill_disable) {
+               mem_cgroup_unmark_under_oom(memcg);
                mem_cgroup_out_of_memory(memcg, mask, order);
+               mem_cgroup_oom_unlock(memcg);
+               /*
+                * There is no guarantee that an OOM-lock contender
+                * sees the wakeups triggered by the OOM kill
+                * uncharges.  Wake any sleepers explicitely.
+                */
+               memcg_oom_recover(memcg);
        } else {
-               schedule();
-               finish_wait(&memcg_oom_waitq, &owait.wait);
+               /*
+                * A system call can just return -ENOMEM, but if this
+                * is a page fault and somebody else is handling the
+                * OOM already, we need to sleep on the OOM waitqueue
+                * for this memcg until the situation is resolved.
+                * Which can take some time because it might be
+                * handled by a userspace task.
+                *
+                * However, this is the charge context, which means
+                * that we may sit on a large call stack and hold
+                * various filesystem locks, the mmap_sem etc. and we
+                * don't want the OOM handler to deadlock on them
+                * while we sit here and wait.  Store the current OOM
+                * context in the task_struct, then return -ENOMEM.
+                * At the end of the page fault handler, with the
+                * stack unwound, pagefault_out_of_memory() will check
+                * back with us by calling
+                * mem_cgroup_oom_synchronize(), possibly putting the
+                * task to sleep.
+                */
+               current->memcg_oom.oom_locked = locked;
+               current->memcg_oom.wakeups = wakeups;
+               css_get(&memcg->css);
+               current->memcg_oom.wait_on_memcg = memcg;
        }
-       spin_lock(&memcg_oom_lock);
-       if (locked)
-               mem_cgroup_oom_unlock(memcg);
-       memcg_wakeup_oom(memcg);
-       spin_unlock(&memcg_oom_lock);
+}
 
-       mem_cgroup_unmark_under_oom(memcg);
+/**
+ * mem_cgroup_oom_synchronize - complete memcg OOM handling
+ *
+ * This has to be called at the end of a page fault if the the memcg
+ * OOM handler was enabled and the fault is returning %VM_FAULT_OOM.
+ *
+ * Memcg supports userspace OOM handling, so failed allocations must
+ * sleep on a waitqueue until the userspace task resolves the
+ * situation.  Sleeping directly in the charge context with all kinds
+ * of locks held is not a good idea, instead we remember an OOM state
+ * in the task and mem_cgroup_oom_synchronize() has to be called at
+ * the end of the page fault to put the task to sleep and clean up the
+ * OOM state.
+ *
+ * Returns %true if an ongoing memcg OOM situation was detected and
+ * finalized, %false otherwise.
+ */
+bool mem_cgroup_oom_synchronize(void)
+{
+       struct oom_wait_info owait;
+       struct mem_cgroup *memcg;
 
-       if (test_thread_flag(TIF_MEMDIE) || fatal_signal_pending(current))
+       /* OOM is global, do not handle */
+       if (!current->memcg_oom.in_memcg_oom)
                return false;
-       /* Give chance to dying process */
-       schedule_timeout_uninterruptible(1);
+
+       /*
+        * We invoked the OOM killer but there is a chance that a kill
+        * did not free up any charges.  Everybody else might already
+        * be sleeping, so restart the fault and keep the rampage
+        * going until some charges are released.
+        */
+       memcg = current->memcg_oom.wait_on_memcg;
+       if (!memcg)
+               goto out;
+
+       if (test_thread_flag(TIF_MEMDIE) || fatal_signal_pending(current))
+               goto out_memcg;
+
+       owait.memcg = memcg;
+       owait.wait.flags = 0;
+       owait.wait.func = memcg_oom_wake_function;
+       owait.wait.private = current;
+       INIT_LIST_HEAD(&owait.wait.task_list);
+
+       prepare_to_wait(&memcg_oom_waitq, &owait.wait, TASK_KILLABLE);
+       /* Only sleep if we didn't miss any wakeups since OOM */
+       if (atomic_read(&memcg->oom_wakeups) == current->memcg_oom.wakeups)
+               schedule();
+       finish_wait(&memcg_oom_waitq, &owait.wait);
+out_memcg:
+       mem_cgroup_unmark_under_oom(memcg);
+       if (current->memcg_oom.oom_locked) {
+               mem_cgroup_oom_unlock(memcg);
+               /*
+                * There is no guarantee that an OOM-lock contender
+                * sees the wakeups triggered by the OOM kill
+                * uncharges.  Wake any sleepers explicitely.
+                */
+               memcg_oom_recover(memcg);
+       }
+       css_put(&memcg->css);
+       current->memcg_oom.wait_on_memcg = NULL;
+out:
+       current->memcg_oom.in_memcg_oom = 0;
        return true;
 }
 
@@ -2288,7 +2217,7 @@ void __mem_cgroup_end_update_page_stat(struct page *page, unsigned long *flags)
 }
 
 void mem_cgroup_update_page_stat(struct page *page,
-                                enum mem_cgroup_page_stat_item idx, int val)
+                                enum mem_cgroup_stat_index idx, int val)
 {
        struct mem_cgroup *memcg;
        struct page_cgroup *pc = lookup_page_cgroup(page);
@@ -2297,18 +2226,11 @@ void mem_cgroup_update_page_stat(struct page *page,
        if (mem_cgroup_disabled())
                return;
 
+       VM_BUG_ON(!rcu_read_lock_held());
        memcg = pc->mem_cgroup;
        if (unlikely(!memcg || !PageCgroupUsed(pc)))
                return;
 
-       switch (idx) {
-       case MEMCG_NR_FILE_MAPPED:
-               idx = MEM_CGROUP_STAT_FILE_MAPPED;
-               break;
-       default:
-               BUG();
-       }
-
        this_cpu_add(memcg->stat->count[idx], val);
 }
 
@@ -2450,7 +2372,7 @@ static void drain_all_stock(struct mem_cgroup *root_memcg, bool sync)
                        flush_work(&stock->work);
        }
 out:
-       put_online_cpus();
+       put_online_cpus();
 }
 
 /*
@@ -2532,12 +2454,11 @@ enum {
        CHARGE_RETRY,           /* need to retry but retry is not bad */
        CHARGE_NOMEM,           /* we can't do more. return -ENOMEM */
        CHARGE_WOULDBLOCK,      /* GFP_WAIT wasn't set and no enough res. */
-       CHARGE_OOM_DIE,         /* the current is killed because of OOM */
 };
 
 static int mem_cgroup_do_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
                                unsigned int nr_pages, unsigned int min_pages,
-                               bool oom_check)
+                               bool invoke_oom)
 {
        unsigned long csize = nr_pages * PAGE_SIZE;
        struct mem_cgroup *mem_over_limit;
@@ -2594,14 +2515,10 @@ static int mem_cgroup_do_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
        if (mem_cgroup_wait_acct_move(mem_over_limit))
                return CHARGE_RETRY;
 
-       /* If we don't need to call oom-killer at el, return immediately */
-       if (!oom_check)
-               return CHARGE_NOMEM;
-       /* check OOM */
-       if (!mem_cgroup_handle_oom(mem_over_limit, gfp_mask, get_order(csize)))
-               return CHARGE_OOM_DIE;
+       if (invoke_oom)
+               mem_cgroup_oom(mem_over_limit, gfp_mask, get_order(csize));
 
-       return CHARGE_RETRY;
+       return CHARGE_NOMEM;
 }
 
 /*
@@ -2704,7 +2621,7 @@ again:
        }
 
        do {
-               bool oom_check;
+               bool invoke_oom = oom && !nr_oom_retries;
 
                /* If killed, bypass charge */
                if (fatal_signal_pending(current)) {
@@ -2712,14 +2629,8 @@ again:
                        goto bypass;
                }
 
-               oom_check = false;
-               if (oom && !nr_oom_retries) {
-                       oom_check = true;
-                       nr_oom_retries = MEM_CGROUP_RECLAIM_RETRIES;
-               }
-
-               ret = mem_cgroup_do_charge(memcg, gfp_mask, batch, nr_pages,
-                   oom_check);
+               ret = mem_cgroup_do_charge(memcg, gfp_mask, batch,
+                                          nr_pages, invoke_oom);
                switch (ret) {
                case CHARGE_OK:
                        break;
@@ -2732,16 +2643,12 @@ again:
                        css_put(&memcg->css);
                        goto nomem;
                case CHARGE_NOMEM: /* OOM routine works */
-                       if (!oom) {
+                       if (!oom || invoke_oom) {
                                css_put(&memcg->css);
                                goto nomem;
                        }
-                       /* If oom, we never return -ENOMEM */
                        nr_oom_retries--;
                        break;
-               case CHARGE_OOM_DIE: /* Killed by OOM Killer */
-                       css_put(&memcg->css);
-                       goto bypass;
                }
        } while (ret != CHARGE_OK);
 
@@ -2882,7 +2789,7 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
         * is accessed after testing USED bit. To make pc->mem_cgroup visible
         * before USED bit, we need memory barrier here.
         * See mem_cgroup_add_lru_list(), etc.
-        */
+        */
        smp_wmb();
        SetPageCgroupUsed(pc);
 
@@ -2905,9 +2812,7 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
        unlock_page_cgroup(pc);
 
        /*
-        * "charge_statistics" updated event counter. Then, check it.
-        * Insert ancestor (and ancestor's ancestors), to softlimit RB-tree.
-        * if they exceeds softlimit.
+        * "charge_statistics" updated event counter.
         */
        memcg_check_events(memcg, page);
 }
@@ -3121,7 +3026,7 @@ int memcg_update_cache_size(struct kmem_cache *s, int num_groups)
                ssize_t size = memcg_caches_array_size(num_groups);
 
                size *= sizeof(void *);
-               size += sizeof(struct memcg_cache_params);
+               size += offsetof(struct memcg_cache_params, memcg_caches);
 
                s->memcg_params = kzalloc(size, GFP_KERNEL);
                if (!s->memcg_params) {
@@ -3164,13 +3069,16 @@ int memcg_update_cache_size(struct kmem_cache *s, int num_groups)
 int memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s,
                         struct kmem_cache *root_cache)
 {
-       size_t size = sizeof(struct memcg_cache_params);
+       size_t size;
 
        if (!memcg_kmem_enabled())
                return 0;
 
-       if (!memcg)
+       if (!memcg) {
+               size = offsetof(struct memcg_cache_params, memcg_caches);
                size += memcg_limited_groups_array_size * sizeof(void *);
+       } else
+               size = sizeof(struct memcg_cache_params);
 
        s->memcg_params = kzalloc(size, GFP_KERNEL);
        if (!s->memcg_params)
@@ -3623,9 +3531,9 @@ __memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **_memcg, int order)
         * the page allocator. Therefore, the following sequence when backed by
         * the SLUB allocator:
         *
-        *      memcg_stop_kmem_account();
-        *      kmalloc(<large_number>)
-        *      memcg_resume_kmem_account();
+        *      memcg_stop_kmem_account();
+        *      kmalloc(<large_number>)
+        *      memcg_resume_kmem_account();
         *
         * would effectively ignore the fact that we should skip accounting,
         * since it will drive us directly to this function without passing
@@ -3747,6 +3655,20 @@ void mem_cgroup_split_huge_fixup(struct page *head)
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
+static inline
+void mem_cgroup_move_account_page_stat(struct mem_cgroup *from,
+                                       struct mem_cgroup *to,
+                                       unsigned int nr_pages,
+                                       enum mem_cgroup_stat_index idx)
+{
+       /* Update stat data for mem_cgroup */
+       preempt_disable();
+       WARN_ON_ONCE(from->stat->count[idx] < nr_pages);
+       __this_cpu_add(from->stat->count[idx], -nr_pages);
+       __this_cpu_add(to->stat->count[idx], nr_pages);
+       preempt_enable();
+}
+
 /**
  * mem_cgroup_move_account - move account of the page
  * @page: the page
@@ -3792,13 +3714,14 @@ static int mem_cgroup_move_account(struct page *page,
 
        move_lock_mem_cgroup(from, &flags);
 
-       if (!anon && page_mapped(page)) {
-               /* Update mapped_file data for mem_cgroup */
-               preempt_disable();
-               __this_cpu_dec(from->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]);
-               __this_cpu_inc(to->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]);
-               preempt_enable();
-       }
+       if (!anon && page_mapped(page))
+               mem_cgroup_move_account_page_stat(from, to, nr_pages,
+                       MEM_CGROUP_STAT_FILE_MAPPED);
+
+       if (PageWriteback(page))
+               mem_cgroup_move_account_page_stat(from, to, nr_pages,
+                       MEM_CGROUP_STAT_WRITEBACK);
+
        mem_cgroup_charge_statistics(from, page, anon, -nr_pages);
 
        /* caller should have done css_get */
@@ -4654,7 +4577,7 @@ static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
                                   MEM_CGROUP_RECLAIM_SHRINK);
                curusage = res_counter_read_u64(&memcg->res, RES_USAGE);
                /* Usage is reduced ? */
-               if (curusage >= oldusage)
+               if (curusage >= oldusage)
                        retry_count--;
                else
                        oldusage = curusage;
@@ -4675,7 +4598,7 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
        int enlarge = 0;
 
        /* see mem_cgroup_resize_res_limit */
-       retry_count = children * MEM_CGROUP_RECLAIM_RETRIES;
+       retry_count = children * MEM_CGROUP_RECLAIM_RETRIES;
        oldusage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
        while (retry_count) {
                if (signal_pending(current)) {
@@ -4724,98 +4647,6 @@ static int mem_cgroup_resize_memsw_limit(struct mem_cgroup *memcg,
        return ret;
 }
 
-unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
-                                           gfp_t gfp_mask,
-                                           unsigned long *total_scanned)
-{
-       unsigned long nr_reclaimed = 0;
-       struct mem_cgroup_per_zone *mz, *next_mz = NULL;
-       unsigned long reclaimed;
-       int loop = 0;
-       struct mem_cgroup_tree_per_zone *mctz;
-       unsigned long long excess;
-       unsigned long nr_scanned;
-
-       if (order > 0)
-               return 0;
-
-       mctz = soft_limit_tree_node_zone(zone_to_nid(zone), zone_idx(zone));
-       /*
-        * This loop can run a while, specially if mem_cgroup's continuously
-        * keep exceeding their soft limit and putting the system under
-        * pressure
-        */
-       do {
-               if (next_mz)
-                       mz = next_mz;
-               else
-                       mz = mem_cgroup_largest_soft_limit_node(mctz);
-               if (!mz)
-                       break;
-
-               nr_scanned = 0;
-               reclaimed = mem_cgroup_soft_reclaim(mz->memcg, zone,
-                                                   gfp_mask, &nr_scanned);
-               nr_reclaimed += reclaimed;
-               *total_scanned += nr_scanned;
-               spin_lock(&mctz->lock);
-
-               /*
-                * If we failed to reclaim anything from this memory cgroup
-                * it is time to move on to the next cgroup
-                */
-               next_mz = NULL;
-               if (!reclaimed) {
-                       do {
-                               /*
-                                * Loop until we find yet another one.
-                                *
-                                * By the time we get the soft_limit lock
-                                * again, someone might have aded the
-                                * group back on the RB tree. Iterate to
-                                * make sure we get a different mem.
-                                * mem_cgroup_largest_soft_limit_node returns
-                                * NULL if no other cgroup is present on
-                                * the tree
-                                */
-                               next_mz =
-                               __mem_cgroup_largest_soft_limit_node(mctz);
-                               if (next_mz == mz)
-                                       css_put(&next_mz->memcg->css);
-                               else /* next_mz == NULL or other memcg */
-                                       break;
-                       } while (1);
-               }
-               __mem_cgroup_remove_exceeded(mz->memcg, mz, mctz);
-               excess = res_counter_soft_limit_excess(&mz->memcg->res);
-               /*
-                * One school of thought says that we should not add
-                * back the node to the tree if reclaim returns 0.
-                * But our reclaim could return 0, simply because due
-                * to priority we are exposing a smaller subset of
-                * memory to reclaim from. Consider this as a longer
-                * term TODO.
-                */
-               /* If excess == 0, no tree ops */
-               __mem_cgroup_insert_exceeded(mz->memcg, mz, mctz, excess);
-               spin_unlock(&mctz->lock);
-               css_put(&mz->memcg->css);
-               loop++;
-               /*
-                * Could not reclaim anything and there are no more
-                * mem cgroups to try or we seem to be looping without
-                * reclaiming anything.
-                */
-               if (!nr_reclaimed &&
-                       (next_mz == NULL ||
-                       loop > MEM_CGROUP_MAX_SOFT_LIMIT_RECLAIM_LOOPS))
-                       break;
-       } while (!nr_reclaimed);
-       if (next_mz)
-               css_put(&next_mz->memcg->css);
-       return nr_reclaimed;
-}
-
 /**
  * mem_cgroup_force_empty_list - clears LRU of a group
  * @memcg: group to clear
@@ -4987,18 +4818,12 @@ static int mem_cgroup_force_empty_write(struct cgroup_subsys_state *css,
                                        unsigned int event)
 {
        struct mem_cgroup *memcg = mem_cgroup_from_css(css);
-       int ret;
 
        if (mem_cgroup_is_root(memcg))
                return -EINVAL;
-       css_get(&memcg->css);
-       ret = mem_cgroup_force_empty(memcg);
-       css_put(&memcg->css);
-
-       return ret;
+       return mem_cgroup_force_empty(memcg);
 }
 
-
 static u64 mem_cgroup_hierarchy_read(struct cgroup_subsys_state *css,
                                     struct cftype *cft)
 {
@@ -5136,7 +4961,7 @@ static int memcg_update_kmem_limit(struct cgroup_subsys_state *css, u64 val)
         */
        mutex_lock(&memcg_create_mutex);
        mutex_lock(&set_limit_mutex);
-       if (!memcg->kmem_account_flags && val != RESOURCE_MAX) {
+       if (!memcg->kmem_account_flags && val != RES_COUNTER_MAX) {
                if (cgroup_task_count(css->cgroup) || memcg_has_children(memcg)) {
                        ret = -EBUSY;
                        goto out;
@@ -5146,7 +4971,7 @@ static int memcg_update_kmem_limit(struct cgroup_subsys_state *css, u64 val)
 
                ret = memcg_update_cache_sizes(memcg);
                if (ret) {
-                       res_counter_set_limit(&memcg->kmem, RESOURCE_MAX);
+                       res_counter_set_limit(&memcg->kmem, RES_COUNTER_MAX);
                        goto out;
                }
                static_key_slow_inc(&memcg_kmem_enabled_key);
@@ -5588,7 +5413,13 @@ static int compare_thresholds(const void *a, const void *b)
        const struct mem_cgroup_threshold *_a = a;
        const struct mem_cgroup_threshold *_b = b;
 
-       return _a->threshold - _b->threshold;
+       if (_a->threshold > _b->threshold)
+               return 1;
+
+       if (_a->threshold < _b->threshold)
+               return -1;
+
+       return 0;
 }
 
 static int mem_cgroup_oom_notify_cb(struct mem_cgroup *memcg)
@@ -6080,8 +5911,6 @@ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node)
        for (zone = 0; zone < MAX_NR_ZONES; zone++) {
                mz = &pn->zoneinfo[zone];
                lruvec_init(&mz->lruvec);
-               mz->usage_in_excess = 0;
-               mz->on_tree = false;
                mz->memcg = memcg;
        }
        memcg->nodeinfo[node] = pn;
@@ -6137,7 +5966,6 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
        int node;
        size_t size = memcg_size();
 
-       mem_cgroup_remove_from_trees(memcg);
        free_css_id(&mem_cgroup_subsys, &memcg->css);
 
        for_each_node(node)
@@ -6174,29 +6002,6 @@ struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg)
 }
 EXPORT_SYMBOL(parent_mem_cgroup);
 
-static void __init mem_cgroup_soft_limit_tree_init(void)
-{
-       struct mem_cgroup_tree_per_node *rtpn;
-       struct mem_cgroup_tree_per_zone *rtpz;
-       int tmp, node, zone;
-
-       for_each_node(node) {
-               tmp = node;
-               if (!node_state(node, N_NORMAL_MEMORY))
-                       tmp = -1;
-               rtpn = kzalloc_node(sizeof(*rtpn), GFP_KERNEL, tmp);
-               BUG_ON(!rtpn);
-
-               soft_limit_tree.rb_tree_per_node[node] = rtpn;
-
-               for (zone = 0; zone < MAX_NR_ZONES; zone++) {
-                       rtpz = &rtpn->rb_tree_per_zone[zone];
-                       rtpz->rb_root = RB_ROOT;
-                       spin_lock_init(&rtpz->lock);
-               }
-       }
-}
-
 static struct cgroup_subsys_state * __ref
 mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 {
@@ -6226,6 +6031,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
        mutex_init(&memcg->thresholds_lock);
        spin_lock_init(&memcg->move_lock);
        vmpressure_init(&memcg->vmpressure);
+       spin_lock_init(&memcg->soft_lock);
 
        return &memcg->css;
 
@@ -6303,6 +6109,13 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css)
 
        mem_cgroup_invalidate_reclaim_iterators(memcg);
        mem_cgroup_reparent_charges(memcg);
+       if (memcg->soft_contributed) {
+               while ((memcg = parent_mem_cgroup(memcg)))
+                       atomic_dec(&memcg->children_in_excess);
+
+               if (memcg != root_mem_cgroup && !root_mem_cgroup->use_hierarchy)
+                       atomic_dec(&root_mem_cgroup->children_in_excess);
+       }
        mem_cgroup_destroy_all_caches(memcg);
        vmpressure_cleanup(&memcg->vmpressure);
 }
@@ -6977,7 +6790,6 @@ static int __init mem_cgroup_init(void)
 {
        hotcpu_notifier(memcg_cpu_hotplug_callback, 0);
        enable_swap_cgroup();
-       mem_cgroup_soft_limit_tree_init();
        memcg_stock_init();
        return 0;
 }
index d84c5e5..947ed54 100644 (file)
@@ -206,7 +206,7 @@ static int kill_proc(struct task_struct *t, unsigned long addr, int trapno,
 #ifdef __ARCH_SI_TRAPNO
        si.si_trapno = trapno;
 #endif
-       si.si_addr_lsb = compound_trans_order(compound_head(page)) + PAGE_SHIFT;
+       si.si_addr_lsb = compound_order(compound_head(page)) + PAGE_SHIFT;
 
        if ((flags & MF_ACTION_REQUIRED) && t == current) {
                si.si_code = BUS_MCEERR_AR;
@@ -248,10 +248,12 @@ void shake_page(struct page *p, int access)
         */
        if (access) {
                int nr;
+               int nid = page_to_nid(p);
                do {
                        struct shrink_control shrink = {
                                .gfp_mask = GFP_KERNEL,
                        };
+                       node_set(nid, shrink.nodes_to_scan);
 
                        nr = shrink_slab(&shrink, 1000, 1000);
                        if (page_count(p) == 1)
@@ -983,7 +985,7 @@ static int hwpoison_user_mappings(struct page *p, unsigned long pfn,
 static void set_page_hwpoison_huge_page(struct page *hpage)
 {
        int i;
-       int nr_pages = 1 << compound_trans_order(hpage);
+       int nr_pages = 1 << compound_order(hpage);
        for (i = 0; i < nr_pages; i++)
                SetPageHWPoison(hpage + i);
 }
@@ -991,7 +993,7 @@ static void set_page_hwpoison_huge_page(struct page *hpage)
 static void clear_page_hwpoison_huge_page(struct page *hpage)
 {
        int i;
-       int nr_pages = 1 << compound_trans_order(hpage);
+       int nr_pages = 1 << compound_order(hpage);
        for (i = 0; i < nr_pages; i++)
                ClearPageHWPoison(hpage + i);
 }
@@ -1204,6 +1206,9 @@ int memory_failure(unsigned long pfn, int trapno, int flags)
        for (ps = error_states;; ps++)
                if ((p->flags & ps->mask) == ps->res)
                        break;
+
+       page_flags |= (p->flags & (1UL << PG_dirty));
+
        if (!ps->mask)
                for (ps = error_states;; ps++)
                        if ((page_flags & ps->mask) == ps->res)
@@ -1339,7 +1344,17 @@ int unpoison_memory(unsigned long pfn)
                return 0;
        }
 
-       nr_pages = 1 << compound_trans_order(page);
+       /*
+        * unpoison_memory() can encounter thp only when the thp is being
+        * worked by memory_failure() and the page lock is not held yet.
+        * In such case, we yield to memory_failure() and make unpoison fail.
+        */
+       if (PageTransHuge(page)) {
+               pr_info("MCE: Memory failure is now running on %#lx\n", pfn);
+               return 0;
+       }
+
+       nr_pages = 1 << compound_order(page);
 
        if (!get_page_unless_zero(page)) {
                /*
@@ -1353,7 +1368,7 @@ int unpoison_memory(unsigned long pfn)
                        return 0;
                }
                if (TestClearPageHWPoison(p))
-                       atomic_long_sub(nr_pages, &num_poisoned_pages);
+                       atomic_long_dec(&num_poisoned_pages);
                pr_info("MCE: Software-unpoisoned free page %#lx\n", pfn);
                return 0;
        }
@@ -1375,7 +1390,7 @@ int unpoison_memory(unsigned long pfn)
        unlock_page(page);
 
        put_page(page);
-       if (freeit)
+       if (freeit && !(pfn == my_zero_pfn(0) && page_count(p) == 1))
                put_page(page);
 
        return 0;
@@ -1416,7 +1431,8 @@ static int __get_any_page(struct page *p, unsigned long pfn, int flags)
         * was free. This flag should be kept set until the source page
         * is freed and PG_hwpoison on it is set.
         */
-       set_migratetype_isolate(p, true);
+       if (get_pageblock_migratetype(p) != MIGRATE_ISOLATE)
+               set_migratetype_isolate(p, true);
        /*
         * When the target page is a free hugepage, just remove it
         * from free hugepage list.
@@ -1470,6 +1486,7 @@ static int soft_offline_huge_page(struct page *page, int flags)
        int ret;
        unsigned long pfn = page_to_pfn(page);
        struct page *hpage = compound_head(page);
+       LIST_HEAD(pagelist);
 
        /*
         * This double-check of PageHWPoison is to avoid the race with
@@ -1485,86 +1502,29 @@ static int soft_offline_huge_page(struct page *page, int flags)
        unlock_page(hpage);
 
        /* Keep page count to indicate a given hugepage is isolated. */
-       ret = migrate_huge_page(hpage, new_page, MPOL_MF_MOVE_ALL,
-                               MIGRATE_SYNC);
-       put_page(hpage);
+       list_move(&hpage->lru, &pagelist);
+       ret = migrate_pages(&pagelist, new_page, MPOL_MF_MOVE_ALL,
+                               MIGRATE_SYNC, MR_MEMORY_FAILURE);
        if (ret) {
                pr_info("soft offline: %#lx: migration failed %d, type %lx\n",
                        pfn, ret, page->flags);
+               /*
+                * We know that soft_offline_huge_page() tries to migrate
+                * only one hugepage pointed to by hpage, so we need not
+                * run through the pagelist here.
+                */
+               putback_active_hugepage(hpage);
+               if (ret > 0)
+                       ret = -EIO;
        } else {
                set_page_hwpoison_huge_page(hpage);
                dequeue_hwpoisoned_huge_page(hpage);
-               atomic_long_add(1 << compound_trans_order(hpage),
+               atomic_long_add(1 << compound_order(hpage),
                                &num_poisoned_pages);
        }
        return ret;
 }
 
-static int __soft_offline_page(struct page *page, int flags);
-
-/**
- * soft_offline_page - Soft offline a page.
- * @page: page to offline
- * @flags: flags. Same as memory_failure().
- *
- * Returns 0 on success, otherwise negated errno.
- *
- * Soft offline a page, by migration or invalidation,
- * without killing anything. This is for the case when
- * a page is not corrupted yet (so it's still valid to access),
- * but has had a number of corrected errors and is better taken
- * out.
- *
- * The actual policy on when to do that is maintained by
- * user space.
- *
- * This should never impact any application or cause data loss,
- * however it might take some time.
- *
- * This is not a 100% solution for all memory, but tries to be
- * ``good enough'' for the majority of memory.
- */
-int soft_offline_page(struct page *page, int flags)
-{
-       int ret;
-       unsigned long pfn = page_to_pfn(page);
-       struct page *hpage = compound_trans_head(page);
-
-       if (PageHWPoison(page)) {
-               pr_info("soft offline: %#lx page already poisoned\n", pfn);
-               return -EBUSY;
-       }
-       if (!PageHuge(page) && PageTransHuge(hpage)) {
-               if (PageAnon(hpage) && unlikely(split_huge_page(hpage))) {
-                       pr_info("soft offline: %#lx: failed to split THP\n",
-                               pfn);
-                       return -EBUSY;
-               }
-       }
-
-       ret = get_any_page(page, pfn, flags);
-       if (ret < 0)
-               return ret;
-       if (ret) { /* for in-use pages */
-               if (PageHuge(page))
-                       ret = soft_offline_huge_page(page, flags);
-               else
-                       ret = __soft_offline_page(page, flags);
-       } else { /* for free pages */
-               if (PageHuge(page)) {
-                       set_page_hwpoison_huge_page(hpage);
-                       dequeue_hwpoisoned_huge_page(hpage);
-                       atomic_long_add(1 << compound_trans_order(hpage),
-                                       &num_poisoned_pages);
-               } else {
-                       SetPageHWPoison(page);
-                       atomic_long_inc(&num_poisoned_pages);
-               }
-       }
-       unset_migratetype_isolate(page, MIGRATE_MOVABLE);
-       return ret;
-}
-
 static int __soft_offline_page(struct page *page, int flags)
 {
        int ret;
@@ -1651,3 +1611,67 @@ static int __soft_offline_page(struct page *page, int flags)
        }
        return ret;
 }
+
+/**
+ * soft_offline_page - Soft offline a page.
+ * @page: page to offline
+ * @flags: flags. Same as memory_failure().
+ *
+ * Returns 0 on success, otherwise negated errno.
+ *
+ * Soft offline a page, by migration or invalidation,
+ * without killing anything. This is for the case when
+ * a page is not corrupted yet (so it's still valid to access),
+ * but has had a number of corrected errors and is better taken
+ * out.
+ *
+ * The actual policy on when to do that is maintained by
+ * user space.
+ *
+ * This should never impact any application or cause data loss,
+ * however it might take some time.
+ *
+ * This is not a 100% solution for all memory, but tries to be
+ * ``good enough'' for the majority of memory.
+ */
+int soft_offline_page(struct page *page, int flags)
+{
+       int ret;
+       unsigned long pfn = page_to_pfn(page);
+       struct page *hpage = compound_trans_head(page);
+
+       if (PageHWPoison(page)) {
+               pr_info("soft offline: %#lx page already poisoned\n", pfn);
+               return -EBUSY;
+       }
+       if (!PageHuge(page) && PageTransHuge(hpage)) {
+               if (PageAnon(hpage) && unlikely(split_huge_page(hpage))) {
+                       pr_info("soft offline: %#lx: failed to split THP\n",
+                               pfn);
+                       return -EBUSY;
+               }
+       }
+
+       ret = get_any_page(page, pfn, flags);
+       if (ret < 0)
+               goto unset;
+       if (ret) { /* for in-use pages */
+               if (PageHuge(page))
+                       ret = soft_offline_huge_page(page, flags);
+               else
+                       ret = __soft_offline_page(page, flags);
+       } else { /* for free pages */
+               if (PageHuge(page)) {
+                       set_page_hwpoison_huge_page(hpage);
+                       dequeue_hwpoisoned_huge_page(hpage);
+                       atomic_long_add(1 << compound_order(hpage),
+                                       &num_poisoned_pages);
+               } else {
+                       SetPageHWPoison(page);
+                       atomic_long_inc(&num_poisoned_pages);
+               }
+       }
+unset:
+       unset_migratetype_isolate(page, MIGRATE_MOVABLE);
+       return ret;
+}
index b3c6bf9..ca00039 100644 (file)
@@ -372,30 +372,6 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table)
 
 #endif /* CONFIG_HAVE_RCU_TABLE_FREE */
 
-/*
- * If a p?d_bad entry is found while walking page tables, report
- * the error, before resetting entry to p?d_none.  Usually (but
- * very seldom) called out from the p?d_none_or_clear_bad macros.
- */
-
-void pgd_clear_bad(pgd_t *pgd)
-{
-       pgd_ERROR(*pgd);
-       pgd_clear(pgd);
-}
-
-void pud_clear_bad(pud_t *pud)
-{
-       pud_ERROR(*pud);
-       pud_clear(pud);
-}
-
-void pmd_clear_bad(pmd_t *pmd)
-{
-       pmd_ERROR(*pmd);
-       pmd_clear(pmd);
-}
-
 /*
  * Note: this doesn't free the actual pages themselves. That
  * has been handled earlier when unmapping all the memory regions.
@@ -1505,7 +1481,8 @@ struct page *follow_page_mask(struct vm_area_struct *vma,
        if (pud_none(*pud))
                goto no_page_table;
        if (pud_huge(*pud) && vma->vm_flags & VM_HUGETLB) {
-               BUG_ON(flags & FOLL_GET);
+               if (flags & FOLL_GET)
+                       goto out;
                page = follow_huge_pud(mm, address, pud, flags & FOLL_WRITE);
                goto out;
        }
@@ -1516,8 +1493,20 @@ struct page *follow_page_mask(struct vm_area_struct *vma,
        if (pmd_none(*pmd))
                goto no_page_table;
        if (pmd_huge(*pmd) && vma->vm_flags & VM_HUGETLB) {
-               BUG_ON(flags & FOLL_GET);
                page = follow_huge_pmd(mm, address, pmd, flags & FOLL_WRITE);
+               if (flags & FOLL_GET) {
+                       /*
+                        * Refcount on tail pages are not well-defined and
+                        * shouldn't be taken. The caller should handle a NULL
+                        * return when trying to follow tail pages.
+                        */
+                       if (PageHead(page))
+                               get_page(page);
+                       else {
+                               page = NULL;
+                               goto out;
+                       }
+               }
                goto out;
        }
        if ((flags & FOLL_NUMA) && pmd_numa(*pmd))
@@ -3706,7 +3695,7 @@ static int do_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vma,
  * but allow concurrent faults), and pte mapped but not yet locked.
  * We return with mmap_sem still held, but pte unmapped and unlocked.
  */
-int handle_pte_fault(struct mm_struct *mm,
+static int handle_pte_fault(struct mm_struct *mm,
                     struct vm_area_struct *vma, unsigned long address,
                     pte_t *pte, pmd_t *pmd, unsigned int flags)
 {
@@ -3765,22 +3754,14 @@ unlock:
 /*
  * By the time we get here, we already hold the mm semaphore
  */
-int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
-               unsigned long address, unsigned int flags)
+static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
+                            unsigned long address, unsigned int flags)
 {
        pgd_t *pgd;
        pud_t *pud;
        pmd_t *pmd;
        pte_t *pte;
 
-       __set_current_state(TASK_RUNNING);
-
-       count_vm_event(PGFAULT);
-       mem_cgroup_count_vm_event(mm, PGFAULT);
-
-       /* do counter updates before entering really critical section. */
-       check_sync_rss_stat(current);
-
        if (unlikely(is_vm_hugetlb_page(vma)))
                return hugetlb_fault(mm, vma, address, flags);
 
@@ -3793,9 +3774,12 @@ retry:
        if (!pmd)
                return VM_FAULT_OOM;
        if (pmd_none(*pmd) && transparent_hugepage_enabled(vma)) {
+               int ret = VM_FAULT_FALLBACK;
                if (!vma->vm_ops)
-                       return do_huge_pmd_anonymous_page(mm, vma, address,
-                                                         pmd, flags);
+                       ret = do_huge_pmd_anonymous_page(mm, vma, address,
+                                       pmd, flags);
+               if (!(ret & VM_FAULT_FALLBACK))
+                       return ret;
        } else {
                pmd_t orig_pmd = *pmd;
                int ret;
@@ -3861,6 +3845,37 @@ retry:
        return handle_pte_fault(mm, vma, address, pte, pmd, flags);
 }
 
+int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
+                   unsigned long address, unsigned int flags)
+{
+       int ret;
+
+       __set_current_state(TASK_RUNNING);
+
+       count_vm_event(PGFAULT);
+       mem_cgroup_count_vm_event(mm, PGFAULT);
+
+       /* do counter updates before entering really critical section. */
+       check_sync_rss_stat(current);
+
+       /*
+        * Enable the memcg OOM handling for faults triggered in user
+        * space.  Kernel faults are handled more gracefully.
+        */
+       if (flags & FAULT_FLAG_USER)
+               mem_cgroup_enable_oom();
+
+       ret = __handle_mm_fault(mm, vma, address, flags);
+
+       if (flags & FAULT_FLAG_USER)
+               mem_cgroup_disable_oom();
+
+       if (WARN_ON(task_in_memcg_oom(current) && !(ret & VM_FAULT_OOM)))
+               mem_cgroup_oom_synchronize();
+
+       return ret;
+}
+
 #ifndef __PAGETABLE_PUD_FOLDED
 /*
  * Allocate page upper directory.
index ca1dd3a..ed85fe3 100644 (file)
@@ -30,6 +30,7 @@
 #include <linux/mm_inline.h>
 #include <linux/firmware-map.h>
 #include <linux/stop_machine.h>
+#include <linux/hugetlb.h>
 
 #include <asm/tlbflush.h>
 
@@ -51,14 +52,10 @@ DEFINE_MUTEX(mem_hotplug_mutex);
 void lock_memory_hotplug(void)
 {
        mutex_lock(&mem_hotplug_mutex);
-
-       /* for exclusive hibernation if CONFIG_HIBERNATION=y */
-       lock_system_sleep();
 }
 
 void unlock_memory_hotplug(void)
 {
-       unlock_system_sleep();
        mutex_unlock(&mem_hotplug_mutex);
 }
 
@@ -194,7 +191,7 @@ void register_page_bootmem_info_node(struct pglist_data *pgdat)
 
        zone = &pgdat->node_zones[0];
        for (; zone < pgdat->node_zones + MAX_NR_ZONES - 1; zone++) {
-               if (zone->wait_table) {
+               if (zone_is_initialized(zone)) {
                        nr_pages = zone->wait_table_hash_nr_entries
                                * sizeof(wait_queue_head_t);
                        nr_pages = PAGE_ALIGN(nr_pages) >> PAGE_SHIFT;
@@ -229,8 +226,8 @@ static void grow_zone_span(struct zone *zone, unsigned long start_pfn,
 
        zone_span_writelock(zone);
 
-       old_zone_end_pfn = zone->zone_start_pfn + zone->spanned_pages;
-       if (!zone->spanned_pages || start_pfn < zone->zone_start_pfn)
+       old_zone_end_pfn = zone_end_pfn(zone);
+       if (zone_is_empty(zone) || start_pfn < zone->zone_start_pfn)
                zone->zone_start_pfn = start_pfn;
 
        zone->spanned_pages = max(old_zone_end_pfn, end_pfn) -
@@ -305,7 +302,7 @@ static int __meminit move_pfn_range_left(struct zone *z1, struct zone *z2,
                goto out_fail;
 
        /* use start_pfn for z1's start_pfn if z1 is empty */
-       if (z1->spanned_pages)
+       if (!zone_is_empty(z1))
                z1_start_pfn = z1->zone_start_pfn;
        else
                z1_start_pfn = start_pfn;
@@ -347,7 +344,7 @@ static int __meminit move_pfn_range_right(struct zone *z1, struct zone *z2,
                goto out_fail;
 
        /* use end_pfn for z2's end_pfn if z2 is empty */
-       if (z2->spanned_pages)
+       if (!zone_is_empty(z2))
                z2_end_pfn = zone_end_pfn(z2);
        else
                z2_end_pfn = end_pfn;
@@ -514,8 +511,9 @@ static int find_biggest_section_pfn(int nid, struct zone *zone,
 static void shrink_zone_span(struct zone *zone, unsigned long start_pfn,
                             unsigned long end_pfn)
 {
-       unsigned long zone_start_pfn =  zone->zone_start_pfn;
-       unsigned long zone_end_pfn = zone->zone_start_pfn + zone->spanned_pages;
+       unsigned long zone_start_pfn = zone->zone_start_pfn;
+       unsigned long z = zone_end_pfn(zone); /* zone_end_pfn namespace clash */
+       unsigned long zone_end_pfn = z;
        unsigned long pfn;
        struct mem_section *ms;
        int nid = zone_to_nid(zone);
@@ -1069,6 +1067,23 @@ out:
        return ret;
 }
 
+static int check_hotplug_memory_range(u64 start, u64 size)
+{
+       u64 start_pfn = start >> PAGE_SHIFT;
+       u64 nr_pages = size >> PAGE_SHIFT;
+
+       /* Memory range must be aligned with section */
+       if ((start_pfn & ~PAGE_SECTION_MASK) ||
+           (nr_pages % PAGES_PER_SECTION) || (!nr_pages)) {
+               pr_err("Section-unaligned hotplug range: start 0x%llx, size 0x%llx\n",
+                               (unsigned long long)start,
+                               (unsigned long long)size);
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
 /* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */
 int __ref add_memory(int nid, u64 start, u64 size)
 {
@@ -1078,6 +1093,10 @@ int __ref add_memory(int nid, u64 start, u64 size)
        struct resource *res;
        int ret;
 
+       ret = check_hotplug_memory_range(start, size);
+       if (ret)
+               return ret;
+
        lock_memory_hotplug();
 
        res = register_memory_resource(start, size);
@@ -1208,10 +1227,12 @@ static int test_pages_in_a_zone(unsigned long start_pfn, unsigned long end_pfn)
 }
 
 /*
- * Scanning pfn is much easier than scanning lru list.
- * Scan pfn from start to end and Find LRU page.
+ * Scan pfn range [start,end) to find movable/migratable pages (LRU pages
+ * and hugepages). We scan pfn because it's much easier than scanning over
+ * linked list. This function returns the pfn of the first found movable
+ * page if it's found, otherwise 0.
  */
-static unsigned long scan_lru_pages(unsigned long start, unsigned long end)
+static unsigned long scan_movable_pages(unsigned long start, unsigned long end)
 {
        unsigned long pfn;
        struct page *page;
@@ -1220,6 +1241,13 @@ static unsigned long scan_lru_pages(unsigned long start, unsigned long end)
                        page = pfn_to_page(pfn);
                        if (PageLRU(page))
                                return pfn;
+                       if (PageHuge(page)) {
+                               if (is_hugepage_active(page))
+                                       return pfn;
+                               else
+                                       pfn = round_up(pfn + 1,
+                                               1 << compound_order(page)) - 1;
+                       }
                }
        }
        return 0;
@@ -1240,6 +1268,19 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
                if (!pfn_valid(pfn))
                        continue;
                page = pfn_to_page(pfn);
+
+               if (PageHuge(page)) {
+                       struct page *head = compound_head(page);
+                       pfn = page_to_pfn(head) + (1<<compound_order(head)) - 1;
+                       if (compound_order(head) > PFN_SECTION_SHIFT) {
+                               ret = -EBUSY;
+                               break;
+                       }
+                       if (isolate_huge_page(page, &source))
+                               move_pages -= 1 << compound_order(head);
+                       continue;
+               }
+
                if (!get_page_unless_zero(page))
                        continue;
                /*
@@ -1272,7 +1313,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
        }
        if (!list_empty(&source)) {
                if (not_managed) {
-                       putback_lru_pages(&source);
+                       putback_movable_pages(&source);
                        goto out;
                }
 
@@ -1283,7 +1324,7 @@ do_migrate_range(unsigned long start_pfn, unsigned long end_pfn)
                ret = migrate_pages(&source, alloc_migrate_target, 0,
                                        MIGRATE_SYNC, MR_MEMORY_HOTPLUG);
                if (ret)
-                       putback_lru_pages(&source);
+                       putback_movable_pages(&source);
        }
 out:
        return ret;
@@ -1472,7 +1513,6 @@ static int __ref __offline_pages(unsigned long start_pfn,
        struct zone *zone;
        struct memory_notify arg;
 
-       BUG_ON(start_pfn >= end_pfn);
        /* at least, alignment against pageblock is necessary */
        if (!IS_ALIGNED(start_pfn, pageblock_nr_pages))
                return -EINVAL;
@@ -1527,8 +1567,8 @@ repeat:
                drain_all_pages();
        }
 
-       pfn = scan_lru_pages(start_pfn, end_pfn);
-       if (pfn) { /* We have page on LRU */
+       pfn = scan_movable_pages(start_pfn, end_pfn);
+       if (pfn) { /* We have movable pages */
                ret = do_migrate_range(pfn, end_pfn);
                if (!ret) {
                        drain = 1;
@@ -1547,6 +1587,11 @@ repeat:
        yield();
        /* drain pcp pages, this is synchronous. */
        drain_all_pages();
+       /*
+        * dissolve free hugepages in the memory block before doing offlining
+        * actually in order to make hugetlbfs's object counting consistent.
+        */
+       dissolve_free_huge_pages(start_pfn, end_pfn);
        /* check again */
        offlined_pages = check_pages_isolated(start_pfn, end_pfn);
        if (offlined_pages < 0) {
@@ -1674,9 +1719,8 @@ static int is_memblock_offlined_cb(struct memory_block *mem, void *arg)
        return ret;
 }
 
-static int check_cpu_on_node(void *data)
+static int check_cpu_on_node(pg_data_t *pgdat)
 {
-       struct pglist_data *pgdat = data;
        int cpu;
 
        for_each_present_cpu(cpu) {
@@ -1691,10 +1735,9 @@ static int check_cpu_on_node(void *data)
        return 0;
 }
 
-static void unmap_cpu_on_node(void *data)
+static void unmap_cpu_on_node(pg_data_t *pgdat)
 {
 #ifdef CONFIG_ACPI_NUMA
-       struct pglist_data *pgdat = data;
        int cpu;
 
        for_each_possible_cpu(cpu)
@@ -1703,10 +1746,11 @@ static void unmap_cpu_on_node(void *data)
 #endif
 }
 
-static int check_and_unmap_cpu_on_node(void *data)
+static int check_and_unmap_cpu_on_node(pg_data_t *pgdat)
 {
-       int ret = check_cpu_on_node(data);
+       int ret;
 
+       ret = check_cpu_on_node(pgdat);
        if (ret)
                return ret;
 
@@ -1715,11 +1759,18 @@ static int check_and_unmap_cpu_on_node(void *data)
         * the cpu_to_node() now.
         */
 
-       unmap_cpu_on_node(data);
+       unmap_cpu_on_node(pgdat);
        return 0;
 }
 
-/* offline the node if all memory sections of this node are removed */
+/**
+ * try_offline_node
+ *
+ * Offline a node if all memory sections and cpus of the node are removed.
+ *
+ * NOTE: The caller must call lock_device_hotplug() to serialize hotplug
+ * and online/offline operations before this call.
+ */
 void try_offline_node(int nid)
 {
        pg_data_t *pgdat = NODE_DATA(nid);
@@ -1745,7 +1796,7 @@ void try_offline_node(int nid)
                return;
        }
 
-       if (stop_machine(check_and_unmap_cpu_on_node, pgdat, NULL))
+       if (check_and_unmap_cpu_on_node(pgdat))
                return;
 
        /*
@@ -1782,10 +1833,19 @@ void try_offline_node(int nid)
 }
 EXPORT_SYMBOL(try_offline_node);
 
+/**
+ * remove_memory
+ *
+ * NOTE: The caller must call lock_device_hotplug() to serialize hotplug
+ * and online/offline operations before this call, as required by
+ * try_offline_node().
+ */
 void __ref remove_memory(int nid, u64 start, u64 size)
 {
        int ret;
 
+       BUG_ON(check_hotplug_memory_range(start, size));
+
        lock_memory_hotplug();
 
        /*
index 4baf12e..0472964 100644 (file)
@@ -123,16 +123,19 @@ static struct mempolicy preferred_node_policy[MAX_NUMNODES];
 static struct mempolicy *get_task_policy(struct task_struct *p)
 {
        struct mempolicy *pol = p->mempolicy;
-       int node;
 
        if (!pol) {
-               node = numa_node_id();
-               if (node != NUMA_NO_NODE)
-                       pol = &preferred_node_policy[node];
+               int node = numa_node_id();
 
-               /* preferred_node_policy is not initialised early in boot */
-               if (!pol->mode)
-                       pol = NULL;
+               if (node != NUMA_NO_NODE) {
+                       pol = &preferred_node_policy[node];
+                       /*
+                        * preferred_node_policy is not initialised early in
+                        * boot
+                        */
+                       if (!pol->mode)
+                               pol = NULL;
+               }
        }
 
        return pol;
@@ -473,8 +476,11 @@ static const struct mempolicy_operations mpol_ops[MPOL_MAX] = {
 static void migrate_page_add(struct page *page, struct list_head *pagelist,
                                unsigned long flags);
 
-/* Scan through pages checking if pages follow certain conditions. */
-static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
+/*
+ * Scan through pages checking if pages follow certain conditions,
+ * and move them to the pagelist if they do.
+ */
+static int queue_pages_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
                unsigned long addr, unsigned long end,
                const nodemask_t *nodes, unsigned long flags,
                void *private)
@@ -512,7 +518,31 @@ static int check_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
        return addr != end;
 }
 
-static inline int check_pmd_range(struct vm_area_struct *vma, pud_t *pud,
+static void queue_pages_hugetlb_pmd_range(struct vm_area_struct *vma,
+               pmd_t *pmd, const nodemask_t *nodes, unsigned long flags,
+                                   void *private)
+{
+#ifdef CONFIG_HUGETLB_PAGE
+       int nid;
+       struct page *page;
+
+       spin_lock(&vma->vm_mm->page_table_lock);
+       page = pte_page(huge_ptep_get((pte_t *)pmd));
+       nid = page_to_nid(page);
+       if (node_isset(nid, *nodes) == !!(flags & MPOL_MF_INVERT))
+               goto unlock;
+       /* With MPOL_MF_MOVE, we migrate only unshared hugepage. */
+       if (flags & (MPOL_MF_MOVE_ALL) ||
+           (flags & MPOL_MF_MOVE && page_mapcount(page) == 1))
+               isolate_huge_page(page, private);
+unlock:
+       spin_unlock(&vma->vm_mm->page_table_lock);
+#else
+       BUG();
+#endif
+}
+
+static inline int queue_pages_pmd_range(struct vm_area_struct *vma, pud_t *pud,
                unsigned long addr, unsigned long end,
                const nodemask_t *nodes, unsigned long flags,
                void *private)
@@ -523,17 +553,24 @@ static inline int check_pmd_range(struct vm_area_struct *vma, pud_t *pud,
        pmd = pmd_offset(pud, addr);
        do {
                next = pmd_addr_end(addr, end);
+               if (!pmd_present(*pmd))
+                       continue;
+               if (pmd_huge(*pmd) && is_vm_hugetlb_page(vma)) {
+                       queue_pages_hugetlb_pmd_range(vma, pmd, nodes,
+                                               flags, private);
+                       continue;
+               }
                split_huge_page_pmd(vma, addr, pmd);
                if (pmd_none_or_trans_huge_or_clear_bad(pmd))
                        continue;
-               if (check_pte_range(vma, pmd, addr, next, nodes,
+               if (queue_pages_pte_range(vma, pmd, addr, next, nodes,
                                    flags, private))
                        return -EIO;
        } while (pmd++, addr = next, addr != end);
        return 0;
 }
 
-static inline int check_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
+static inline int queue_pages_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
                unsigned long addr, unsigned long end,
                const nodemask_t *nodes, unsigned long flags,
                void *private)
@@ -544,16 +581,18 @@ static inline int check_pud_range(struct vm_area_struct *vma, pgd_t *pgd,
        pud = pud_offset(pgd, addr);
        do {
                next = pud_addr_end(addr, end);
+               if (pud_huge(*pud) && is_vm_hugetlb_page(vma))
+                       continue;
                if (pud_none_or_clear_bad(pud))
                        continue;
-               if (check_pmd_range(vma, pud, addr, next, nodes,
+               if (queue_pages_pmd_range(vma, pud, addr, next, nodes,
                                    flags, private))
                        return -EIO;
        } while (pud++, addr = next, addr != end);
        return 0;
 }
 
-static inline int check_pgd_range(struct vm_area_struct *vma,
+static inline int queue_pages_pgd_range(struct vm_area_struct *vma,
                unsigned long addr, unsigned long end,
                const nodemask_t *nodes, unsigned long flags,
                void *private)
@@ -566,7 +605,7 @@ static inline int check_pgd_range(struct vm_area_struct *vma,
                next = pgd_addr_end(addr, end);
                if (pgd_none_or_clear_bad(pgd))
                        continue;
-               if (check_pud_range(vma, pgd, addr, next, nodes,
+               if (queue_pages_pud_range(vma, pgd, addr, next, nodes,
                                    flags, private))
                        return -EIO;
        } while (pgd++, addr = next, addr != end);
@@ -604,12 +643,14 @@ static unsigned long change_prot_numa(struct vm_area_struct *vma,
 #endif /* CONFIG_ARCH_USES_NUMA_PROT_NONE */
 
 /*
- * Check if all pages in a range are on a set of nodes.
- * If pagelist != NULL then isolate pages from the LRU and
- * put them on the pagelist.
+ * Walk through page tables and collect pages to be migrated.
+ *
+ * If pages found in a given range are on a set of nodes (determined by
+ * @nodes and @flags,) it's isolated and queued to the pagelist which is
+ * passed via @private.)
  */
 static struct vm_area_struct *
-check_range(struct mm_struct *mm, unsigned long start, unsigned long end,
+queue_pages_range(struct mm_struct *mm, unsigned long start, unsigned long end,
                const nodemask_t *nodes, unsigned long flags, void *private)
 {
        int err;
@@ -635,9 +676,6 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end,
                                return ERR_PTR(-EFAULT);
                }
 
-               if (is_vm_hugetlb_page(vma))
-                       goto next;
-
                if (flags & MPOL_MF_LAZY) {
                        change_prot_numa(vma, start, endvma);
                        goto next;
@@ -647,7 +685,7 @@ check_range(struct mm_struct *mm, unsigned long start, unsigned long end,
                     ((flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) &&
                      vma_migratable(vma))) {
 
-                       err = check_pgd_range(vma, start, endvma, nodes,
+                       err = queue_pages_pgd_range(vma, start, endvma, nodes,
                                                flags, private);
                        if (err) {
                                first = ERR_PTR(err);
@@ -990,7 +1028,11 @@ static void migrate_page_add(struct page *page, struct list_head *pagelist,
 
 static struct page *new_node_page(struct page *page, unsigned long node, int **x)
 {
-       return alloc_pages_exact_node(node, GFP_HIGHUSER_MOVABLE, 0);
+       if (PageHuge(page))
+               return alloc_huge_page_node(page_hstate(compound_head(page)),
+                                       node);
+       else
+               return alloc_pages_exact_node(node, GFP_HIGHUSER_MOVABLE, 0);
 }
 
 /*
@@ -1013,14 +1055,14 @@ static int migrate_to_node(struct mm_struct *mm, int source, int dest,
         * space range and MPOL_MF_DISCONTIG_OK, this call can not fail.
         */
        VM_BUG_ON(!(flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)));
-       check_range(mm, mm->mmap->vm_start, mm->task_size, &nmask,
+       queue_pages_range(mm, mm->mmap->vm_start, mm->task_size, &nmask,
                        flags | MPOL_MF_DISCONTIG_OK, &pagelist);
 
        if (!list_empty(&pagelist)) {
                err = migrate_pages(&pagelist, new_node_page, dest,
                                        MIGRATE_SYNC, MR_SYSCALL);
                if (err)
-                       putback_lru_pages(&pagelist);
+                       putback_movable_pages(&pagelist);
        }
 
        return err;
@@ -1154,10 +1196,14 @@ static struct page *new_vma_page(struct page *page, unsigned long private, int *
                        break;
                vma = vma->vm_next;
        }
-
        /*
-        * if !vma, alloc_page_vma() will use task or system default policy
+        * queue_pages_range() confirms that @page belongs to some vma,
+        * so vma shouldn't be NULL.
         */
+       BUG_ON(!vma);
+
+       if (PageHuge(page))
+               return alloc_huge_page_noerr(vma, address, 1);
        return alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
 }
 #else
@@ -1249,7 +1295,7 @@ static long do_mbind(unsigned long start, unsigned long len,
        if (err)
                goto mpol_out;
 
-       vma = check_range(mm, start, end, nmask,
+       vma = queue_pages_range(mm, start, end, nmask,
                          flags | MPOL_MF_INVERT, &pagelist);
 
        err = PTR_ERR(vma);     /* maybe ... */
@@ -1265,7 +1311,7 @@ static long do_mbind(unsigned long start, unsigned long len,
                                        (unsigned long)vma,
                                        MIGRATE_SYNC, MR_MEMPOLICY_MBIND);
                        if (nr_failed)
-                               putback_lru_pages(&pagelist);
+                               putback_movable_pages(&pagelist);
                }
 
                if (nr_failed && (flags & MPOL_MF_STRICT))
@@ -2065,6 +2111,16 @@ retry_cpuset:
 }
 EXPORT_SYMBOL(alloc_pages_current);
 
+int vma_dup_policy(struct vm_area_struct *src, struct vm_area_struct *dst)
+{
+       struct mempolicy *pol = mpol_dup(vma_policy(src));
+
+       if (IS_ERR(pol))
+               return PTR_ERR(pol);
+       dst->vm_policy = pol;
+       return 0;
+}
+
 /*
  * If mpol_dup() sees current->cpuset == cpuset_being_rebound, then it
  * rebinds the mempolicy its copying by calling mpol_rebind_policy()
index 5499047..659aa42 100644 (file)
@@ -73,7 +73,7 @@ mempool_t *mempool_create_node(int min_nr, mempool_alloc_t *alloc_fn,
                               gfp_t gfp_mask, int node_id)
 {
        mempool_t *pool;
-       pool = kmalloc_node(sizeof(*pool), gfp_mask | __GFP_ZERO, node_id);
+       pool = kzalloc_node(sizeof(*pool), gfp_mask, node_id);
        if (!pool)
                return NULL;
        pool->elements = kmalloc_node(min_nr * sizeof(void *),
index 6f0c244..9c8d5f5 100644 (file)
@@ -100,6 +100,10 @@ void putback_movable_pages(struct list_head *l)
        struct page *page2;
 
        list_for_each_entry_safe(page, page2, l, lru) {
+               if (unlikely(PageHuge(page))) {
+                       putback_active_hugepage(page);
+                       continue;
+               }
                list_del(&page->lru);
                dec_zone_page_state(page, NR_ISOLATED_ANON +
                                page_is_file_cache(page));
@@ -307,7 +311,7 @@ static inline bool buffer_migrate_lock_buffers(struct buffer_head *head,
  * 2 for pages with a mapping
  * 3 for pages with a mapping and PagePrivate/PagePrivate2 set.
  */
-static int migrate_page_move_mapping(struct address_space *mapping,
+int migrate_page_move_mapping(struct address_space *mapping,
                struct page *newpage, struct page *page,
                struct buffer_head *head, enum migrate_mode mode)
 {
@@ -945,6 +949,16 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
        struct page *new_hpage = get_new_page(hpage, private, &result);
        struct anon_vma *anon_vma = NULL;
 
+       /*
+        * Movability of hugepages depends on architectures and hugepage size.
+        * This check is necessary because some callers of hugepage migration
+        * like soft offline and memory hotremove don't walk through page
+        * tables or check whether the hugepage is pmd-based or not before
+        * kicking migration.
+        */
+       if (!hugepage_migration_support(page_hstate(hpage)))
+               return -ENOSYS;
+
        if (!new_hpage)
                return -ENOMEM;
 
@@ -975,6 +989,8 @@ static int unmap_and_move_huge_page(new_page_t get_new_page,
 
        unlock_page(hpage);
 out:
+       if (rc != -EAGAIN)
+               putback_active_hugepage(hpage);
        put_page(new_hpage);
        if (result) {
                if (rc)
@@ -1025,7 +1041,11 @@ int migrate_pages(struct list_head *from, new_page_t get_new_page,
                list_for_each_entry_safe(page, page2, from, lru) {
                        cond_resched();
 
-                       rc = unmap_and_move(get_new_page, private,
+                       if (PageHuge(page))
+                               rc = unmap_and_move_huge_page(get_new_page,
+                                               private, page, pass > 2, mode);
+                       else
+                               rc = unmap_and_move(get_new_page, private,
                                                page, pass > 2, mode);
 
                        switch(rc) {
@@ -1058,32 +1078,6 @@ out:
        return rc;
 }
 
-int migrate_huge_page(struct page *hpage, new_page_t get_new_page,
-                     unsigned long private, enum migrate_mode mode)
-{
-       int pass, rc;
-
-       for (pass = 0; pass < 10; pass++) {
-               rc = unmap_and_move_huge_page(get_new_page, private,
-                                               hpage, pass > 2, mode);
-               switch (rc) {
-               case -ENOMEM:
-                       goto out;
-               case -EAGAIN:
-                       /* try again */
-                       cond_resched();
-                       break;
-               case MIGRATEPAGE_SUCCESS:
-                       goto out;
-               default:
-                       rc = -EIO;
-                       goto out;
-               }
-       }
-out:
-       return rc;
-}
-
 #ifdef CONFIG_NUMA
 /*
  * Move a list of individual pages
@@ -1108,7 +1102,11 @@ static struct page *new_page_node(struct page *p, unsigned long private,
 
        *result = &pm->status;
 
-       return alloc_pages_exact_node(pm->node,
+       if (PageHuge(p))
+               return alloc_huge_page_node(page_hstate(compound_head(p)),
+                                       pm->node);
+       else
+               return alloc_pages_exact_node(pm->node,
                                GFP_HIGHUSER_MOVABLE | GFP_THISNODE, 0);
 }
 
@@ -1168,6 +1166,11 @@ static int do_move_page_to_node_array(struct mm_struct *mm,
                                !migrate_all)
                        goto put_and_set;
 
+               if (PageHuge(page)) {
+                       isolate_huge_page(page, &pagelist);
+                       goto put_and_set;
+               }
+
                err = isolate_lru_page(page);
                if (!err) {
                        list_add_tail(&page->lru, &pagelist);
@@ -1190,7 +1193,7 @@ set_status:
                err = migrate_pages(&pagelist, new_page_node,
                                (unsigned long)pm, MIGRATE_SYNC, MR_SYSCALL);
                if (err)
-                       putback_lru_pages(&pagelist);
+                       putback_movable_pages(&pagelist);
        }
 
        up_read(&mm->mmap_sem);
@@ -1468,7 +1471,7 @@ static bool migrate_balanced_pgdat(struct pglist_data *pgdat,
                if (!populated_zone(zone))
                        continue;
 
-               if (zone->all_unreclaimable)
+               if (!zone_reclaimable(zone))
                        continue;
 
                /* Avoid waking kswapd by allocating pages_to_migrate pages. */
index 79b7cf7..d638026 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/swap.h>
 #include <linux/swapops.h>
 #include <linux/pagemap.h>
+#include <linux/pagevec.h>
 #include <linux/mempolicy.h>
 #include <linux/syscalls.h>
 #include <linux/sched.h>
@@ -18,6 +19,8 @@
 #include <linux/rmap.h>
 #include <linux/mmzone.h>
 #include <linux/hugetlb.h>
+#include <linux/memcontrol.h>
+#include <linux/mm_inline.h>
 
 #include "internal.h"
 
@@ -87,6 +90,47 @@ void mlock_vma_page(struct page *page)
        }
 }
 
+/*
+ * Finish munlock after successful page isolation
+ *
+ * Page must be locked. This is a wrapper for try_to_munlock()
+ * and putback_lru_page() with munlock accounting.
+ */
+static void __munlock_isolated_page(struct page *page)
+{
+       int ret = SWAP_AGAIN;
+
+       /*
+        * Optimization: if the page was mapped just once, that's our mapping
+        * and we don't need to check all the other vmas.
+        */
+       if (page_mapcount(page) > 1)
+               ret = try_to_munlock(page);
+
+       /* Did try_to_unlock() succeed or punt? */
+       if (ret != SWAP_MLOCK)
+               count_vm_event(UNEVICTABLE_PGMUNLOCKED);
+
+       putback_lru_page(page);
+}
+
+/*
+ * Accounting for page isolation fail during munlock
+ *
+ * Performs accounting when page isolation fails in munlock. There is nothing
+ * else to do because it means some other task has already removed the page
+ * from the LRU. putback_lru_page() will take care of removing the page from
+ * the unevictable list, if necessary. vmscan [page_referenced()] will move
+ * the page back to the unevictable list if some other vma has it mlocked.
+ */
+static void __munlock_isolation_failed(struct page *page)
+{
+       if (PageUnevictable(page))
+               count_vm_event(UNEVICTABLE_PGSTRANDED);
+       else
+               count_vm_event(UNEVICTABLE_PGMUNLOCKED);
+}
+
 /**
  * munlock_vma_page - munlock a vma page
  * @page - page to be unlocked
@@ -112,37 +156,10 @@ unsigned int munlock_vma_page(struct page *page)
                unsigned int nr_pages = hpage_nr_pages(page);
                mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages);
                page_mask = nr_pages - 1;
-               if (!isolate_lru_page(page)) {
-                       int ret = SWAP_AGAIN;
-
-                       /*
-                        * Optimization: if the page was mapped just once,
-                        * that's our mapping and we don't need to check all the
-                        * other vmas.
-                        */
-                       if (page_mapcount(page) > 1)
-                               ret = try_to_munlock(page);
-                       /*
-                        * did try_to_unlock() succeed or punt?
-                        */
-                       if (ret != SWAP_MLOCK)
-                               count_vm_event(UNEVICTABLE_PGMUNLOCKED);
-
-                       putback_lru_page(page);
-               } else {
-                       /*
-                        * Some other task has removed the page from the LRU.
-                        * putback_lru_page() will take care of removing the
-                        * page from the unevictable list, if necessary.
-                        * vmscan [page_referenced()] will move the page back
-                        * to the unevictable list if some other vma has it
-                        * mlocked.
-                        */
-                       if (PageUnevictable(page))
-                               count_vm_event(UNEVICTABLE_PGSTRANDED);
-                       else
-                               count_vm_event(UNEVICTABLE_PGMUNLOCKED);
-               }
+               if (!isolate_lru_page(page))
+                       __munlock_isolated_page(page);
+               else
+                       __munlock_isolation_failed(page);
        }
 
        return page_mask;
@@ -209,6 +226,191 @@ static int __mlock_posix_error_return(long retval)
        return retval;
 }
 
+/*
+ * Prepare page for fast batched LRU putback via __putback_lru_fast()
+ *
+ * The fast path is available only for evictable pages with single mapping.
+ * Then we can bypass the per-cpu pvec and get better performance.
+ * when mapcount > 1 we need try_to_munlock() which can fail.
+ * when !page_evictable(), we need the full redo logic of putback_lru_page to
+ * avoid leaving evictable page in unevictable list.
+ *
+ * In case of success, @page is added to @pvec and @pgrescued is incremented
+ * in case that the page was previously unevictable. @page is also unlocked.
+ */
+static bool __putback_lru_fast_prepare(struct page *page, struct pagevec *pvec,
+               int *pgrescued)
+{
+       VM_BUG_ON(PageLRU(page));
+       VM_BUG_ON(!PageLocked(page));
+
+       if (page_mapcount(page) <= 1 && page_evictable(page)) {
+               pagevec_add(pvec, page);
+               if (TestClearPageUnevictable(page))
+                       (*pgrescued)++;
+               unlock_page(page);
+               return true;
+       }
+
+       return false;
+}
+
+/*
+ * Putback multiple evictable pages to the LRU
+ *
+ * Batched putback of evictable pages that bypasses the per-cpu pvec. Some of
+ * the pages might have meanwhile become unevictable but that is OK.
+ */
+static void __putback_lru_fast(struct pagevec *pvec, int pgrescued)
+{
+       count_vm_events(UNEVICTABLE_PGMUNLOCKED, pagevec_count(pvec));
+       /*
+        * __pagevec_lru_add() calls release_pages() so we don't call
+        * put_page() explicitly
+        */
+       __pagevec_lru_add(pvec);
+       count_vm_events(UNEVICTABLE_PGRESCUED, pgrescued);
+}
+
+/*
+ * Munlock a batch of pages from the same zone
+ *
+ * The work is split to two main phases. First phase clears the Mlocked flag
+ * and attempts to isolate the pages, all under a single zone lru lock.
+ * The second phase finishes the munlock only for pages where isolation
+ * succeeded.
+ *
+ * Note that the pagevec may be modified during the process.
+ */
+static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone)
+{
+       int i;
+       int nr = pagevec_count(pvec);
+       int delta_munlocked = -nr;
+       struct pagevec pvec_putback;
+       int pgrescued = 0;
+
+       /* Phase 1: page isolation */
+       spin_lock_irq(&zone->lru_lock);
+       for (i = 0; i < nr; i++) {
+               struct page *page = pvec->pages[i];
+
+               if (TestClearPageMlocked(page)) {
+                       struct lruvec *lruvec;
+                       int lru;
+
+                       if (PageLRU(page)) {
+                               lruvec = mem_cgroup_page_lruvec(page, zone);
+                               lru = page_lru(page);
+                               /*
+                                * We already have pin from follow_page_mask()
+                                * so we can spare the get_page() here.
+                                */
+                               ClearPageLRU(page);
+                               del_page_from_lru_list(page, lruvec, lru);
+                       } else {
+                               __munlock_isolation_failed(page);
+                               goto skip_munlock;
+                       }
+
+               } else {
+skip_munlock:
+                       /*
+                        * We won't be munlocking this page in the next phase
+                        * but we still need to release the follow_page_mask()
+                        * pin.
+                        */
+                       pvec->pages[i] = NULL;
+                       put_page(page);
+                       delta_munlocked++;
+               }
+       }
+       __mod_zone_page_state(zone, NR_MLOCK, delta_munlocked);
+       spin_unlock_irq(&zone->lru_lock);
+
+       /* Phase 2: page munlock */
+       pagevec_init(&pvec_putback, 0);
+       for (i = 0; i < nr; i++) {
+               struct page *page = pvec->pages[i];
+
+               if (page) {
+                       lock_page(page);
+                       if (!__putback_lru_fast_prepare(page, &pvec_putback,
+                                       &pgrescued)) {
+                               /*
+                                * Slow path. We don't want to lose the last
+                                * pin before unlock_page()
+                                */
+                               get_page(page); /* for putback_lru_page() */
+                               __munlock_isolated_page(page);
+                               unlock_page(page);
+                               put_page(page); /* from follow_page_mask() */
+                       }
+               }
+       }
+
+       /*
+        * Phase 3: page putback for pages that qualified for the fast path
+        * This will also call put_page() to return pin from follow_page_mask()
+        */
+       if (pagevec_count(&pvec_putback))
+               __putback_lru_fast(&pvec_putback, pgrescued);
+}
+
+/*
+ * Fill up pagevec for __munlock_pagevec using pte walk
+ *
+ * The function expects that the struct page corresponding to @start address is
+ * a non-THP page already pinned and in the @pvec, and that it belongs to @zone.
+ *
+ * The rest of @pvec is filled by subsequent pages within the same pmd and same
+ * zone, as long as the pte's are present and vm_normal_page() succeeds. These
+ * pages also get pinned.
+ *
+ * Returns the address of the next page that should be scanned. This equals
+ * @start + PAGE_SIZE when no page could be added by the pte walk.
+ */
+static unsigned long __munlock_pagevec_fill(struct pagevec *pvec,
+               struct vm_area_struct *vma, int zoneid, unsigned long start,
+               unsigned long end)
+{
+       pte_t *pte;
+       spinlock_t *ptl;
+
+       /*
+        * Initialize pte walk starting at the already pinned page where we
+        * are sure that there is a pte.
+        */
+       pte = get_locked_pte(vma->vm_mm, start, &ptl);
+       end = min(end, pmd_addr_end(start, end));
+
+       /* The page next to the pinned page is the first we will try to get */
+       start += PAGE_SIZE;
+       while (start < end) {
+               struct page *page = NULL;
+               pte++;
+               if (pte_present(*pte))
+                       page = vm_normal_page(vma, start, *pte);
+               /*
+                * Break if page could not be obtained or the page's node+zone does not
+                * match
+                */
+               if (!page || page_zone_id(page) != zoneid)
+                       break;
+
+               get_page(page);
+               /*
+                * Increase the address that will be returned *before* the
+                * eventual break due to pvec becoming full by adding the page
+                */
+               start += PAGE_SIZE;
+               if (pagevec_add(pvec, page) == 0)
+                       break;
+       }
+       pte_unmap_unlock(pte, ptl);
+       return start;
+}
+
 /*
  * munlock_vma_pages_range() - munlock all pages in the vma range.'
  * @vma - vma containing range to be munlock()ed.
@@ -233,9 +435,13 @@ void munlock_vma_pages_range(struct vm_area_struct *vma,
        vma->vm_flags &= ~VM_LOCKED;
 
        while (start < end) {
-               struct page *page;
+               struct page *page = NULL;
                unsigned int page_mask, page_increm;
+               struct pagevec pvec;
+               struct zone *zone;
+               int zoneid;
 
+               pagevec_init(&pvec, 0);
                /*
                 * Although FOLL_DUMP is intended for get_dump_page(),
                 * it just so happens that its special treatment of the
@@ -244,21 +450,45 @@ void munlock_vma_pages_range(struct vm_area_struct *vma,
                 * has sneaked into the range, we won't oops here: great).
                 */
                page = follow_page_mask(vma, start, FOLL_GET | FOLL_DUMP,
-                                       &page_mask);
+                               &page_mask);
+
                if (page && !IS_ERR(page)) {
-                       lock_page(page);
-                       lru_add_drain();
-                       /*
-                        * Any THP page found by follow_page_mask() may have
-                        * gotten split before reaching munlock_vma_page(),
-                        * so we need to recompute the page_mask here.
-                        */
-                       page_mask = munlock_vma_page(page);
-                       unlock_page(page);
-                       put_page(page);
+                       if (PageTransHuge(page)) {
+                               lock_page(page);
+                               /*
+                                * Any THP page found by follow_page_mask() may
+                                * have gotten split before reaching
+                                * munlock_vma_page(), so we need to recompute
+                                * the page_mask here.
+                                */
+                               page_mask = munlock_vma_page(page);
+                               unlock_page(page);
+                               put_page(page); /* follow_page_mask() */
+                       } else {
+                               /*
+                                * Non-huge pages are handled in batches via
+                                * pagevec. The pin from follow_page_mask()
+                                * prevents them from collapsing by THP.
+                                */
+                               pagevec_add(&pvec, page);
+                               zone = page_zone(page);
+                               zoneid = page_zone_id(page);
+
+                               /*
+                                * Try to fill the rest of pagevec using fast
+                                * pte walk. This will also update start to
+                                * the next page to process. Then munlock the
+                                * pagevec.
+                                */
+                               start = __munlock_pagevec_fill(&pvec, vma,
+                                               zoneid, start, end);
+                               __munlock_pagevec(&pvec, zone);
+                               goto next;
+                       }
                }
                page_increm = 1 + (~(start >> PAGE_SHIFT) & page_mask);
                start += page_increm * PAGE_SIZE;
+next:
                cond_resched();
        }
 }
index f9c97d1..9d54851 100644 (file)
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1202,7 +1202,6 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
                        unsigned long *populate)
 {
        struct mm_struct * mm = current->mm;
-       struct inode *inode;
        vm_flags_t vm_flags;
 
        *populate = 0;
@@ -1265,9 +1264,9 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
                        return -EAGAIN;
        }
 
-       inode = file ? file_inode(file) : NULL;
-
        if (file) {
+               struct inode *inode = file_inode(file);
+
                switch (flags & MAP_TYPE) {
                case MAP_SHARED:
                        if ((prot&PROT_WRITE) && !(file->f_mode&FMODE_WRITE))
@@ -1302,6 +1301,8 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
 
                        if (!file->f_op || !file->f_op->mmap)
                                return -ENODEV;
+                       if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
+                               return -EINVAL;
                        break;
 
                default:
@@ -1310,6 +1311,8 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
        } else {
                switch (flags & MAP_TYPE) {
                case MAP_SHARED:
+                       if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
+                               return -EINVAL;
                        /*
                         * Ignore pgoff.
                         */
@@ -1476,11 +1479,9 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 {
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma, *prev;
-       int correct_wcount = 0;
        int error;
        struct rb_node **rb_link, *rb_parent;
        unsigned long charged = 0;
-       struct inode *inode =  file ? file_inode(file) : NULL;
 
        /* Check against address space limit. */
        if (!may_expand_vm(mm, len >> PAGE_SHIFT)) {
@@ -1544,16 +1545,11 @@ munmap_back:
        vma->vm_pgoff = pgoff;
        INIT_LIST_HEAD(&vma->anon_vma_chain);
 
-       error = -EINVAL;        /* when rejecting VM_GROWSDOWN|VM_GROWSUP */
-
        if (file) {
-               if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
-                       goto free_vma;
                if (vm_flags & VM_DENYWRITE) {
                        error = deny_write_access(file);
                        if (error)
                                goto free_vma;
-                       correct_wcount = 1;
                }
                vma->vm_file = get_file(file);
                error = file->f_op->mmap(file, vma);
@@ -1570,11 +1566,8 @@ munmap_back:
                WARN_ON_ONCE(addr != vma->vm_start);
 
                addr = vma->vm_start;
-               pgoff = vma->vm_pgoff;
                vm_flags = vma->vm_flags;
        } else if (vm_flags & VM_SHARED) {
-               if (unlikely(vm_flags & (VM_GROWSDOWN|VM_GROWSUP)))
-                       goto free_vma;
                error = shmem_zero_setup(vma);
                if (error)
                        goto free_vma;
@@ -1596,11 +1589,10 @@ munmap_back:
        }
 
        vma_link(mm, vma, prev, rb_link, rb_parent);
-       file = vma->vm_file;
-
        /* Once vma denies write, undo our temporary denial count */
-       if (correct_wcount)
-               atomic_inc(&inode->i_writecount);
+       if (vm_flags & VM_DENYWRITE)
+               allow_write_access(file);
+       file = vma->vm_file;
 out:
        perf_event_mmap(vma);
 
@@ -1616,11 +1608,20 @@ out:
        if (file)
                uprobe_mmap(vma);
 
+       /*
+        * New (or expanded) vma always get soft dirty status.
+        * Otherwise user-space soft-dirty page tracker won't
+        * be able to distinguish situation when vma area unmapped,
+        * then new mapped in-place (which must be aimed as
+        * a completely new data area).
+        */
+       vma->vm_flags |= VM_SOFTDIRTY;
+
        return addr;
 
 unmap_and_free_vma:
-       if (correct_wcount)
-               atomic_inc(&inode->i_writecount);
+       if (vm_flags & VM_DENYWRITE)
+               allow_write_access(file);
        vma->vm_file = NULL;
        fput(file);
 
@@ -2380,7 +2381,6 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
 static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
              unsigned long addr, int new_below)
 {
-       struct mempolicy *pol;
        struct vm_area_struct *new;
        int err = -ENOMEM;
 
@@ -2404,12 +2404,9 @@ static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
                new->vm_pgoff += ((addr - vma->vm_start) >> PAGE_SHIFT);
        }
 
-       pol = mpol_dup(vma_policy(vma));
-       if (IS_ERR(pol)) {
-               err = PTR_ERR(pol);
+       err = vma_dup_policy(vma, new);
+       if (err)
                goto out_free_vma;
-       }
-       vma_set_policy(new, pol);
 
        if (anon_vma_clone(new, vma))
                goto out_free_mpol;
@@ -2437,7 +2434,7 @@ static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
                fput(new->vm_file);
        unlink_anon_vmas(new);
  out_free_mpol:
-       mpol_put(pol);
+       mpol_put(vma_policy(new));
  out_free_vma:
        kmem_cache_free(vm_area_cachep, new);
  out_err:
@@ -2663,6 +2660,7 @@ out:
        mm->total_vm += len >> PAGE_SHIFT;
        if (flags & VM_LOCKED)
                mm->locked_vm += (len >> PAGE_SHIFT);
+       vma->vm_flags |= VM_SOFTDIRTY;
        return addr;
 }
 
@@ -2780,7 +2778,6 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
        struct mm_struct *mm = vma->vm_mm;
        struct vm_area_struct *new_vma, *prev;
        struct rb_node **rb_link, *rb_parent;
-       struct mempolicy *pol;
        bool faulted_in_anon_vma = true;
 
        /*
@@ -2825,10 +2822,8 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
                        new_vma->vm_start = addr;
                        new_vma->vm_end = addr + len;
                        new_vma->vm_pgoff = pgoff;
-                       pol = mpol_dup(vma_policy(vma));
-                       if (IS_ERR(pol))
+                       if (vma_dup_policy(vma, new_vma))
                                goto out_free_vma;
-                       vma_set_policy(new_vma, pol);
                        INIT_LIST_HEAD(&new_vma->anon_vma_chain);
                        if (anon_vma_clone(new_vma, vma))
                                goto out_free_mempol;
@@ -2843,7 +2838,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
        return new_vma;
 
  out_free_mempol:
-       mpol_put(pol);
+       mpol_put(vma_policy(new_vma));
  out_free_vma:
        kmem_cache_free(vm_area_cachep, new_vma);
        return NULL;
@@ -2930,7 +2925,7 @@ int install_special_mapping(struct mm_struct *mm,
        vma->vm_start = addr;
        vma->vm_end = addr + len;
 
-       vma->vm_flags = vm_flags | mm->def_flags | VM_DONTEXPAND;
+       vma->vm_flags = vm_flags | mm->def_flags | VM_DONTEXPAND | VM_SOFTDIRTY;
        vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
 
        vma->vm_ops = &special_mapping_vmops;
index 0843feb..91b13d6 100644 (file)
@@ -25,6 +25,7 @@
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
+#include <asm/pgalloc.h>
 
 #include "internal.h"
 
@@ -62,8 +63,10 @@ static pmd_t *alloc_new_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
                return NULL;
 
        pmd = pmd_alloc(mm, pud, addr);
-       if (!pmd)
+       if (!pmd) {
+               pud_free(mm, pud);
                return NULL;
+       }
 
        VM_BUG_ON(pmd_trans_huge(*pmd));
 
index 98e75f2..314e9d2 100644 (file)
@@ -678,9 +678,12 @@ out:
  */
 void pagefault_out_of_memory(void)
 {
-       struct zonelist *zonelist = node_zonelist(first_online_node,
-                                                 GFP_KERNEL);
+       struct zonelist *zonelist;
 
+       if (mem_cgroup_oom_synchronize())
+               return;
+
+       zonelist = node_zonelist(first_online_node, GFP_KERNEL);
        if (try_set_zonelist_oom(zonelist, GFP_KERNEL)) {
                out_of_memory(NULL, 0, 0, NULL, false);
                clear_zonelist_oom(zonelist, GFP_KERNEL);
index 3f0c895..f5236f8 100644 (file)
 #include <linux/pagevec.h>
 #include <linux/timer.h>
 #include <linux/sched/rt.h>
+#include <linux/mm_inline.h>
 #include <trace/events/writeback.h>
 
+#include "internal.h"
+
 /*
  * Sleep at most 200ms at a time in balance_dirty_pages().
  */
@@ -241,9 +244,6 @@ static unsigned long global_dirtyable_memory(void)
        if (!vm_highmem_is_dirtyable)
                x -= highmem_dirtyable_memory(x);
 
-       /* Subtract min_free_kbytes */
-       x -= min_t(unsigned long, x, min_free_kbytes >> (PAGE_SHIFT - 10));
-
        return x + 1;   /* Ensure that we never return 0 */
 }
 
@@ -584,6 +584,37 @@ unsigned long bdi_dirty_limit(struct backing_dev_info *bdi, unsigned long dirty)
        return bdi_dirty;
 }
 
+/*
+ *                           setpoint - dirty 3
+ *        f(dirty) := 1.0 + (----------------)
+ *                           limit - setpoint
+ *
+ * it's a 3rd order polynomial that subjects to
+ *
+ * (1) f(freerun)  = 2.0 => rampup dirty_ratelimit reasonably fast
+ * (2) f(setpoint) = 1.0 => the balance point
+ * (3) f(limit)    = 0   => the hard limit
+ * (4) df/dx      <= 0  => negative feedback control
+ * (5) the closer to setpoint, the smaller |df/dx| (and the reverse)
+ *     => fast response on large errors; small oscillation near setpoint
+ */
+static inline long long pos_ratio_polynom(unsigned long setpoint,
+                                         unsigned long dirty,
+                                         unsigned long limit)
+{
+       long long pos_ratio;
+       long x;
+
+       x = div_s64(((s64)setpoint - (s64)dirty) << RATELIMIT_CALC_SHIFT,
+                   limit - setpoint + 1);
+       pos_ratio = x;
+       pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT;
+       pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT;
+       pos_ratio += 1 << RATELIMIT_CALC_SHIFT;
+
+       return clamp(pos_ratio, 0LL, 2LL << RATELIMIT_CALC_SHIFT);
+}
+
 /*
  * Dirty position control.
  *
@@ -682,26 +713,80 @@ static unsigned long bdi_position_ratio(struct backing_dev_info *bdi,
        /*
         * global setpoint
         *
-        *                           setpoint - dirty 3
-        *        f(dirty) := 1.0 + (----------------)
-        *                           limit - setpoint
+        * See comment for pos_ratio_polynom().
+        */
+       setpoint = (freerun + limit) / 2;
+       pos_ratio = pos_ratio_polynom(setpoint, dirty, limit);
+
+       /*
+        * The strictlimit feature is a tool preventing mistrusted filesystems
+        * from growing a large number of dirty pages before throttling. For
+        * such filesystems balance_dirty_pages always checks bdi counters
+        * against bdi limits. Even if global "nr_dirty" is under "freerun".
+        * This is especially important for fuse which sets bdi->max_ratio to
+        * 1% by default. Without strictlimit feature, fuse writeback may
+        * consume arbitrary amount of RAM because it is accounted in
+        * NR_WRITEBACK_TEMP which is not involved in calculating "nr_dirty".
         *
-        * it's a 3rd order polynomial that subjects to
+        * Here, in bdi_position_ratio(), we calculate pos_ratio based on
+        * two values: bdi_dirty and bdi_thresh. Let's consider an example:
+        * total amount of RAM is 16GB, bdi->max_ratio is equal to 1%, global
+        * limits are set by default to 10% and 20% (background and throttle).
+        * Then bdi_thresh is 1% of 20% of 16GB. This amounts to ~8K pages.
+        * bdi_dirty_limit(bdi, bg_thresh) is about ~4K pages. bdi_setpoint is
+        * about ~6K pages (as the average of background and throttle bdi
+        * limits). The 3rd order polynomial will provide positive feedback if
+        * bdi_dirty is under bdi_setpoint and vice versa.
         *
-        * (1) f(freerun)  = 2.0 => rampup dirty_ratelimit reasonably fast
-        * (2) f(setpoint) = 1.0 => the balance point
-        * (3) f(limit)    = 0   => the hard limit
-        * (4) df/dx      <= 0   => negative feedback control
-        * (5) the closer to setpoint, the smaller |df/dx| (and the reverse)
-        *     => fast response on large errors; small oscillation near setpoint
+        * Note, that we cannot use global counters in these calculations
+        * because we want to throttle process writing to a strictlimit BDI
+        * much earlier than global "freerun" is reached (~23MB vs. ~2.3GB
+        * in the example above).
         */
-       setpoint = (freerun + limit) / 2;
-       x = div_s64(((s64)setpoint - (s64)dirty) << RATELIMIT_CALC_SHIFT,
-                   limit - setpoint + 1);
-       pos_ratio = x;
-       pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT;
-       pos_ratio = pos_ratio * x >> RATELIMIT_CALC_SHIFT;
-       pos_ratio += 1 << RATELIMIT_CALC_SHIFT;
+       if (unlikely(bdi->capabilities & BDI_CAP_STRICTLIMIT)) {
+               long long bdi_pos_ratio;
+               unsigned long bdi_bg_thresh;
+
+               if (bdi_dirty < 8)
+                       return min_t(long long, pos_ratio * 2,
+                                    2 << RATELIMIT_CALC_SHIFT);
+
+               if (bdi_dirty >= bdi_thresh)
+                       return 0;
+
+               bdi_bg_thresh = div_u64((u64)bdi_thresh * bg_thresh, thresh);
+               bdi_setpoint = dirty_freerun_ceiling(bdi_thresh,
+                                                    bdi_bg_thresh);
+
+               if (bdi_setpoint == 0 || bdi_setpoint == bdi_thresh)
+                       return 0;
+
+               bdi_pos_ratio = pos_ratio_polynom(bdi_setpoint, bdi_dirty,
+                                                 bdi_thresh);
+
+               /*
+                * Typically, for strictlimit case, bdi_setpoint << setpoint
+                * and pos_ratio >> bdi_pos_ratio. In the other words global
+                * state ("dirty") is not limiting factor and we have to
+                * make decision based on bdi counters. But there is an
+                * important case when global pos_ratio should get precedence:
+                * global limits are exceeded (e.g. due to activities on other
+                * BDIs) while given strictlimit BDI is below limit.
+                *
+                * "pos_ratio * bdi_pos_ratio" would work for the case above,
+                * but it would look too non-natural for the case of all
+                * activity in the system coming from a single strictlimit BDI
+                * with bdi->max_ratio == 100%.
+                *
+                * Note that min() below somewhat changes the dynamics of the
+                * control system. Normally, pos_ratio value can be well over 3
+                * (when globally we are at freerun and bdi is well below bdi
+                * setpoint). Now the maximum pos_ratio in the same situation
+                * is 2. We might want to tweak this if we observe the control
+                * system is too slow to adapt.
+                */
+               return min(pos_ratio, bdi_pos_ratio);
+       }
 
        /*
         * We have computed basic pos_ratio above based on global situation. If
@@ -994,6 +1079,27 @@ static void bdi_update_dirty_ratelimit(struct backing_dev_info *bdi,
         * keep that period small to reduce time lags).
         */
        step = 0;
+
+       /*
+        * For strictlimit case, calculations above were based on bdi counters
+        * and limits (starting from pos_ratio = bdi_position_ratio() and up to
+        * balanced_dirty_ratelimit = task_ratelimit * write_bw / dirty_rate).
+        * Hence, to calculate "step" properly, we have to use bdi_dirty as
+        * "dirty" and bdi_setpoint as "setpoint".
+        *
+        * We rampup dirty_ratelimit forcibly if bdi_dirty is low because
+        * it's possible that bdi_thresh is close to zero due to inactivity
+        * of backing device (see the implementation of bdi_dirty_limit()).
+        */
+       if (unlikely(bdi->capabilities & BDI_CAP_STRICTLIMIT)) {
+               dirty = bdi_dirty;
+               if (bdi_dirty < 8)
+                       setpoint = bdi_dirty + 1;
+               else
+                       setpoint = (bdi_thresh +
+                                   bdi_dirty_limit(bdi, bg_thresh)) / 2;
+       }
+
        if (dirty < setpoint) {
                x = min(bdi->balanced_dirty_ratelimit,
                         min(balanced_dirty_ratelimit, task_ratelimit));
@@ -1198,6 +1304,56 @@ static long bdi_min_pause(struct backing_dev_info *bdi,
        return pages >= DIRTY_POLL_THRESH ? 1 + t / 2 : t;
 }
 
+static inline void bdi_dirty_limits(struct backing_dev_info *bdi,
+                                   unsigned long dirty_thresh,
+                                   unsigned long background_thresh,
+                                   unsigned long *bdi_dirty,
+                                   unsigned long *bdi_thresh,
+                                   unsigned long *bdi_bg_thresh)
+{
+       unsigned long bdi_reclaimable;
+
+       /*
+        * bdi_thresh is not treated as some limiting factor as
+        * dirty_thresh, due to reasons
+        * - in JBOD setup, bdi_thresh can fluctuate a lot
+        * - in a system with HDD and USB key, the USB key may somehow
+        *   go into state (bdi_dirty >> bdi_thresh) either because
+        *   bdi_dirty starts high, or because bdi_thresh drops low.
+        *   In this case we don't want to hard throttle the USB key
+        *   dirtiers for 100 seconds until bdi_dirty drops under
+        *   bdi_thresh. Instead the auxiliary bdi control line in
+        *   bdi_position_ratio() will let the dirtier task progress
+        *   at some rate <= (write_bw / 2) for bringing down bdi_dirty.
+        */
+       *bdi_thresh = bdi_dirty_limit(bdi, dirty_thresh);
+
+       if (bdi_bg_thresh)
+               *bdi_bg_thresh = div_u64((u64)*bdi_thresh *
+                                        background_thresh,
+                                        dirty_thresh);
+
+       /*
+        * In order to avoid the stacked BDI deadlock we need
+        * to ensure we accurately count the 'dirty' pages when
+        * the threshold is low.
+        *
+        * Otherwise it would be possible to get thresh+n pages
+        * reported dirty, even though there are thresh-m pages
+        * actually dirty; with m+n sitting in the percpu
+        * deltas.
+        */
+       if (*bdi_thresh < 2 * bdi_stat_error(bdi)) {
+               bdi_reclaimable = bdi_stat_sum(bdi, BDI_RECLAIMABLE);
+               *bdi_dirty = bdi_reclaimable +
+                       bdi_stat_sum(bdi, BDI_WRITEBACK);
+       } else {
+               bdi_reclaimable = bdi_stat(bdi, BDI_RECLAIMABLE);
+               *bdi_dirty = bdi_reclaimable +
+                       bdi_stat(bdi, BDI_WRITEBACK);
+       }
+}
+
 /*
  * balance_dirty_pages() must be called by processes which are generating dirty
  * data.  It looks at the number of dirty pages in the machine and will force
@@ -1209,13 +1365,9 @@ static void balance_dirty_pages(struct address_space *mapping,
                                unsigned long pages_dirtied)
 {
        unsigned long nr_reclaimable;   /* = file_dirty + unstable_nfs */
-       unsigned long bdi_reclaimable;
        unsigned long nr_dirty;  /* = file_dirty + writeback + unstable_nfs */
-       unsigned long bdi_dirty;
-       unsigned long freerun;
        unsigned long background_thresh;
        unsigned long dirty_thresh;
-       unsigned long bdi_thresh;
        long period;
        long pause;
        long max_pause;
@@ -1226,10 +1378,16 @@ static void balance_dirty_pages(struct address_space *mapping,
        unsigned long dirty_ratelimit;
        unsigned long pos_ratio;
        struct backing_dev_info *bdi = mapping->backing_dev_info;
+       bool strictlimit = bdi->capabilities & BDI_CAP_STRICTLIMIT;
        unsigned long start_time = jiffies;
 
        for (;;) {
                unsigned long now = jiffies;
+               unsigned long uninitialized_var(bdi_thresh);
+               unsigned long thresh;
+               unsigned long uninitialized_var(bdi_dirty);
+               unsigned long dirty;
+               unsigned long bg_thresh;
 
                /*
                 * Unstable writes are a feature of certain networked
@@ -1243,61 +1401,44 @@ static void balance_dirty_pages(struct address_space *mapping,
 
                global_dirty_limits(&background_thresh, &dirty_thresh);
 
+               if (unlikely(strictlimit)) {
+                       bdi_dirty_limits(bdi, dirty_thresh, background_thresh,
+                                        &bdi_dirty, &bdi_thresh, &bg_thresh);
+
+                       dirty = bdi_dirty;
+                       thresh = bdi_thresh;
+               } else {
+                       dirty = nr_dirty;
+                       thresh = dirty_thresh;
+                       bg_thresh = background_thresh;
+               }
+
                /*
                 * Throttle it only when the background writeback cannot
                 * catch-up. This avoids (excessively) small writeouts
-                * when the bdi limits are ramping up.
+                * when the bdi limits are ramping up in case of !strictlimit.
+                *
+                * In strictlimit case make decision based on the bdi counters
+                * and limits. Small writeouts when the bdi limits are ramping
+                * up are the price we consciously pay for strictlimit-ing.
                 */
-               freerun = dirty_freerun_ceiling(dirty_thresh,
-                                               background_thresh);
-               if (nr_dirty <= freerun) {
+               if (dirty <= dirty_freerun_ceiling(thresh, bg_thresh)) {
                        current->dirty_paused_when = now;
                        current->nr_dirtied = 0;
                        current->nr_dirtied_pause =
-                               dirty_poll_interval(nr_dirty, dirty_thresh);
+                               dirty_poll_interval(dirty, thresh);
                        break;
                }
 
                if (unlikely(!writeback_in_progress(bdi)))
                        bdi_start_background_writeback(bdi);
 
-               /*
-                * bdi_thresh is not treated as some limiting factor as
-                * dirty_thresh, due to reasons
-                * - in JBOD setup, bdi_thresh can fluctuate a lot
-                * - in a system with HDD and USB key, the USB key may somehow
-                *   go into state (bdi_dirty >> bdi_thresh) either because
-                *   bdi_dirty starts high, or because bdi_thresh drops low.
-                *   In this case we don't want to hard throttle the USB key
-                *   dirtiers for 100 seconds until bdi_dirty drops under
-                *   bdi_thresh. Instead the auxiliary bdi control line in
-                *   bdi_position_ratio() will let the dirtier task progress
-                *   at some rate <= (write_bw / 2) for bringing down bdi_dirty.
-                */
-               bdi_thresh = bdi_dirty_limit(bdi, dirty_thresh);
-
-               /*
-                * In order to avoid the stacked BDI deadlock we need
-                * to ensure we accurately count the 'dirty' pages when
-                * the threshold is low.
-                *
-                * Otherwise it would be possible to get thresh+n pages
-                * reported dirty, even though there are thresh-m pages
-                * actually dirty; with m+n sitting in the percpu
-                * deltas.
-                */
-               if (bdi_thresh < 2 * bdi_stat_error(bdi)) {
-                       bdi_reclaimable = bdi_stat_sum(bdi, BDI_RECLAIMABLE);
-                       bdi_dirty = bdi_reclaimable +
-                                   bdi_stat_sum(bdi, BDI_WRITEBACK);
-               } else {
-                       bdi_reclaimable = bdi_stat(bdi, BDI_RECLAIMABLE);
-                       bdi_dirty = bdi_reclaimable +
-                                   bdi_stat(bdi, BDI_WRITEBACK);
-               }
+               if (!strictlimit)
+                       bdi_dirty_limits(bdi, dirty_thresh, background_thresh,
+                                        &bdi_dirty, &bdi_thresh, NULL);
 
                dirty_exceeded = (bdi_dirty > bdi_thresh) &&
-                                 (nr_dirty > dirty_thresh);
+                                ((nr_dirty > dirty_thresh) || strictlimit);
                if (dirty_exceeded && !bdi->dirty_exceeded)
                        bdi->dirty_exceeded = 1;
 
@@ -2002,11 +2143,17 @@ EXPORT_SYMBOL(account_page_dirtied);
 
 /*
  * Helper function for set_page_writeback family.
+ *
+ * The caller must hold mem_cgroup_begin/end_update_page_stat() lock
+ * while calling this function.
+ * See test_set_page_writeback for example.
+ *
  * NOTE: Unlike account_page_dirtied this does not rely on being atomic
  * wrt interrupts.
  */
 void account_page_writeback(struct page *page)
 {
+       mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_WRITEBACK);
        inc_zone_page_state(page, NR_WRITEBACK);
 }
 EXPORT_SYMBOL(account_page_writeback);
@@ -2223,7 +2370,10 @@ int test_clear_page_writeback(struct page *page)
 {
        struct address_space *mapping = page_mapping(page);
        int ret;
+       bool locked;
+       unsigned long memcg_flags;
 
+       mem_cgroup_begin_update_page_stat(page, &locked, &memcg_flags);
        if (mapping) {
                struct backing_dev_info *bdi = mapping->backing_dev_info;
                unsigned long flags;
@@ -2244,9 +2394,11 @@ int test_clear_page_writeback(struct page *page)
                ret = TestClearPageWriteback(page);
        }
        if (ret) {
+               mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_WRITEBACK);
                dec_zone_page_state(page, NR_WRITEBACK);
                inc_zone_page_state(page, NR_WRITTEN);
        }
+       mem_cgroup_end_update_page_stat(page, &locked, &memcg_flags);
        return ret;
 }
 
@@ -2254,7 +2406,10 @@ int test_set_page_writeback(struct page *page)
 {
        struct address_space *mapping = page_mapping(page);
        int ret;
+       bool locked;
+       unsigned long memcg_flags;
 
+       mem_cgroup_begin_update_page_stat(page, &locked, &memcg_flags);
        if (mapping) {
                struct backing_dev_info *bdi = mapping->backing_dev_info;
                unsigned long flags;
@@ -2281,6 +2436,7 @@ int test_set_page_writeback(struct page *page)
        }
        if (!ret)
                account_page_writeback(page);
+       mem_cgroup_end_update_page_stat(page, &locked, &memcg_flags);
        return ret;
 
 }
index c2b59db..0ee638f 100644 (file)
@@ -56,6 +56,7 @@
 #include <linux/ftrace_event.h>
 #include <linux/memcontrol.h>
 #include <linux/prefetch.h>
+#include <linux/mm_inline.h>
 #include <linux/migrate.h>
 #include <linux/page-debug-flags.h>
 #include <linux/hugetlb.h>
@@ -488,8 +489,10 @@ __find_buddy_index(unsigned long page_idx, unsigned int order)
  * (c) a page and its buddy have the same order &&
  * (d) a page and its buddy are in the same zone.
  *
- * For recording whether a page is in the buddy system, we set ->_mapcount -2.
- * Setting, clearing, and testing _mapcount -2 is serialized by zone->lock.
+ * For recording whether a page is in the buddy system, we set ->_mapcount
+ * PAGE_BUDDY_MAPCOUNT_VALUE.
+ * Setting, clearing, and testing _mapcount PAGE_BUDDY_MAPCOUNT_VALUE is
+ * serialized by zone->lock.
  *
  * For recording page's order, we use page_private(page).
  */
@@ -527,8 +530,9 @@ static inline int page_is_buddy(struct page *page, struct page *buddy,
  * as necessary, plus some accounting needed to play nicely with other
  * parts of the VM system.
  * At each level, we keep a list of pages, which are heads of continuous
- * free pages of length of (1 << order) and marked with _mapcount -2. Page's
- * order is recorded in page_private(page) field.
+ * free pages of length of (1 << order) and marked with _mapcount
+ * PAGE_BUDDY_MAPCOUNT_VALUE. Page's order is recorded in page_private(page)
+ * field.
  * So when we are allocating or freeing one, we can derive the state of the
  * other.  That is, if we allocate a small block, and both were
  * free, the remainder of the region must be split into blocks.
@@ -647,7 +651,6 @@ static void free_pcppages_bulk(struct zone *zone, int count,
        int to_free = count;
 
        spin_lock(&zone->lock);
-       zone->all_unreclaimable = 0;
        zone->pages_scanned = 0;
 
        while (to_free) {
@@ -696,7 +699,6 @@ static void free_one_page(struct zone *zone, struct page *page, int order,
                                int migratetype)
 {
        spin_lock(&zone->lock);
-       zone->all_unreclaimable = 0;
        zone->pages_scanned = 0;
 
        __free_one_page(page, zone, order, migratetype);
@@ -721,7 +723,8 @@ static bool free_pages_prepare(struct page *page, unsigned int order)
                return false;
 
        if (!PageHighMem(page)) {
-               debug_check_no_locks_freed(page_address(page),PAGE_SIZE<<order);
+               debug_check_no_locks_freed(page_address(page),
+                                          PAGE_SIZE << order);
                debug_check_no_obj_freed(page_address(page),
                                           PAGE_SIZE << order);
        }
@@ -750,19 +753,19 @@ static void __free_pages_ok(struct page *page, unsigned int order)
 void __init __free_pages_bootmem(struct page *page, unsigned int order)
 {
        unsigned int nr_pages = 1 << order;
+       struct page *p = page;
        unsigned int loop;
 
-       prefetchw(page);
-       for (loop = 0; loop < nr_pages; loop++) {
-               struct page *p = &page[loop];
-
-               if (loop + 1 < nr_pages)
-                       prefetchw(p + 1);
+       prefetchw(p);
+       for (loop = 0; loop < (nr_pages - 1); loop++, p++) {
+               prefetchw(p + 1);
                __ClearPageReserved(p);
                set_page_count(p, 0);
        }
+       __ClearPageReserved(p);
+       set_page_count(p, 0);
 
-       page_zone(page)->managed_pages += 1 << order;
+       page_zone(page)->managed_pages += nr_pages;
        set_page_refcounted(page);
        __free_pages(page, order);
 }
@@ -885,7 +888,7 @@ struct page *__rmqueue_smallest(struct zone *zone, unsigned int order,
                                                int migratetype)
 {
        unsigned int current_order;
-       struct free_area * area;
+       struct free_area *area;
        struct page *page;
 
        /* Find a page of the appropriate size in the preferred list */
@@ -1007,14 +1010,60 @@ static void change_pageblock_range(struct page *pageblock_page,
        }
 }
 
+/*
+ * If breaking a large block of pages, move all free pages to the preferred
+ * allocation list. If falling back for a reclaimable kernel allocation, be
+ * more aggressive about taking ownership of free pages.
+ *
+ * On the other hand, never change migration type of MIGRATE_CMA pageblocks
+ * nor move CMA pages to different free lists. We don't want unmovable pages
+ * to be allocated from MIGRATE_CMA areas.
+ *
+ * Returns the new migratetype of the pageblock (or the same old migratetype
+ * if it was unchanged).
+ */
+static int try_to_steal_freepages(struct zone *zone, struct page *page,
+                                 int start_type, int fallback_type)
+{
+       int current_order = page_order(page);
+
+       if (is_migrate_cma(fallback_type))
+               return fallback_type;
+
+       /* Take ownership for orders >= pageblock_order */
+       if (current_order >= pageblock_order) {
+               change_pageblock_range(page, current_order, start_type);
+               return start_type;
+       }
+
+       if (current_order >= pageblock_order / 2 ||
+           start_type == MIGRATE_RECLAIMABLE ||
+           page_group_by_mobility_disabled) {
+               int pages;
+
+               pages = move_freepages_block(zone, page, start_type);
+
+               /* Claim the whole block if over half of it is free */
+               if (pages >= (1 << (pageblock_order-1)) ||
+                               page_group_by_mobility_disabled) {
+
+                       set_pageblock_migratetype(page, start_type);
+                       return start_type;
+               }
+
+       }
+
+       return fallback_type;
+}
+
 /* Remove an element from the buddy allocator from the fallback list */
 static inline struct page *
 __rmqueue_fallback(struct zone *zone, int order, int start_migratetype)
 {
-       struct free_area * area;
+       struct free_area *area;
        int current_order;
        struct page *page;
-       int migratetype, i;
+       int migratetype, new_type, i;
 
        /* Find the largest possible block of pages in the other list */
        for (current_order = MAX_ORDER-1; current_order >= order;
@@ -1034,51 +1083,29 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype)
                                        struct page, lru);
                        area->nr_free--;
 
-                       /*
-                        * If breaking a large block of pages, move all free
-                        * pages to the preferred allocation list. If falling
-                        * back for a reclaimable kernel allocation, be more
-                        * aggressive about taking ownership of free pages
-                        *
-                        * On the other hand, never change migration
-                        * type of MIGRATE_CMA pageblocks nor move CMA
-                        * pages on different free lists. We don't
-                        * want unmovable pages to be allocated from
-                        * MIGRATE_CMA areas.
-                        */
-                       if (!is_migrate_cma(migratetype) &&
-                           (current_order >= pageblock_order / 2 ||
-                            start_migratetype == MIGRATE_RECLAIMABLE ||
-                            page_group_by_mobility_disabled)) {
-                               int pages;
-                               pages = move_freepages_block(zone, page,
-                                                               start_migratetype);
-
-                               /* Claim the whole block if over half of it is free */
-                               if (pages >= (1 << (pageblock_order-1)) ||
-                                               page_group_by_mobility_disabled)
-                                       set_pageblock_migratetype(page,
-                                                               start_migratetype);
-
-                               migratetype = start_migratetype;
-                       }
+                       new_type = try_to_steal_freepages(zone, page,
+                                                         start_migratetype,
+                                                         migratetype);
 
                        /* Remove the page from the freelists */
                        list_del(&page->lru);
                        rmv_page_order(page);
 
-                       /* Take ownership for orders >= pageblock_order */
-                       if (current_order >= pageblock_order &&
-                           !is_migrate_cma(migratetype))
-                               change_pageblock_range(page, current_order,
-                                                       start_migratetype);
-
+                       /*
+                        * Borrow the excess buddy pages as well, irrespective
+                        * of whether we stole freepages, or took ownership of
+                        * the pageblock or not.
+                        *
+                        * Exception: When borrowing from MIGRATE_CMA, release
+                        * the excess buddy pages to CMA itself.
+                        */
                        expand(zone, page, order, current_order, area,
                               is_migrate_cma(migratetype)
                             ? migratetype : start_migratetype);
 
-                       trace_mm_page_alloc_extfrag(page, order, current_order,
-                               start_migratetype, migratetype);
+                       trace_mm_page_alloc_extfrag(page, order,
+                               current_order, start_migratetype, migratetype,
+                               new_type == start_migratetype);
 
                        return page;
                }
@@ -1281,7 +1308,7 @@ void mark_free_pages(struct zone *zone)
        int order, t;
        struct list_head *curr;
 
-       if (!zone->spanned_pages)
+       if (zone_is_empty(zone))
                return;
 
        spin_lock_irqsave(&zone->lock, flags);
@@ -1526,6 +1553,7 @@ again:
                                          get_pageblock_migratetype(page));
        }
 
+       __mod_zone_page_state(zone, NR_ALLOC_BATCH, -(1 << order));
        __count_zone_vm_events(PGALLOC, zone, 1 << order);
        zone_statistics(preferred_zone, zone, gfp_flags);
        local_irq_restore(flags);
@@ -1792,6 +1820,11 @@ static void zlc_clear_zones_full(struct zonelist *zonelist)
        bitmap_zero(zlc->fullzones, MAX_ZONES_PER_ZONELIST);
 }
 
+static bool zone_local(struct zone *local_zone, struct zone *zone)
+{
+       return node_distance(local_zone->node, zone->node) == LOCAL_DISTANCE;
+}
+
 static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone)
 {
        return node_isset(local_zone->node, zone->zone_pgdat->reclaim_nodes);
@@ -1829,6 +1862,11 @@ static void zlc_clear_zones_full(struct zonelist *zonelist)
 {
 }
 
+static bool zone_local(struct zone *local_zone, struct zone *zone)
+{
+       return true;
+}
+
 static bool zone_allows_reclaim(struct zone *local_zone, struct zone *zone)
 {
        return true;
@@ -1860,16 +1898,41 @@ get_page_from_freelist(gfp_t gfp_mask, nodemask_t *nodemask, unsigned int order,
 zonelist_scan:
        /*
         * Scan zonelist, looking for a zone with enough free.
-        * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
+        * See also __cpuset_node_allowed_softwall() comment in kernel/cpuset.c.
         */
        for_each_zone_zonelist_nodemask(zone, z, zonelist,
                                                high_zoneidx, nodemask) {
+               unsigned long mark;
+
                if (IS_ENABLED(CONFIG_NUMA) && zlc_active &&
                        !zlc_zone_worth_trying(zonelist, z, allowednodes))
                                continue;
                if ((alloc_flags & ALLOC_CPUSET) &&
                        !cpuset_zone_allowed_softwall(zone, gfp_mask))
                                continue;
+               BUILD_BUG_ON(ALLOC_NO_WATERMARKS < NR_WMARK);
+               if (unlikely(alloc_flags & ALLOC_NO_WATERMARKS))
+                       goto try_this_zone;
+               /*
+                * Distribute pages in proportion to the individual
+                * zone size to ensure fair page aging.  The zone a
+                * page was allocated in should have no effect on the
+                * time the page has in memory before being reclaimed.
+                *
+                * When zone_reclaim_mode is enabled, try to stay in
+                * local zones in the fastpath.  If that fails, the
+                * slowpath is entered, which will do another pass
+                * starting with the local zones, but ultimately fall
+                * back to remote zones that do not partake in the
+                * fairness round-robin cycle of this zonelist.
+                */
+               if (alloc_flags & ALLOC_WMARK_LOW) {
+                       if (zone_page_state(zone, NR_ALLOC_BATCH) <= 0)
+                               continue;
+                       if (zone_reclaim_mode &&
+                           !zone_local(preferred_zone, zone))
+                               continue;
+               }
                /*
                 * When allocating a page cache page for writing, we
                 * want to get it from a zone that is within its dirty
@@ -1900,16 +1963,11 @@ zonelist_scan:
                    (gfp_mask & __GFP_WRITE) && !zone_dirty_ok(zone))
                        goto this_zone_full;
 
-               BUILD_BUG_ON(ALLOC_NO_WATERMARKS < NR_WMARK);
-               if (!(alloc_flags & ALLOC_NO_WATERMARKS)) {
-                       unsigned long mark;
+               mark = zone->watermark[alloc_flags & ALLOC_WMARK_MASK];
+               if (!zone_watermark_ok(zone, order, mark,
+                                      classzone_idx, alloc_flags)) {
                        int ret;
 
-                       mark = zone->watermark[alloc_flags & ALLOC_WMARK_MASK];
-                       if (zone_watermark_ok(zone, order, mark,
-                                   classzone_idx, alloc_flags))
-                               goto try_this_zone;
-
                        if (IS_ENABLED(CONFIG_NUMA) &&
                                        !did_zlc_setup && nr_online_nodes > 1) {
                                /*
@@ -2321,16 +2379,30 @@ __alloc_pages_high_priority(gfp_t gfp_mask, unsigned int order,
        return page;
 }
 
-static inline
-void wake_all_kswapd(unsigned int order, struct zonelist *zonelist,
-                                               enum zone_type high_zoneidx,
-                                               enum zone_type classzone_idx)
+static void prepare_slowpath(gfp_t gfp_mask, unsigned int order,
+                            struct zonelist *zonelist,
+                            enum zone_type high_zoneidx,
+                            struct zone *preferred_zone)
 {
        struct zoneref *z;
        struct zone *zone;
 
-       for_each_zone_zonelist(zone, z, zonelist, high_zoneidx)
-               wakeup_kswapd(zone, order, classzone_idx);
+       for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) {
+               if (!(gfp_mask & __GFP_NO_KSWAPD))
+                       wakeup_kswapd(zone, order, zone_idx(preferred_zone));
+               /*
+                * Only reset the batches of zones that were actually
+                * considered in the fast path, we don't want to
+                * thrash fairness information for zones that are not
+                * actually part of this zonelist's round-robin cycle.
+                */
+               if (zone_reclaim_mode && !zone_local(preferred_zone, zone))
+                       continue;
+               mod_zone_page_state(zone, NR_ALLOC_BATCH,
+                                   high_wmark_pages(zone) -
+                                   low_wmark_pages(zone) -
+                                   zone_page_state(zone, NR_ALLOC_BATCH));
+       }
 }
 
 static inline int
@@ -2426,9 +2498,8 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
                goto nopage;
 
 restart:
-       if (!(gfp_mask & __GFP_NO_KSWAPD))
-               wake_all_kswapd(order, zonelist, high_zoneidx,
-                                               zone_idx(preferred_zone));
+       prepare_slowpath(gfp_mask, order, zonelist,
+                        high_zoneidx, preferred_zone);
 
        /*
         * OK, we're below the kswapd watermark and have kicked background
@@ -3095,7 +3166,7 @@ void show_free_areas(unsigned int filter)
                        K(zone_page_state(zone, NR_FREE_CMA_PAGES)),
                        K(zone_page_state(zone, NR_WRITEBACK_TEMP)),
                        zone->pages_scanned,
-                       (zone->all_unreclaimable ? "yes" : "no")
+                       (!zone_reclaimable(zone) ? "yes" : "no")
                        );
                printk("lowmem_reserve[]:");
                for (i = 0; i < MAX_NR_ZONES; i++)
@@ -3104,7 +3175,7 @@ void show_free_areas(unsigned int filter)
        }
 
        for_each_populated_zone(zone) {
-               unsigned long nr[MAX_ORDER], flags, order, total = 0;
+               unsigned long nr[MAX_ORDER], flags, order, total = 0;
                unsigned char types[MAX_ORDER];
 
                if (skip_free_areas_node(filter, zone_to_nid(zone)))
@@ -3416,11 +3487,11 @@ static void build_zonelists_in_zone_order(pg_data_t *pgdat, int nr_nodes)
 static int default_zonelist_order(void)
 {
        int nid, zone_type;
-       unsigned long low_kmem_size,total_size;
+       unsigned long low_kmem_size, total_size;
        struct zone *z;
        int average_size;
        /*
-         * ZONE_DMA and ZONE_DMA32 can be very small area in the system.
+        * ZONE_DMA and ZONE_DMA32 can be very small area in the system.
         * If they are really small and used heavily, the system can fall
         * into OOM very easily.
         * This function detect ZONE_DMA/DMA32 size and configures zone order.
@@ -3452,9 +3523,9 @@ static int default_zonelist_order(void)
                return ZONELIST_ORDER_NODE;
        /*
         * look into each node's config.
-        * If there is a node whose DMA/DMA32 memory is very big area on
-        * local memory, NODE_ORDER may be suitable.
-         */
+        * If there is a node whose DMA/DMA32 memory is very big area on
+        * local memory, NODE_ORDER may be suitable.
+        */
        average_size = total_size /
                                (nodes_weight(node_states[N_MEMORY]) + 1);
        for_each_online_node(nid) {
@@ -4180,7 +4251,7 @@ int zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages)
        if (!zone->wait_table)
                return -ENOMEM;
 
-       for(i = 0; i < zone->wait_table_hash_nr_entries; ++i)
+       for (i = 0; i < zone->wait_table_hash_nr_entries; ++i)
                init_waitqueue_head(zone->wait_table + i);
 
        return 0;
@@ -4237,7 +4308,7 @@ int __meminit init_currently_empty_zone(struct zone *zone,
 int __meminit __early_pfn_to_nid(unsigned long pfn)
 {
        unsigned long start_pfn, end_pfn;
-       int i, nid;
+       int nid;
        /*
         * NOTE: The following SMP-unsafe globals are only used early in boot
         * when the kernel is running single-threaded.
@@ -4248,15 +4319,14 @@ int __meminit __early_pfn_to_nid(unsigned long pfn)
        if (last_start_pfn <= pfn && pfn < last_end_pfn)
                return last_nid;
 
-       for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid)
-               if (start_pfn <= pfn && pfn < end_pfn) {
-                       last_start_pfn = start_pfn;
-                       last_end_pfn = end_pfn;
-                       last_nid = nid;
-                       return nid;
-               }
-       /* This is a memory hole */
-       return -1;
+       nid = memblock_search_pfn_nid(pfn, &start_pfn, &end_pfn);
+       if (nid != -1) {
+               last_start_pfn = start_pfn;
+               last_end_pfn = end_pfn;
+               last_nid = nid;
+       }
+
+       return nid;
 }
 #endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */
 
@@ -4586,7 +4656,7 @@ static inline void setup_usemap(struct pglist_data *pgdat, struct zone *zone,
 #ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
 
 /* Initialise the number of pages represented by NR_PAGEBLOCK_BITS */
-void __init set_pageblock_order(void)
+void __paginginit set_pageblock_order(void)
 {
        unsigned int order;
 
@@ -4614,7 +4684,7 @@ void __init set_pageblock_order(void)
  * include/linux/pageblock-flags.h for the values of pageblock_order based on
  * the kernel config
  */
-void __init set_pageblock_order(void)
+void __paginginit set_pageblock_order(void)
 {
 }
 
@@ -4728,8 +4798,11 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
                spin_lock_init(&zone->lru_lock);
                zone_seqlock_init(zone);
                zone->zone_pgdat = pgdat;
-
                zone_pcp_init(zone);
+
+               /* For bootup, initialized properly in watermark setup */
+               mod_zone_page_state(zone, NR_ALLOC_BATCH, zone->managed_pages);
+
                lruvec_init(&zone->lruvec);
                if (!size)
                        continue;
@@ -4930,7 +5003,7 @@ static unsigned long __init early_calculate_totalpages(void)
                if (pages)
                        node_set_state(nid, N_MEMORY);
        }
-       return totalpages;
+       return totalpages;
 }
 
 /*
@@ -5047,7 +5120,7 @@ restart:
                        /*
                         * Some kernelcore has been met, update counts and
                         * break if the kernelcore for this node has been
-                        * satisified
+                        * satisfied
                         */
                        required_kernelcore -= min(required_kernelcore,
                                                                size_pages);
@@ -5061,7 +5134,7 @@ restart:
         * If there is still required_kernelcore, we do another pass with one
         * less node in the count. This will push zone_movable_pfn[nid] further
         * along on the nodes that still have memory until kernelcore is
-        * satisified
+        * satisfied
         */
        usable_nodes--;
        if (usable_nodes && required_kernelcore > usable_nodes)
@@ -5286,8 +5359,10 @@ void __init mem_init_print_info(const char *str)
         * 3) .rodata.* may be embedded into .text or .data sections.
         */
 #define adj_init_size(start, end, size, pos, adj) \
-       if (start <= pos && pos < end && size > adj) \
-               size -= adj;
+       do { \
+               if (start <= pos && pos < end && size > adj) \
+                       size -= adj; \
+       } while (0)
 
        adj_init_size(__init_begin, __init_end, init_data_size,
                     _sinittext, init_code_size);
@@ -5361,7 +5436,7 @@ static int page_alloc_cpu_notify(struct notifier_block *self,
                 * This is only okay since the processor is dead and cannot
                 * race with what we are doing.
                 */
-               refresh_cpu_vm_stats(cpu);
+               cpu_vm_stats_fold(cpu);
        }
        return NOTIFY_OK;
 }
@@ -5498,6 +5573,11 @@ static void __setup_per_zone_wmarks(void)
                zone->watermark[WMARK_LOW]  = min_wmark_pages(zone) + (tmp >> 2);
                zone->watermark[WMARK_HIGH] = min_wmark_pages(zone) + (tmp >> 1);
 
+               __mod_zone_page_state(zone, NR_ALLOC_BATCH,
+                                     high_wmark_pages(zone) -
+                                     low_wmark_pages(zone) -
+                                     zone_page_state(zone, NR_ALLOC_BATCH));
+
                setup_zone_migrate_reserve(zone);
                spin_unlock_irqrestore(&zone->lock, flags);
        }
@@ -5570,7 +5650,7 @@ static void __meminit setup_per_zone_inactive_ratio(void)
  * we want it large (64MB max).  But it is not linear, because network
  * bandwidth does not increase linearly with machine size.  We use
  *
- *     min_free_kbytes = 4 * sqrt(lowmem_kbytes), for better accuracy:
+ *     min_free_kbytes = 4 * sqrt(lowmem_kbytes), for better accuracy:
  *     min_free_kbytes = sqrt(lowmem_kbytes * 16)
  *
  * which yields
@@ -5614,11 +5694,11 @@ int __meminit init_per_zone_wmark_min(void)
 module_init(init_per_zone_wmark_min)
 
 /*
- * min_free_kbytes_sysctl_handler - just a wrapper around proc_dointvec() so 
+ * min_free_kbytes_sysctl_handler - just a wrapper around proc_dointvec() so
  *     that we can call two helper functions whenever min_free_kbytes
  *     changes.
  */
-int min_free_kbytes_sysctl_handler(ctl_table *table, int write, 
+int min_free_kbytes_sysctl_handler(ctl_table *table, int write,
        void __user *buffer, size_t *length, loff_t *ppos)
 {
        proc_dointvec(table, write, buffer, length, ppos);
@@ -5682,8 +5762,8 @@ int lowmem_reserve_ratio_sysctl_handler(ctl_table *table, int write,
 
 /*
  * percpu_pagelist_fraction - changes the pcp->high for each zone on each
- * cpu.  It is the fraction of total pages in each zone that a hot per cpu pagelist
- * can have before it gets flushed back to buddy allocator.
+ * cpu.  It is the fraction of total pages in each zone that a hot per cpu
+ * pagelist can have before it gets flushed back to buddy allocator.
  */
 int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write,
        void __user *buffer, size_t *length, loff_t *ppos)
@@ -5745,9 +5825,10 @@ void *__init alloc_large_system_hash(const char *tablename,
        if (!numentries) {
                /* round applicable memory size up to nearest megabyte */
                numentries = nr_kernel_pages;
-               numentries += (1UL << (20 - PAGE_SHIFT)) - 1;
-               numentries >>= 20 - PAGE_SHIFT;
-               numentries <<= 20 - PAGE_SHIFT;
+
+               /* It isn't necessary when PAGE_SIZE >= 1MB */
+               if (PAGE_SHIFT < 20)
+                       numentries = round_up(numentries, (1<<20)/PAGE_SIZE);
 
                /* limit to 1 bucket per 2^scale bytes of low memory */
                if (scale > PAGE_SHIFT)
@@ -5900,7 +5981,7 @@ void set_pageblock_flags_group(struct page *page, unsigned long flags,
  * This function checks whether pageblock includes unmovable pages or not.
  * If @count is not zero, it is okay to include less @count unmovable pages
  *
- * PageLRU check wihtout isolation or lru_lock could race so that
+ * PageLRU check without isolation or lru_lock could race so that
  * MIGRATE_MOVABLE block might include unmovable pages. It means you can't
  * expect this function should be exact.
  */
@@ -5928,6 +6009,17 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count,
                        continue;
 
                page = pfn_to_page(check);
+
+               /*
+                * Hugepages are not in LRU lists, but they're movable.
+                * We need not scan over tail pages because we don't
+                * handle each tail page individually in migration.
+                */
+               if (PageHuge(page)) {
+                       iter = round_up(iter + 1, 1<<compound_order(page)) - 1;
+                       continue;
+               }
+
                /*
                 * We can't use page_count without pin a page
                 * because another CPU can free compound page.
index ba05b64..8c79a47 100644 (file)
@@ -266,7 +266,6 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc,
 
                init_sync_kiocb(&kiocb, swap_file);
                kiocb.ki_pos = page_file_offset(page);
-               kiocb.ki_left = PAGE_SIZE;
                kiocb.ki_nbytes = PAGE_SIZE;
 
                set_page_writeback(page);
index 0cee10f..d1473b2 100644 (file)
@@ -6,6 +6,7 @@
 #include <linux/page-isolation.h>
 #include <linux/pageblock-flags.h>
 #include <linux/memory.h>
+#include <linux/hugetlb.h>
 #include "internal.h"
 
 int set_migratetype_isolate(struct page *page, bool skip_hwpoisoned_pages)
@@ -252,6 +253,19 @@ struct page *alloc_migrate_target(struct page *page, unsigned long private,
 {
        gfp_t gfp_mask = GFP_USER | __GFP_MOVABLE;
 
+       /*
+        * TODO: allocate a destination hugepage from a nearest neighbor node,
+        * in accordance with memory policy of the user process if possible. For
+        * now as a simple work-around, we use the next node for destination.
+        */
+       if (PageHuge(page)) {
+               nodemask_t src = nodemask_of_node(page_to_nid(page));
+               nodemask_t dst;
+               nodes_complement(dst, src);
+               return alloc_huge_page_node(page_hstate(compound_head(page)),
+                                           next_node(page_to_nid(page), dst));
+       }
+
        if (PageHighMem(page))
                gfp_mask |= __GFP_HIGHMEM;
 
index e1a6e4f..3929a40 100644 (file)
 #include <asm/tlb.h>
 #include <asm-generic/pgtable.h>
 
+/*
+ * If a p?d_bad entry is found while walking page tables, report
+ * the error, before resetting entry to p?d_none.  Usually (but
+ * very seldom) called out from the p?d_none_or_clear_bad macros.
+ */
+
+void pgd_clear_bad(pgd_t *pgd)
+{
+       pgd_ERROR(*pgd);
+       pgd_clear(pgd);
+}
+
+void pud_clear_bad(pud_t *pud)
+{
+       pud_ERROR(*pud);
+       pud_clear(pud);
+}
+
+void pmd_clear_bad(pmd_t *pmd)
+{
+       pmd_ERROR(*pmd);
+       pmd_clear(pmd);
+}
+
 #ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
 /*
  * Only sets the access flags (dirty, accessed), as well as write 
index 829a77c..e4ed041 100644 (file)
@@ -371,10 +371,10 @@ static int try_context_readahead(struct address_space *mapping,
        size = count_history_pages(mapping, ra, offset, max);
 
        /*
-        * no history pages:
+        * not enough history pages:
         * it could be a random read
         */
-       if (!size)
+       if (size <= req_size)
                return 0;
 
        /*
@@ -385,8 +385,8 @@ static int try_context_readahead(struct address_space *mapping,
                size *= 2;
 
        ra->start = offset;
-       ra->size = get_init_ra_size(size + req_size, max);
-       ra->async_size = ra->size;
+       ra->size = min(size + req_size, max);
+       ra->async_size = 1;
 
        return 1;
 }
index 07748e6..fd3ee7a 100644 (file)
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1052,11 +1052,11 @@ void do_page_add_anon_rmap(struct page *page,
 {
        int first = atomic_inc_and_test(&page->_mapcount);
        if (first) {
-               if (!PageTransHuge(page))
-                       __inc_zone_page_state(page, NR_ANON_PAGES);
-               else
+               if (PageTransHuge(page))
                        __inc_zone_page_state(page,
                                              NR_ANON_TRANSPARENT_HUGEPAGES);
+               __mod_zone_page_state(page_zone(page), NR_ANON_PAGES,
+                               hpage_nr_pages(page));
        }
        if (unlikely(PageKsm(page)))
                return;
@@ -1085,10 +1085,10 @@ void page_add_new_anon_rmap(struct page *page,
        VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end);
        SetPageSwapBacked(page);
        atomic_set(&page->_mapcount, 0); /* increment count (starts at -1) */
-       if (!PageTransHuge(page))
-               __inc_zone_page_state(page, NR_ANON_PAGES);
-       else
+       if (PageTransHuge(page))
                __inc_zone_page_state(page, NR_ANON_TRANSPARENT_HUGEPAGES);
+       __mod_zone_page_state(page_zone(page), NR_ANON_PAGES,
+                       hpage_nr_pages(page));
        __page_set_anon_rmap(page, vma, address, 1);
        if (!mlocked_vma_newpage(vma, page)) {
                SetPageActive(page);
@@ -1111,7 +1111,7 @@ void page_add_file_rmap(struct page *page)
        mem_cgroup_begin_update_page_stat(page, &locked, &flags);
        if (atomic_inc_and_test(&page->_mapcount)) {
                __inc_zone_page_state(page, NR_FILE_MAPPED);
-               mem_cgroup_inc_page_stat(page, MEMCG_NR_FILE_MAPPED);
+               mem_cgroup_inc_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED);
        }
        mem_cgroup_end_update_page_stat(page, &locked, &flags);
 }
@@ -1148,14 +1148,14 @@ void page_remove_rmap(struct page *page)
                goto out;
        if (anon) {
                mem_cgroup_uncharge_page(page);
-               if (!PageTransHuge(page))
-                       __dec_zone_page_state(page, NR_ANON_PAGES);
-               else
+               if (PageTransHuge(page))
                        __dec_zone_page_state(page,
                                              NR_ANON_TRANSPARENT_HUGEPAGES);
+               __mod_zone_page_state(page_zone(page), NR_ANON_PAGES,
+                               -hpage_nr_pages(page));
        } else {
                __dec_zone_page_state(page, NR_FILE_MAPPED);
-               mem_cgroup_dec_page_stat(page, MEMCG_NR_FILE_MAPPED);
+               mem_cgroup_dec_page_stat(page, MEM_CGROUP_STAT_FILE_MAPPED);
                mem_cgroup_end_update_page_stat(page, &locked, &flags);
        }
        if (unlikely(PageMlocked(page)))
index 5261498..8297623 100644 (file)
@@ -1205,7 +1205,7 @@ repeat:
                                                gfp & GFP_RECLAIM_MASK);
                if (error)
                        goto decused;
-               error = radix_tree_preload(gfp & GFP_RECLAIM_MASK);
+               error = radix_tree_maybe_preload(gfp & GFP_RECLAIM_MASK);
                if (!error) {
                        error = shmem_add_to_page_cache(page, mapping, index,
                                                        gfp, NULL);
@@ -2819,6 +2819,10 @@ int __init shmem_init(void)
 {
        int error;
 
+       /* If rootfs called this, don't re-init */
+       if (shmem_inode_cachep)
+               return 0;
+
        error = bdi_init(&shmem_backing_dev_info);
        if (error)
                goto out4;
index 538bade..a344327 100644 (file)
@@ -19,6 +19,7 @@
 #include <asm/tlbflush.h>
 #include <asm/page.h>
 #include <linux/memcontrol.h>
+#include <trace/events/kmem.h>
 
 #include "slab.h"
 
@@ -373,7 +374,7 @@ struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags)
 {
        int index;
 
-       if (size > KMALLOC_MAX_SIZE) {
+       if (unlikely(size > KMALLOC_MAX_SIZE)) {
                WARN_ON_ONCE(!(flags & __GFP_NOWARN));
                return NULL;
        }
@@ -495,6 +496,15 @@ void __init create_kmalloc_caches(unsigned long flags)
 }
 #endif /* !CONFIG_SLOB */
 
+#ifdef CONFIG_TRACING
+void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
+{
+       void *ret = kmalloc_order(size, flags, order);
+       trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << order, flags);
+       return ret;
+}
+EXPORT_SYMBOL(kmalloc_order_trace);
+#endif
 
 #ifdef CONFIG_SLABINFO
 
index 91bd3f2..4bf8809 100644 (file)
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -462,11 +462,11 @@ __do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller)
        return ret;
 }
 
-void *__kmalloc_node(size_t size, gfp_t gfp, int node)
+void *__kmalloc(size_t size, gfp_t gfp)
 {
-       return __do_kmalloc_node(size, gfp, node, _RET_IP_);
+       return __do_kmalloc_node(size, gfp, NUMA_NO_NODE, _RET_IP_);
 }
-EXPORT_SYMBOL(__kmalloc_node);
+EXPORT_SYMBOL(__kmalloc);
 
 #ifdef CONFIG_TRACING
 void *__kmalloc_track_caller(size_t size, gfp_t gfp, unsigned long caller)
@@ -534,7 +534,7 @@ int __kmem_cache_create(struct kmem_cache *c, unsigned long flags)
        return 0;
 }
 
-void *kmem_cache_alloc_node(struct kmem_cache *c, gfp_t flags, int node)
+void *slob_alloc_node(struct kmem_cache *c, gfp_t flags, int node)
 {
        void *b;
 
@@ -560,7 +560,27 @@ void *kmem_cache_alloc_node(struct kmem_cache *c, gfp_t flags, int node)
        kmemleak_alloc_recursive(b, c->size, 1, c->flags, flags);
        return b;
 }
+EXPORT_SYMBOL(slob_alloc_node);
+
+void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
+{
+       return slob_alloc_node(cachep, flags, NUMA_NO_NODE);
+}
+EXPORT_SYMBOL(kmem_cache_alloc);
+
+#ifdef CONFIG_NUMA
+void *__kmalloc_node(size_t size, gfp_t gfp, int node)
+{
+       return __do_kmalloc_node(size, gfp, node, _RET_IP_);
+}
+EXPORT_SYMBOL(__kmalloc_node);
+
+void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t gfp, int node)
+{
+       return slob_alloc_node(cachep, gfp, node);
+}
 EXPORT_SYMBOL(kmem_cache_alloc_node);
+#endif
 
 static void __kmem_cache_free(void *b, int size)
 {
index e3ba1f2..c3eb3d3 100644 (file)
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -373,7 +373,8 @@ static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page
 #endif
        {
                slab_lock(page);
-               if (page->freelist == freelist_old && page->counters == counters_old) {
+               if (page->freelist == freelist_old &&
+                                       page->counters == counters_old) {
                        page->freelist = freelist_new;
                        page->counters = counters_new;
                        slab_unlock(page);
@@ -411,7 +412,8 @@ static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
 
                local_irq_save(flags);
                slab_lock(page);
-               if (page->freelist == freelist_old && page->counters == counters_old) {
+               if (page->freelist == freelist_old &&
+                                       page->counters == counters_old) {
                        page->freelist = freelist_new;
                        page->counters = counters_new;
                        slab_unlock(page);
@@ -553,8 +555,9 @@ static void print_tracking(struct kmem_cache *s, void *object)
 
 static void print_page_info(struct page *page)
 {
-       printk(KERN_ERR "INFO: Slab 0x%p objects=%u used=%u fp=0x%p flags=0x%04lx\n",
-               page, page->objects, page->inuse, page->freelist, page->flags);
+       printk(KERN_ERR
+              "INFO: Slab 0x%p objects=%u used=%u fp=0x%p flags=0x%04lx\n",
+              page, page->objects, page->inuse, page->freelist, page->flags);
 
 }
 
@@ -629,7 +632,8 @@ static void object_err(struct kmem_cache *s, struct page *page,
        print_trailer(s, page, object);
 }
 
-static void slab_err(struct kmem_cache *s, struct page *page, const char *fmt, ...)
+static void slab_err(struct kmem_cache *s, struct page *page,
+                       const char *fmt, ...)
 {
        va_list args;
        char buf[100];
@@ -788,7 +792,8 @@ static int check_object(struct kmem_cache *s, struct page *page,
        } else {
                if ((s->flags & SLAB_POISON) && s->object_size < s->inuse) {
                        check_bytes_and_report(s, page, p, "Alignment padding",
-                               endobject, POISON_INUSE, s->inuse - s->object_size);
+                               endobject, POISON_INUSE,
+                               s->inuse - s->object_size);
                }
        }
 
@@ -873,7 +878,6 @@ static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
                                object_err(s, page, object,
                                        "Freechain corrupt");
                                set_freepointer(s, object, NULL);
-                               break;
                        } else {
                                slab_err(s, page, "Freepointer corrupt");
                                page->freelist = NULL;
@@ -918,7 +922,8 @@ static void trace(struct kmem_cache *s, struct page *page, void *object,
                        page->freelist);
 
                if (!alloc)
-                       print_section("Object ", (void *)object, s->object_size);
+                       print_section("Object ", (void *)object,
+                                       s->object_size);
 
                dump_stack();
        }
@@ -937,7 +942,8 @@ static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
        return should_failslab(s->object_size, flags, s->flags);
 }
 
-static inline void slab_post_alloc_hook(struct kmem_cache *s, gfp_t flags, void *object)
+static inline void slab_post_alloc_hook(struct kmem_cache *s,
+                                       gfp_t flags, void *object)
 {
        flags &= gfp_allowed_mask;
        kmemcheck_slab_alloc(s, flags, object, slab_ksize(s));
@@ -1039,7 +1045,8 @@ static void setup_object_debug(struct kmem_cache *s, struct page *page,
        init_tracking(s, object);
 }
 
-static noinline int alloc_debug_processing(struct kmem_cache *s, struct page *page,
+static noinline int alloc_debug_processing(struct kmem_cache *s,
+                                       struct page *page,
                                        void *object, unsigned long addr)
 {
        if (!check_slab(s, page))
@@ -1743,7 +1750,8 @@ static void init_kmem_cache_cpus(struct kmem_cache *s)
 /*
  * Remove the cpu slab
  */
-static void deactivate_slab(struct kmem_cache *s, struct page *page, void *freelist)
+static void deactivate_slab(struct kmem_cache *s, struct page *page,
+                               void *freelist)
 {
        enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE };
        struct kmem_cache_node *n = get_node(s, page_to_nid(page));
@@ -1999,7 +2007,8 @@ static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
                page->pobjects = pobjects;
                page->next = oldpage;
 
-       } while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page) != oldpage);
+       } while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page)
+                                                               != oldpage);
 #endif
 }
 
@@ -2169,8 +2178,8 @@ static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags)
 }
 
 /*
- * Check the page->freelist of a page and either transfer the freelist to the per cpu freelist
- * or deactivate the page.
+ * Check the page->freelist of a page and either transfer the freelist to the
+ * per cpu freelist or deactivate the page.
  *
  * The page is still frozen if the return value is not NULL.
  *
@@ -2314,7 +2323,8 @@ new_slab:
                goto load_freelist;
 
        /* Only entered in the debug case */
-       if (kmem_cache_debug(s) && !alloc_debug_processing(s, page, freelist, addr))
+       if (kmem_cache_debug(s) &&
+                       !alloc_debug_processing(s, page, freelist, addr))
                goto new_slab;  /* Slab failed checks. Next slab needed */
 
        deactivate_slab(s, page, get_freepointer(s, freelist));
@@ -2372,7 +2382,7 @@ redo:
 
        object = c->freelist;
        page = c->page;
-       if (unlikely(!object || !page || !node_match(page, node)))
+       if (unlikely(!object || !node_match(page, node)))
                object = __slab_alloc(s, gfpflags, node, addr, c);
 
        else {
@@ -2382,13 +2392,15 @@ redo:
                 * The cmpxchg will only match if there was no additional
                 * operation and if we are on the right processor.
                 *
-                * The cmpxchg does the following atomically (without lock semantics!)
+                * The cmpxchg does the following atomically (without lock
+                * semantics!)
                 * 1. Relocate first pointer to the current per cpu area.
                 * 2. Verify that tid and freelist have not been changed
                 * 3. If they were not changed replace tid and freelist
                 *
-                * Since this is without lock semantics the protection is only against
-                * code executing on this cpu *not* from access by other cpus.
+                * Since this is without lock semantics the protection is only
+                * against code executing on this cpu *not* from access by
+                * other cpus.
                 */
                if (unlikely(!this_cpu_cmpxchg_double(
                                s->cpu_slab->freelist, s->cpu_slab->tid,
@@ -2420,7 +2432,8 @@ void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
 {
        void *ret = slab_alloc(s, gfpflags, _RET_IP_);
 
-       trace_kmem_cache_alloc(_RET_IP_, ret, s->object_size, s->size, gfpflags);
+       trace_kmem_cache_alloc(_RET_IP_, ret, s->object_size,
+                               s->size, gfpflags);
 
        return ret;
 }
@@ -2434,14 +2447,6 @@ void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
        return ret;
 }
 EXPORT_SYMBOL(kmem_cache_alloc_trace);
-
-void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order)
-{
-       void *ret = kmalloc_order(size, flags, order);
-       trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << order, flags);
-       return ret;
-}
-EXPORT_SYMBOL(kmalloc_order_trace);
 #endif
 
 #ifdef CONFIG_NUMA
@@ -2512,8 +2517,10 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
                        if (kmem_cache_has_cpu_partial(s) && !prior)
 
                                /*
-                                * Slab was on no list before and will be partially empty
-                                * We can defer the list move and instead freeze it.
+                                * Slab was on no list before and will be
+                                * partially empty
+                                * We can defer the list move and instead
+                                * freeze it.
                                 */
                                new.frozen = 1;
 
@@ -3071,8 +3078,8 @@ static int kmem_cache_open(struct kmem_cache *s, unsigned long flags)
         * A) The number of objects from per cpu partial slabs dumped to the
         *    per node list when we reach the limit.
         * B) The number of objects in cpu partial slabs to extract from the
-        *    per node list when we run out of per cpu objects. We only fetch 50%
-        *    to keep some capacity around for frees.
+        *    per node list when we run out of per cpu objects. We only fetch
+        *    50% to keep some capacity around for frees.
         */
        if (!kmem_cache_has_cpu_partial(s))
                s->cpu_partial = 0;
@@ -3099,8 +3106,8 @@ error:
        if (flags & SLAB_PANIC)
                panic("Cannot create slab %s size=%lu realsize=%u "
                        "order=%u offset=%u flags=%lx\n",
-                       s->name, (unsigned long)s->size, s->size, oo_order(s->oo),
-                       s->offset, flags);
+                       s->name, (unsigned long)s->size, s->size,
+                       oo_order(s->oo), s->offset, flags);
        return -EINVAL;
 }
 
@@ -3316,42 +3323,6 @@ size_t ksize(const void *object)
 }
 EXPORT_SYMBOL(ksize);
 
-#ifdef CONFIG_SLUB_DEBUG
-bool verify_mem_not_deleted(const void *x)
-{
-       struct page *page;
-       void *object = (void *)x;
-       unsigned long flags;
-       bool rv;
-
-       if (unlikely(ZERO_OR_NULL_PTR(x)))
-               return false;
-
-       local_irq_save(flags);
-
-       page = virt_to_head_page(x);
-       if (unlikely(!PageSlab(page))) {
-               /* maybe it was from stack? */
-               rv = true;
-               goto out_unlock;
-       }
-
-       slab_lock(page);
-       if (on_freelist(page->slab_cache, page, object)) {
-               object_err(page->slab_cache, page, object, "Object is on free-list");
-               rv = false;
-       } else {
-               rv = true;
-       }
-       slab_unlock(page);
-
-out_unlock:
-       local_irq_restore(flags);
-       return rv;
-}
-EXPORT_SYMBOL(verify_mem_not_deleted);
-#endif
-
 void kfree(const void *x)
 {
        struct page *page;
@@ -4162,15 +4133,17 @@ static int list_locations(struct kmem_cache *s, char *buf,
                                !cpumask_empty(to_cpumask(l->cpus)) &&
                                len < PAGE_SIZE - 60) {
                        len += sprintf(buf + len, " cpus=");
-                       len += cpulist_scnprintf(buf + len, PAGE_SIZE - len - 50,
+                       len += cpulist_scnprintf(buf + len,
+                                                PAGE_SIZE - len - 50,
                                                 to_cpumask(l->cpus));
                }
 
                if (nr_online_nodes > 1 && !nodes_empty(l->nodes) &&
                                len < PAGE_SIZE - 60) {
                        len += sprintf(buf + len, " nodes=");
-                       len += nodelist_scnprintf(buf + len, PAGE_SIZE - len - 50,
-                                       l->nodes);
+                       len += nodelist_scnprintf(buf + len,
+                                                 PAGE_SIZE - len - 50,
+                                                 l->nodes);
                }
 
                len += sprintf(buf + len, "\n");
@@ -4268,18 +4241,17 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
        int node;
        int x;
        unsigned long *nodes;
-       unsigned long *per_cpu;
 
-       nodes = kzalloc(2 * sizeof(unsigned long) * nr_node_ids, GFP_KERNEL);
+       nodes = kzalloc(sizeof(unsigned long) * nr_node_ids, GFP_KERNEL);
        if (!nodes)
                return -ENOMEM;
-       per_cpu = nodes + nr_node_ids;
 
        if (flags & SO_CPU) {
                int cpu;
 
                for_each_possible_cpu(cpu) {
-                       struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
+                       struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab,
+                                                              cpu);
                        int node;
                        struct page *page;
 
@@ -4304,8 +4276,6 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
                                total += x;
                                nodes[node] += x;
                        }
-
-                       per_cpu[node]++;
                }
        }
 
@@ -4315,12 +4285,11 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
                for_each_node_state(node, N_NORMAL_MEMORY) {
                        struct kmem_cache_node *n = get_node(s, node);
 
-               if (flags & SO_TOTAL)
-                       x = atomic_long_read(&n->total_objects);
-               else if (flags & SO_OBJECTS)
-                       x = atomic_long_read(&n->total_objects) -
-                               count_partial(n, count_free);
-
+                       if (flags & SO_TOTAL)
+                               x = atomic_long_read(&n->total_objects);
+                       else if (flags & SO_OBJECTS)
+                               x = atomic_long_read(&n->total_objects) -
+                                       count_partial(n, count_free);
                        else
                                x = atomic_long_read(&n->nr_slabs);
                        total += x;
@@ -4420,7 +4389,7 @@ static ssize_t order_store(struct kmem_cache *s,
        unsigned long order;
        int err;
 
-       err = strict_strtoul(buf, 10, &order);
+       err = kstrtoul(buf, 10, &order);
        if (err)
                return err;
 
@@ -4448,7 +4417,7 @@ static ssize_t min_partial_store(struct kmem_cache *s, const char *buf,
        unsigned long min;
        int err;
 
-       err = strict_strtoul(buf, 10, &min);
+       err = kstrtoul(buf, 10, &min);
        if (err)
                return err;
 
@@ -4468,7 +4437,7 @@ static ssize_t cpu_partial_store(struct kmem_cache *s, const char *buf,
        unsigned long objects;
        int err;
 
-       err = strict_strtoul(buf, 10, &objects);
+       err = kstrtoul(buf, 10, &objects);
        if (err)
                return err;
        if (objects && !kmem_cache_has_cpu_partial(s))
@@ -4784,7 +4753,7 @@ static ssize_t remote_node_defrag_ratio_store(struct kmem_cache *s,
        unsigned long ratio;
        int err;
 
-       err = strict_strtoul(buf, 10, &ratio);
+       err = kstrtoul(buf, 10, &ratio);
        if (err)
                return err;
 
@@ -5136,7 +5105,8 @@ static char *create_unique_id(struct kmem_cache *s)
 
 #ifdef CONFIG_MEMCG_KMEM
        if (!is_root_cache(s))
-               p += sprintf(p, "-%08d", memcg_cache_id(s->memcg_params->memcg));
+               p += sprintf(p, "-%08d",
+                               memcg_cache_id(s->memcg_params->memcg));
 #endif
 
        BUG_ON(p > name + ID_STR_LENGTH - 1);
index 308d503..4ac1d7e 100644 (file)
@@ -339,13 +339,14 @@ static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
 }
 #endif /* CONFIG_MEMORY_HOTREMOVE */
 
-static void __init sparse_early_usemaps_alloc_node(unsigned long**usemap_map,
+static void __init sparse_early_usemaps_alloc_node(void *data,
                                 unsigned long pnum_begin,
                                 unsigned long pnum_end,
                                 unsigned long usemap_count, int nodeid)
 {
        void *usemap;
        unsigned long pnum;
+       unsigned long **usemap_map = (unsigned long **)data;
        int size = usemap_size();
 
        usemap = sparse_early_usemaps_alloc_pgdat_section(NODE_DATA(nodeid),
@@ -430,11 +431,12 @@ void __init sparse_mem_maps_populate_node(struct page **map_map,
 #endif /* !CONFIG_SPARSEMEM_VMEMMAP */
 
 #ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
-static void __init sparse_early_mem_maps_alloc_node(struct page **map_map,
+static void __init sparse_early_mem_maps_alloc_node(void *data,
                                 unsigned long pnum_begin,
                                 unsigned long pnum_end,
                                 unsigned long map_count, int nodeid)
 {
+       struct page **map_map = (struct page **)data;
        sparse_mem_maps_populate_node(map_map, pnum_begin, pnum_end,
                                         map_count, nodeid);
 }
@@ -460,6 +462,55 @@ void __attribute__((weak)) __meminit vmemmap_populate_print_last(void)
 {
 }
 
+/**
+ *  alloc_usemap_and_memmap - memory allocation for pageblock flags and vmemmap
+ *  @map: usemap_map for pageblock flags or map_map for vmemmap
+ */
+static void __init alloc_usemap_and_memmap(void (*alloc_func)
+                                       (void *, unsigned long, unsigned long,
+                                       unsigned long, int), void *data)
+{
+       unsigned long pnum;
+       unsigned long map_count;
+       int nodeid_begin = 0;
+       unsigned long pnum_begin = 0;
+
+       for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
+               struct mem_section *ms;
+
+               if (!present_section_nr(pnum))
+                       continue;
+               ms = __nr_to_section(pnum);
+               nodeid_begin = sparse_early_nid(ms);
+               pnum_begin = pnum;
+               break;
+       }
+       map_count = 1;
+       for (pnum = pnum_begin + 1; pnum < NR_MEM_SECTIONS; pnum++) {
+               struct mem_section *ms;
+               int nodeid;
+
+               if (!present_section_nr(pnum))
+                       continue;
+               ms = __nr_to_section(pnum);
+               nodeid = sparse_early_nid(ms);
+               if (nodeid == nodeid_begin) {
+                       map_count++;
+                       continue;
+               }
+               /* ok, we need to take care of from pnum_begin to pnum - 1*/
+               alloc_func(data, pnum_begin, pnum,
+                                               map_count, nodeid_begin);
+               /* new start, update count etc*/
+               nodeid_begin = nodeid;
+               pnum_begin = pnum;
+               map_count = 1;
+       }
+       /* ok, last chunk */
+       alloc_func(data, pnum_begin, NR_MEM_SECTIONS,
+                                               map_count, nodeid_begin);
+}
+
 /*
  * Allocate the accumulated non-linear sections, allocate a mem_map
  * for each and record the physical to section mapping.
@@ -471,11 +522,7 @@ void __init sparse_init(void)
        unsigned long *usemap;
        unsigned long **usemap_map;
        int size;
-       int nodeid_begin = 0;
-       unsigned long pnum_begin = 0;
-       unsigned long usemap_count;
 #ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
-       unsigned long map_count;
        int size2;
        struct page **map_map;
 #endif
@@ -501,82 +548,16 @@ void __init sparse_init(void)
        usemap_map = alloc_bootmem(size);
        if (!usemap_map)
                panic("can not allocate usemap_map\n");
-
-       for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
-               struct mem_section *ms;
-
-               if (!present_section_nr(pnum))
-                       continue;
-               ms = __nr_to_section(pnum);
-               nodeid_begin = sparse_early_nid(ms);
-               pnum_begin = pnum;
-               break;
-       }
-       usemap_count = 1;
-       for (pnum = pnum_begin + 1; pnum < NR_MEM_SECTIONS; pnum++) {
-               struct mem_section *ms;
-               int nodeid;
-
-               if (!present_section_nr(pnum))
-                       continue;
-               ms = __nr_to_section(pnum);
-               nodeid = sparse_early_nid(ms);
-               if (nodeid == nodeid_begin) {
-                       usemap_count++;
-                       continue;
-               }
-               /* ok, we need to take cake of from pnum_begin to pnum - 1*/
-               sparse_early_usemaps_alloc_node(usemap_map, pnum_begin, pnum,
-                                                usemap_count, nodeid_begin);
-               /* new start, update count etc*/
-               nodeid_begin = nodeid;
-               pnum_begin = pnum;
-               usemap_count = 1;
-       }
-       /* ok, last chunk */
-       sparse_early_usemaps_alloc_node(usemap_map, pnum_begin, NR_MEM_SECTIONS,
-                                        usemap_count, nodeid_begin);
+       alloc_usemap_and_memmap(sparse_early_usemaps_alloc_node,
+                                                       (void *)usemap_map);
 
 #ifdef CONFIG_SPARSEMEM_ALLOC_MEM_MAP_TOGETHER
        size2 = sizeof(struct page *) * NR_MEM_SECTIONS;
        map_map = alloc_bootmem(size2);
        if (!map_map)
                panic("can not allocate map_map\n");
-
-       for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
-               struct mem_section *ms;
-
-               if (!present_section_nr(pnum))
-                       continue;
-               ms = __nr_to_section(pnum);
-               nodeid_begin = sparse_early_nid(ms);
-               pnum_begin = pnum;
-               break;
-       }
-       map_count = 1;
-       for (pnum = pnum_begin + 1; pnum < NR_MEM_SECTIONS; pnum++) {
-               struct mem_section *ms;
-               int nodeid;
-
-               if (!present_section_nr(pnum))
-                       continue;
-               ms = __nr_to_section(pnum);
-               nodeid = sparse_early_nid(ms);
-               if (nodeid == nodeid_begin) {
-                       map_count++;
-                       continue;
-               }
-               /* ok, we need to take cake of from pnum_begin to pnum - 1*/
-               sparse_early_mem_maps_alloc_node(map_map, pnum_begin, pnum,
-                                                map_count, nodeid_begin);
-               /* new start, update count etc*/
-               nodeid_begin = nodeid;
-               pnum_begin = pnum;
-               map_count = 1;
-       }
-       /* ok, last chunk */
-       sparse_early_mem_maps_alloc_node(map_map, pnum_begin, NR_MEM_SECTIONS,
-                                        map_count, nodeid_begin);
+       alloc_usemap_and_memmap(sparse_early_mem_maps_alloc_node,
+                                                       (void *)map_map);
 #endif
 
        for (pnum = 0; pnum < NR_MEM_SECTIONS; pnum++) {
index 62b78a6..759c3ca 100644 (file)
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -31,6 +31,7 @@
 #include <linux/memcontrol.h>
 #include <linux/gfp.h>
 #include <linux/uio.h>
+#include <linux/hugetlb.h>
 
 #include "internal.h"
 
@@ -81,6 +82,19 @@ static void __put_compound_page(struct page *page)
 
 static void put_compound_page(struct page *page)
 {
+       /*
+        * hugetlbfs pages cannot be split from under us.  If this is a
+        * hugetlbfs page, check refcount on head page and release the page if
+        * the refcount becomes zero.
+        */
+       if (PageHuge(page)) {
+               page = compound_head(page);
+               if (put_page_testzero(page))
+                       __put_compound_page(page);
+
+               return;
+       }
+
        if (unlikely(PageTail(page))) {
                /* __split_huge_page_refcount can run under us */
                struct page *page_head = compound_trans_head(page);
@@ -184,38 +198,51 @@ bool __get_page_tail(struct page *page)
         * proper PT lock that already serializes against
         * split_huge_page().
         */
-       unsigned long flags;
        bool got = false;
-       struct page *page_head = compound_trans_head(page);
+       struct page *page_head;
 
-       if (likely(page != page_head && get_page_unless_zero(page_head))) {
+       /*
+        * If this is a hugetlbfs page it cannot be split under us.  Simply
+        * increment refcount for the head page.
+        */
+       if (PageHuge(page)) {
+               page_head = compound_head(page);
+               atomic_inc(&page_head->_count);
+               got = true;
+       } else {
+               unsigned long flags;
 
-               /* Ref to put_compound_page() comment. */
-               if (PageSlab(page_head)) {
+               page_head = compound_trans_head(page);
+               if (likely(page != page_head &&
+                                       get_page_unless_zero(page_head))) {
+
+                       /* Ref to put_compound_page() comment. */
+                       if (PageSlab(page_head)) {
+                               if (likely(PageTail(page))) {
+                                       __get_page_tail_foll(page, false);
+                                       return true;
+                               } else {
+                                       put_page(page_head);
+                                       return false;
+                               }
+                       }
+
+                       /*
+                        * page_head wasn't a dangling pointer but it
+                        * may not be a head page anymore by the time
+                        * we obtain the lock. That is ok as long as it
+                        * can't be freed from under us.
+                        */
+                       flags = compound_lock_irqsave(page_head);
+                       /* here __split_huge_page_refcount won't run anymore */
                        if (likely(PageTail(page))) {
                                __get_page_tail_foll(page, false);
-                               return true;
-                       } else {
-                               put_page(page_head);
-                               return false;
+                               got = true;
                        }
+                       compound_unlock_irqrestore(page_head, flags);
+                       if (unlikely(!got))
+                               put_page(page_head);
                }
-
-               /*
-                * page_head wasn't a dangling pointer but it
-                * may not be a head page anymore by the time
-                * we obtain the lock. That is ok as long as it
-                * can't be freed from under us.
-                */
-               flags = compound_lock_irqsave(page_head);
-               /* here __split_huge_page_refcount won't run anymore */
-               if (likely(PageTail(page))) {
-                       __get_page_tail_foll(page, false);
-                       got = true;
-               }
-               compound_unlock_irqrestore(page_head, flags);
-               if (unlikely(!got))
-                       put_page(page_head);
        }
        return got;
 }
@@ -405,6 +432,11 @@ static void activate_page_drain(int cpu)
                pagevec_lru_move_fn(pvec, __activate_page, NULL);
 }
 
+static bool need_activate_page_drain(int cpu)
+{
+       return pagevec_count(&per_cpu(activate_page_pvecs, cpu)) != 0;
+}
+
 void activate_page(struct page *page)
 {
        if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
@@ -422,6 +454,11 @@ static inline void activate_page_drain(int cpu)
 {
 }
 
+static bool need_activate_page_drain(int cpu)
+{
+       return false;
+}
+
 void activate_page(struct page *page)
 {
        struct zone *zone = page_zone(page);
@@ -674,12 +711,36 @@ static void lru_add_drain_per_cpu(struct work_struct *dummy)
        lru_add_drain();
 }
 
-/*
- * Returns 0 for success
- */
-int lru_add_drain_all(void)
+static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work);
+
+void lru_add_drain_all(void)
 {
-       return schedule_on_each_cpu(lru_add_drain_per_cpu);
+       static DEFINE_MUTEX(lock);
+       static struct cpumask has_work;
+       int cpu;
+
+       mutex_lock(&lock);
+       get_online_cpus();
+       cpumask_clear(&has_work);
+
+       for_each_online_cpu(cpu) {
+               struct work_struct *work = &per_cpu(lru_add_drain_work, cpu);
+
+               if (pagevec_count(&per_cpu(lru_add_pvec, cpu)) ||
+                   pagevec_count(&per_cpu(lru_rotate_pvecs, cpu)) ||
+                   pagevec_count(&per_cpu(lru_deactivate_pvecs, cpu)) ||
+                   need_activate_page_drain(cpu)) {
+                       INIT_WORK(work, lru_add_drain_per_cpu);
+                       schedule_work_on(cpu, work);
+                       cpumask_set_cpu(cpu, &has_work);
+               }
+       }
+
+       for_each_cpu(cpu, &has_work)
+               flush_work(&per_cpu(lru_add_drain_work, cpu));
+
+       put_online_cpus();
+       mutex_unlock(&lock);
 }
 
 /*
index f24ab0d..e6f15f8 100644 (file)
@@ -122,7 +122,7 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry, gfp_t gfp_mask)
 {
        int error;
 
-       error = radix_tree_preload(gfp_mask);
+       error = radix_tree_maybe_preload(gfp_mask);
        if (!error) {
                error = __add_to_swap_cache(page, entry);
                radix_tree_preload_end();
@@ -328,7 +328,7 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
                /*
                 * call radix_tree_preload() while we can wait.
                 */
-               err = radix_tree_preload(gfp_mask & GFP_KERNEL);
+               err = radix_tree_maybe_preload(gfp_mask & GFP_KERNEL);
                if (err)
                        break;
 
index 6cf2e60..3963fc2 100644 (file)
@@ -175,14 +175,296 @@ static void discard_swap_cluster(struct swap_info_struct *si,
        }
 }
 
-static int wait_for_discard(void *word)
+#define SWAPFILE_CLUSTER       256
+#define LATENCY_LIMIT          256
+
+static inline void cluster_set_flag(struct swap_cluster_info *info,
+       unsigned int flag)
 {
-       schedule();
-       return 0;
+       info->flags = flag;
 }
 
-#define SWAPFILE_CLUSTER       256
-#define LATENCY_LIMIT          256
+static inline unsigned int cluster_count(struct swap_cluster_info *info)
+{
+       return info->data;
+}
+
+static inline void cluster_set_count(struct swap_cluster_info *info,
+                                    unsigned int c)
+{
+       info->data = c;
+}
+
+static inline void cluster_set_count_flag(struct swap_cluster_info *info,
+                                        unsigned int c, unsigned int f)
+{
+       info->flags = f;
+       info->data = c;
+}
+
+static inline unsigned int cluster_next(struct swap_cluster_info *info)
+{
+       return info->data;
+}
+
+static inline void cluster_set_next(struct swap_cluster_info *info,
+                                   unsigned int n)
+{
+       info->data = n;
+}
+
+static inline void cluster_set_next_flag(struct swap_cluster_info *info,
+                                        unsigned int n, unsigned int f)
+{
+       info->flags = f;
+       info->data = n;
+}
+
+static inline bool cluster_is_free(struct swap_cluster_info *info)
+{
+       return info->flags & CLUSTER_FLAG_FREE;
+}
+
+static inline bool cluster_is_null(struct swap_cluster_info *info)
+{
+       return info->flags & CLUSTER_FLAG_NEXT_NULL;
+}
+
+static inline void cluster_set_null(struct swap_cluster_info *info)
+{
+       info->flags = CLUSTER_FLAG_NEXT_NULL;
+       info->data = 0;
+}
+
+/* Add a cluster to discard list and schedule it to do discard */
+static void swap_cluster_schedule_discard(struct swap_info_struct *si,
+               unsigned int idx)
+{
+       /*
+        * If scan_swap_map() can't find a free cluster, it will check
+        * si->swap_map directly. To make sure the discarding cluster isn't
+        * taken by scan_swap_map(), mark the swap entries bad (occupied). It
+        * will be cleared after discard
+        */
+       memset(si->swap_map + idx * SWAPFILE_CLUSTER,
+                       SWAP_MAP_BAD, SWAPFILE_CLUSTER);
+
+       if (cluster_is_null(&si->discard_cluster_head)) {
+               cluster_set_next_flag(&si->discard_cluster_head,
+                                               idx, 0);
+               cluster_set_next_flag(&si->discard_cluster_tail,
+                                               idx, 0);
+       } else {
+               unsigned int tail = cluster_next(&si->discard_cluster_tail);
+               cluster_set_next(&si->cluster_info[tail], idx);
+               cluster_set_next_flag(&si->discard_cluster_tail,
+                                               idx, 0);
+       }
+
+       schedule_work(&si->discard_work);
+}
+
+/*
+ * Do the scheduled discards. After a cluster discard is finished, the cluster
+ * will be added to free cluster list. caller should hold si->lock.
+*/
+static void swap_do_scheduled_discard(struct swap_info_struct *si)
+{
+       struct swap_cluster_info *info;
+       unsigned int idx;
+
+       info = si->cluster_info;
+
+       while (!cluster_is_null(&si->discard_cluster_head)) {
+               idx = cluster_next(&si->discard_cluster_head);
+
+               cluster_set_next_flag(&si->discard_cluster_head,
+                                               cluster_next(&info[idx]), 0);
+               if (cluster_next(&si->discard_cluster_tail) == idx) {
+                       cluster_set_null(&si->discard_cluster_head);
+                       cluster_set_null(&si->discard_cluster_tail);
+               }
+               spin_unlock(&si->lock);
+
+               discard_swap_cluster(si, idx * SWAPFILE_CLUSTER,
+                               SWAPFILE_CLUSTER);
+
+               spin_lock(&si->lock);
+               cluster_set_flag(&info[idx], CLUSTER_FLAG_FREE);
+               if (cluster_is_null(&si->free_cluster_head)) {
+                       cluster_set_next_flag(&si->free_cluster_head,
+                                               idx, 0);
+                       cluster_set_next_flag(&si->free_cluster_tail,
+                                               idx, 0);
+               } else {
+                       unsigned int tail;
+
+                       tail = cluster_next(&si->free_cluster_tail);
+                       cluster_set_next(&info[tail], idx);
+                       cluster_set_next_flag(&si->free_cluster_tail,
+                                               idx, 0);
+               }
+               memset(si->swap_map + idx * SWAPFILE_CLUSTER,
+                               0, SWAPFILE_CLUSTER);
+       }
+}
+
+static void swap_discard_work(struct work_struct *work)
+{
+       struct swap_info_struct *si;
+
+       si = container_of(work, struct swap_info_struct, discard_work);
+
+       spin_lock(&si->lock);
+       swap_do_scheduled_discard(si);
+       spin_unlock(&si->lock);
+}
+
+/*
+ * The cluster corresponding to page_nr will be used. The cluster will be
+ * removed from free cluster list and its usage counter will be increased.
+ */
+static void inc_cluster_info_page(struct swap_info_struct *p,
+       struct swap_cluster_info *cluster_info, unsigned long page_nr)
+{
+       unsigned long idx = page_nr / SWAPFILE_CLUSTER;
+
+       if (!cluster_info)
+               return;
+       if (cluster_is_free(&cluster_info[idx])) {
+               VM_BUG_ON(cluster_next(&p->free_cluster_head) != idx);
+               cluster_set_next_flag(&p->free_cluster_head,
+                       cluster_next(&cluster_info[idx]), 0);
+               if (cluster_next(&p->free_cluster_tail) == idx) {
+                       cluster_set_null(&p->free_cluster_tail);
+                       cluster_set_null(&p->free_cluster_head);
+               }
+               cluster_set_count_flag(&cluster_info[idx], 0, 0);
+       }
+
+       VM_BUG_ON(cluster_count(&cluster_info[idx]) >= SWAPFILE_CLUSTER);
+       cluster_set_count(&cluster_info[idx],
+               cluster_count(&cluster_info[idx]) + 1);
+}
+
+/*
+ * The cluster corresponding to page_nr decreases one usage. If the usage
+ * counter becomes 0, which means no page in the cluster is in use, we can
+ * optionally discard the cluster and add it to free cluster list.
+ */
+static void dec_cluster_info_page(struct swap_info_struct *p,
+       struct swap_cluster_info *cluster_info, unsigned long page_nr)
+{
+       unsigned long idx = page_nr / SWAPFILE_CLUSTER;
+
+       if (!cluster_info)
+               return;
+
+       VM_BUG_ON(cluster_count(&cluster_info[idx]) == 0);
+       cluster_set_count(&cluster_info[idx],
+               cluster_count(&cluster_info[idx]) - 1);
+
+       if (cluster_count(&cluster_info[idx]) == 0) {
+               /*
+                * If the swap is discardable, prepare discard the cluster
+                * instead of free it immediately. The cluster will be freed
+                * after discard.
+                */
+               if ((p->flags & (SWP_WRITEOK | SWP_PAGE_DISCARD)) ==
+                                (SWP_WRITEOK | SWP_PAGE_DISCARD)) {
+                       swap_cluster_schedule_discard(p, idx);
+                       return;
+               }
+
+               cluster_set_flag(&cluster_info[idx], CLUSTER_FLAG_FREE);
+               if (cluster_is_null(&p->free_cluster_head)) {
+                       cluster_set_next_flag(&p->free_cluster_head, idx, 0);
+                       cluster_set_next_flag(&p->free_cluster_tail, idx, 0);
+               } else {
+                       unsigned int tail = cluster_next(&p->free_cluster_tail);
+                       cluster_set_next(&cluster_info[tail], idx);
+                       cluster_set_next_flag(&p->free_cluster_tail, idx, 0);
+               }
+       }
+}
+
+/*
+ * It's possible scan_swap_map() uses a free cluster in the middle of free
+ * cluster list. Avoid such abuse to prevent list corruption.
+ */
+static bool
+scan_swap_map_ssd_cluster_conflict(struct swap_info_struct *si,
+       unsigned long offset)
+{
+       struct percpu_cluster *percpu_cluster;
+       bool conflict;
+
+       offset /= SWAPFILE_CLUSTER;
+       conflict = !cluster_is_null(&si->free_cluster_head) &&
+               offset != cluster_next(&si->free_cluster_head) &&
+               cluster_is_free(&si->cluster_info[offset]);
+
+       if (!conflict)
+               return false;
+
+       percpu_cluster = this_cpu_ptr(si->percpu_cluster);
+       cluster_set_null(&percpu_cluster->index);
+       return true;
+}
+
+/*
+ * Try to get a swap entry from current cpu's swap entry pool (a cluster). This
+ * might involve allocating a new cluster for current CPU too.
+ */
+static void scan_swap_map_try_ssd_cluster(struct swap_info_struct *si,
+       unsigned long *offset, unsigned long *scan_base)
+{
+       struct percpu_cluster *cluster;
+       bool found_free;
+       unsigned long tmp;
+
+new_cluster:
+       cluster = this_cpu_ptr(si->percpu_cluster);
+       if (cluster_is_null(&cluster->index)) {
+               if (!cluster_is_null(&si->free_cluster_head)) {
+                       cluster->index = si->free_cluster_head;
+                       cluster->next = cluster_next(&cluster->index) *
+                                       SWAPFILE_CLUSTER;
+               } else if (!cluster_is_null(&si->discard_cluster_head)) {
+                       /*
+                        * we don't have free cluster but have some clusters in
+                        * discarding, do discard now and reclaim them
+                        */
+                       swap_do_scheduled_discard(si);
+                       *scan_base = *offset = si->cluster_next;
+                       goto new_cluster;
+               } else
+                       return;
+       }
+
+       found_free = false;
+
+       /*
+        * Other CPUs can use our cluster if they can't find a free cluster,
+        * check if there is still free entry in the cluster
+        */
+       tmp = cluster->next;
+       while (tmp < si->max && tmp < (cluster_next(&cluster->index) + 1) *
+              SWAPFILE_CLUSTER) {
+               if (!si->swap_map[tmp]) {
+                       found_free = true;
+                       break;
+               }
+               tmp++;
+       }
+       if (!found_free) {
+               cluster_set_null(&cluster->index);
+               goto new_cluster;
+       }
+       cluster->next = tmp + 1;
+       *offset = tmp;
+       *scan_base = tmp;
+}
 
 static unsigned long scan_swap_map(struct swap_info_struct *si,
                                   unsigned char usage)
@@ -191,7 +473,6 @@ static unsigned long scan_swap_map(struct swap_info_struct *si,
        unsigned long scan_base;
        unsigned long last_in_cluster = 0;
        int latency_ration = LATENCY_LIMIT;
-       int found_free_cluster = 0;
 
        /*
         * We try to cluster swap pages by allocating them sequentially
@@ -207,24 +488,18 @@ static unsigned long scan_swap_map(struct swap_info_struct *si,
        si->flags += SWP_SCANNING;
        scan_base = offset = si->cluster_next;
 
+       /* SSD algorithm */
+       if (si->cluster_info) {
+               scan_swap_map_try_ssd_cluster(si, &offset, &scan_base);
+               goto checks;
+       }
+
        if (unlikely(!si->cluster_nr--)) {
                if (si->pages - si->inuse_pages < SWAPFILE_CLUSTER) {
                        si->cluster_nr = SWAPFILE_CLUSTER - 1;
                        goto checks;
                }
-               if (si->flags & SWP_PAGE_DISCARD) {
-                       /*
-                        * Start range check on racing allocations, in case
-                        * they overlap the cluster we eventually decide on
-                        * (we scan without swap_lock to allow preemption).
-                        * It's hardly conceivable that cluster_nr could be
-                        * wrapped during our scan, but don't depend on it.
-                        */
-                       if (si->lowest_alloc)
-                               goto checks;
-                       si->lowest_alloc = si->max;
-                       si->highest_alloc = 0;
-               }
+
                spin_unlock(&si->lock);
 
                /*
@@ -248,7 +523,6 @@ static unsigned long scan_swap_map(struct swap_info_struct *si,
                                offset -= SWAPFILE_CLUSTER - 1;
                                si->cluster_next = offset;
                                si->cluster_nr = SWAPFILE_CLUSTER - 1;
-                               found_free_cluster = 1;
                                goto checks;
                        }
                        if (unlikely(--latency_ration < 0)) {
@@ -269,7 +543,6 @@ static unsigned long scan_swap_map(struct swap_info_struct *si,
                                offset -= SWAPFILE_CLUSTER - 1;
                                si->cluster_next = offset;
                                si->cluster_nr = SWAPFILE_CLUSTER - 1;
-                               found_free_cluster = 1;
                                goto checks;
                        }
                        if (unlikely(--latency_ration < 0)) {
@@ -281,10 +554,13 @@ static unsigned long scan_swap_map(struct swap_info_struct *si,
                offset = scan_base;
                spin_lock(&si->lock);
                si->cluster_nr = SWAPFILE_CLUSTER - 1;
-               si->lowest_alloc = 0;
        }
 
 checks:
+       if (si->cluster_info) {
+               while (scan_swap_map_ssd_cluster_conflict(si, offset))
+                       scan_swap_map_try_ssd_cluster(si, &offset, &scan_base);
+       }
        if (!(si->flags & SWP_WRITEOK))
                goto no_page;
        if (!si->highest_bit)
@@ -317,62 +593,10 @@ checks:
                si->highest_bit = 0;
        }
        si->swap_map[offset] = usage;
+       inc_cluster_info_page(si, si->cluster_info, offset);
        si->cluster_next = offset + 1;
        si->flags -= SWP_SCANNING;
 
-       if (si->lowest_alloc) {
-               /*
-                * Only set when SWP_PAGE_DISCARD, and there's a scan
-                * for a free cluster in progress or just completed.
-                */
-               if (found_free_cluster) {
-                       /*
-                        * To optimize wear-levelling, discard the
-                        * old data of the cluster, taking care not to
-                        * discard any of its pages that have already
-                        * been allocated by racing tasks (offset has
-                        * already stepped over any at the beginning).
-                        */
-                       if (offset < si->highest_alloc &&
-                           si->lowest_alloc <= last_in_cluster)
-                               last_in_cluster = si->lowest_alloc - 1;
-                       si->flags |= SWP_DISCARDING;
-                       spin_unlock(&si->lock);
-
-                       if (offset < last_in_cluster)
-                               discard_swap_cluster(si, offset,
-                                       last_in_cluster - offset + 1);
-
-                       spin_lock(&si->lock);
-                       si->lowest_alloc = 0;
-                       si->flags &= ~SWP_DISCARDING;
-
-                       smp_mb();       /* wake_up_bit advises this */
-                       wake_up_bit(&si->flags, ilog2(SWP_DISCARDING));
-
-               } else if (si->flags & SWP_DISCARDING) {
-                       /*
-                        * Delay using pages allocated by racing tasks
-                        * until the whole discard has been issued. We
-                        * could defer that delay until swap_writepage,
-                        * but it's easier to keep this self-contained.
-                        */
-                       spin_unlock(&si->lock);
-                       wait_on_bit(&si->flags, ilog2(SWP_DISCARDING),
-                               wait_for_discard, TASK_UNINTERRUPTIBLE);
-                       spin_lock(&si->lock);
-               } else {
-                       /*
-                        * Note pages allocated by racing tasks while
-                        * scan for a free cluster is in progress, so
-                        * that its final discard can exclude them.
-                        */
-                       if (offset < si->lowest_alloc)
-                               si->lowest_alloc = offset;
-                       if (offset > si->highest_alloc)
-                               si->highest_alloc = offset;
-               }
-       }
        return offset;
 
 scan:
@@ -527,16 +751,16 @@ static struct swap_info_struct *swap_info_get(swp_entry_t entry)
        return p;
 
 bad_free:
-       printk(KERN_ERR "swap_free: %s%08lx\n", Unused_offset, entry.val);
+       pr_err("swap_free: %s%08lx\n", Unused_offset, entry.val);
        goto out;
 bad_offset:
-       printk(KERN_ERR "swap_free: %s%08lx\n", Bad_offset, entry.val);
+       pr_err("swap_free: %s%08lx\n", Bad_offset, entry.val);
        goto out;
 bad_device:
-       printk(KERN_ERR "swap_free: %s%08lx\n", Unused_file, entry.val);
+       pr_err("swap_free: %s%08lx\n", Unused_file, entry.val);
        goto out;
 bad_nofile:
-       printk(KERN_ERR "swap_free: %s%08lx\n", Bad_file, entry.val);
+       pr_err("swap_free: %s%08lx\n", Bad_file, entry.val);
 out:
        return NULL;
 }
@@ -600,6 +824,7 @@ static unsigned char swap_entry_free(struct swap_info_struct *p,
 
        /* free if no reference */
        if (!usage) {
+               dec_cluster_info_page(p, p->cluster_info, offset);
                if (offset < p->lowest_bit)
                        p->lowest_bit = offset;
                if (offset > p->highest_bit)
@@ -1107,7 +1332,7 @@ static unsigned int find_next_to_unuse(struct swap_info_struct *si,
                        else
                                continue;
                }
-               count = si->swap_map[i];
+               count = ACCESS_ONCE(si->swap_map[i]);
                if (count && swap_count(count) != SWAP_MAP_BAD)
                        break;
        }
@@ -1127,7 +1352,11 @@ int try_to_unuse(unsigned int type, bool frontswap,
 {
        struct swap_info_struct *si = swap_info[type];
        struct mm_struct *start_mm;
-       unsigned char *swap_map;
+       volatile unsigned char *swap_map; /* swap_map is accessed without
+                                          * locking. Mark it as volatile
+                                          * to prevent compiler doing
+                                          * something odd.
+                                          */
        unsigned char swcount;
        struct page *page;
        swp_entry_t entry;
@@ -1178,7 +1407,15 @@ int try_to_unuse(unsigned int type, bool frontswap,
                         * reused since sys_swapoff() already disabled
                         * allocation from here, or alloc_page() failed.
                         */
-                       if (!*swap_map)
+                       swcount = *swap_map;
+                       /*
+                        * We don't hold lock here, so the swap entry could be
+                        * SWAP_MAP_BAD (when the cluster is discarding).
+                        * Instead of fail out, We can just skip the swap
+                        * entry because swapoff will wait for discarding
+                        * finish anyway.
+                        */
+                       if (!swcount || swcount == SWAP_MAP_BAD)
                                continue;
                        retval = -ENOMEM;
                        break;
@@ -1524,7 +1761,8 @@ static int setup_swap_extents(struct swap_info_struct *sis, sector_t *span)
 }
 
 static void _enable_swap_info(struct swap_info_struct *p, int prio,
-                               unsigned char *swap_map)
+                               unsigned char *swap_map,
+                               struct swap_cluster_info *cluster_info)
 {
        int i, prev;
 
@@ -1533,6 +1771,7 @@ static void _enable_swap_info(struct swap_info_struct *p, int prio,
        else
                p->prio = --least_priority;
        p->swap_map = swap_map;
+       p->cluster_info = cluster_info;
        p->flags |= SWP_WRITEOK;
        atomic_long_add(p->pages, &nr_swap_pages);
        total_swap_pages += p->pages;
@@ -1553,12 +1792,13 @@ static void _enable_swap_info(struct swap_info_struct *p, int prio,
 
 static void enable_swap_info(struct swap_info_struct *p, int prio,
                                unsigned char *swap_map,
+                               struct swap_cluster_info *cluster_info,
                                unsigned long *frontswap_map)
 {
        frontswap_init(p->type, frontswap_map);
        spin_lock(&swap_lock);
        spin_lock(&p->lock);
-        _enable_swap_info(p, prio, swap_map);
+        _enable_swap_info(p, prio, swap_map, cluster_info);
        spin_unlock(&p->lock);
        spin_unlock(&swap_lock);
 }
@@ -1567,7 +1807,7 @@ static void reinsert_swap_info(struct swap_info_struct *p)
 {
        spin_lock(&swap_lock);
        spin_lock(&p->lock);
-       _enable_swap_info(p, p->prio, p->swap_map);
+       _enable_swap_info(p, p->prio, p->swap_map, p->cluster_info);
        spin_unlock(&p->lock);
        spin_unlock(&swap_lock);
 }
@@ -1576,6 +1816,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 {
        struct swap_info_struct *p = NULL;
        unsigned char *swap_map;
+       struct swap_cluster_info *cluster_info;
        unsigned long *frontswap_map;
        struct file *swap_file, *victim;
        struct address_space *mapping;
@@ -1651,6 +1892,8 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
                goto out_dput;
        }
 
+       flush_work(&p->discard_work);
+
        destroy_swap_extents(p);
        if (p->flags & SWP_CONTINUED)
                free_swap_count_continuations(p);
@@ -1675,6 +1918,8 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
        p->max = 0;
        swap_map = p->swap_map;
        p->swap_map = NULL;
+       cluster_info = p->cluster_info;
+       p->cluster_info = NULL;
        p->flags = 0;
        frontswap_map = frontswap_map_get(p);
        frontswap_map_set(p, NULL);
@@ -1682,7 +1927,10 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
        spin_unlock(&swap_lock);
        frontswap_invalidate_area(type);
        mutex_unlock(&swapon_mutex);
+       free_percpu(p->percpu_cluster);
+       p->percpu_cluster = NULL;
        vfree(swap_map);
+       vfree(cluster_info);
        vfree(frontswap_map);
        /* Destroy swap account informatin */
        swap_cgroup_swapoff(type);
@@ -1926,9 +2174,10 @@ static unsigned long read_swap_header(struct swap_info_struct *p,
        int i;
        unsigned long maxpages;
        unsigned long swapfilepages;
+       unsigned long last_page;
 
        if (memcmp("SWAPSPACE2", swap_header->magic.magic, 10)) {
-               printk(KERN_ERR "Unable to find swap-space signature\n");
+               pr_err("Unable to find swap-space signature\n");
                return 0;
        }
 
@@ -1942,9 +2191,8 @@ static unsigned long read_swap_header(struct swap_info_struct *p,
        }
        /* Check the swap header's sub-version */
        if (swap_header->info.version != 1) {
-               printk(KERN_WARNING
-                      "Unable to handle swap header version %d\n",
-                      swap_header->info.version);
+               pr_warn("Unable to handle swap header version %d\n",
+                       swap_header->info.version);
                return 0;
        }
 
@@ -1968,8 +2216,14 @@ static unsigned long read_swap_header(struct swap_info_struct *p,
         */
        maxpages = swp_offset(pte_to_swp_entry(
                        swp_entry_to_pte(swp_entry(0, ~0UL)))) + 1;
-       if (maxpages > swap_header->info.last_page) {
-               maxpages = swap_header->info.last_page + 1;
+       last_page = swap_header->info.last_page;
+       if (last_page > maxpages) {
+               pr_warn("Truncating oversized swap area, only using %luk out of %luk\n",
+                       maxpages << (PAGE_SHIFT - 10),
+                       last_page << (PAGE_SHIFT - 10));
+       }
+       if (maxpages > last_page) {
+               maxpages = last_page + 1;
                /* p->max is an unsigned int: don't overflow it */
                if ((unsigned int)maxpages == 0)
                        maxpages = UINT_MAX;
@@ -1980,8 +2234,7 @@ static unsigned long read_swap_header(struct swap_info_struct *p,
                return 0;
        swapfilepages = i_size_read(inode) >> PAGE_SHIFT;
        if (swapfilepages && maxpages > swapfilepages) {
-               printk(KERN_WARNING
-                      "Swap area shorter than signature indicates\n");
+               pr_warn("Swap area shorter than signature indicates\n");
                return 0;
        }
        if (swap_header->info.nr_badpages && S_ISREG(inode->i_mode))
@@ -1995,15 +2248,23 @@ static unsigned long read_swap_header(struct swap_info_struct *p,
 static int setup_swap_map_and_extents(struct swap_info_struct *p,
                                        union swap_header *swap_header,
                                        unsigned char *swap_map,
+                                       struct swap_cluster_info *cluster_info,
                                        unsigned long maxpages,
                                        sector_t *span)
 {
        int i;
        unsigned int nr_good_pages;
        int nr_extents;
+       unsigned long nr_clusters = DIV_ROUND_UP(maxpages, SWAPFILE_CLUSTER);
+       unsigned long idx = p->cluster_next / SWAPFILE_CLUSTER;
 
        nr_good_pages = maxpages - 1;   /* omit header page */
 
+       cluster_set_null(&p->free_cluster_head);
+       cluster_set_null(&p->free_cluster_tail);
+       cluster_set_null(&p->discard_cluster_head);
+       cluster_set_null(&p->discard_cluster_tail);
+
        for (i = 0; i < swap_header->info.nr_badpages; i++) {
                unsigned int page_nr = swap_header->info.badpages[i];
                if (page_nr == 0 || page_nr > swap_header->info.last_page)
@@ -2011,11 +2272,25 @@ static int setup_swap_map_and_extents(struct swap_info_struct *p,
                if (page_nr < maxpages) {
                        swap_map[page_nr] = SWAP_MAP_BAD;
                        nr_good_pages--;
+                       /*
+                        * Haven't marked the cluster free yet, no list
+                        * operation involved
+                        */
+                       inc_cluster_info_page(p, cluster_info, page_nr);
                }
        }
 
+       /* Haven't marked the cluster free yet, no list operation involved */
+       for (i = maxpages; i < round_up(maxpages, SWAPFILE_CLUSTER); i++)
+               inc_cluster_info_page(p, cluster_info, i);
+
        if (nr_good_pages) {
                swap_map[0] = SWAP_MAP_BAD;
+               /*
+                * Not mark the cluster free yet, no list
+                * operation involved
+                */
+               inc_cluster_info_page(p, cluster_info, 0);
                p->max = maxpages;
                p->pages = nr_good_pages;
                nr_extents = setup_swap_extents(p, span);
@@ -2024,10 +2299,34 @@ static int setup_swap_map_and_extents(struct swap_info_struct *p,
                nr_good_pages = p->pages;
        }
        if (!nr_good_pages) {
-               printk(KERN_WARNING "Empty swap-file\n");
+               pr_warn("Empty swap-file\n");
                return -EINVAL;
        }
 
+       if (!cluster_info)
+               return nr_extents;
+
+       for (i = 0; i < nr_clusters; i++) {
+               if (!cluster_count(&cluster_info[idx])) {
+                       cluster_set_flag(&cluster_info[idx], CLUSTER_FLAG_FREE);
+                       if (cluster_is_null(&p->free_cluster_head)) {
+                               cluster_set_next_flag(&p->free_cluster_head,
+                                                               idx, 0);
+                               cluster_set_next_flag(&p->free_cluster_tail,
+                                                               idx, 0);
+                       } else {
+                               unsigned int tail;
+
+                               tail = cluster_next(&p->free_cluster_tail);
+                               cluster_set_next(&cluster_info[tail], idx);
+                               cluster_set_next_flag(&p->free_cluster_tail,
+                                                               idx, 0);
+                       }
+               }
+               idx++;
+               if (idx == nr_clusters)
+                       idx = 0;
+       }
        return nr_extents;
 }
 
@@ -2059,6 +2358,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
        sector_t span;
        unsigned long maxpages;
        unsigned char *swap_map = NULL;
+       struct swap_cluster_info *cluster_info = NULL;
        unsigned long *frontswap_map = NULL;
        struct page *page = NULL;
        struct inode *inode = NULL;
@@ -2073,6 +2373,8 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
        if (IS_ERR(p))
                return PTR_ERR(p);
 
+       INIT_WORK(&p->discard_work, swap_discard_work);
+
        name = getname(specialfile);
        if (IS_ERR(name)) {
                error = PTR_ERR(name);
@@ -2132,13 +2434,38 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
                error = -ENOMEM;
                goto bad_swap;
        }
+       if (p->bdev && blk_queue_nonrot(bdev_get_queue(p->bdev))) {
+               p->flags |= SWP_SOLIDSTATE;
+               /*
+                * select a random position to start with to help wear leveling
+                * SSD
+                */
+               p->cluster_next = 1 + (prandom_u32() % p->highest_bit);
+
+               cluster_info = vzalloc(DIV_ROUND_UP(maxpages,
+                       SWAPFILE_CLUSTER) * sizeof(*cluster_info));
+               if (!cluster_info) {
+                       error = -ENOMEM;
+                       goto bad_swap;
+               }
+               p->percpu_cluster = alloc_percpu(struct percpu_cluster);
+               if (!p->percpu_cluster) {
+                       error = -ENOMEM;
+                       goto bad_swap;
+               }
+               for_each_possible_cpu(i) {
+                       struct percpu_cluster *cluster;
+                       cluster = per_cpu_ptr(p->percpu_cluster, i);
+                       cluster_set_null(&cluster->index);
+               }
+       }
 
        error = swap_cgroup_swapon(p->type, maxpages);
        if (error)
                goto bad_swap;
 
        nr_extents = setup_swap_map_and_extents(p, swap_header, swap_map,
-               maxpages, &span);
+               cluster_info, maxpages, &span);
        if (unlikely(nr_extents < 0)) {
                error = nr_extents;
                goto bad_swap;
@@ -2147,41 +2474,33 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
        if (frontswap_enabled)
                frontswap_map = vzalloc(BITS_TO_LONGS(maxpages) * sizeof(long));
 
-       if (p->bdev) {
-               if (blk_queue_nonrot(bdev_get_queue(p->bdev))) {
-                       p->flags |= SWP_SOLIDSTATE;
-                       p->cluster_next = 1 + (prandom_u32() % p->highest_bit);
-               }
-
-               if ((swap_flags & SWAP_FLAG_DISCARD) && swap_discardable(p)) {
-                       /*
-                        * When discard is enabled for swap with no particular
-                        * policy flagged, we set all swap discard flags here in
-                        * order to sustain backward compatibility with older
-                        * swapon(8) releases.
-                        */
-                       p->flags |= (SWP_DISCARDABLE | SWP_AREA_DISCARD |
-                                    SWP_PAGE_DISCARD);
+       if (p->bdev &&(swap_flags & SWAP_FLAG_DISCARD) && swap_discardable(p)) {
+               /*
+                * When discard is enabled for swap with no particular
+                * policy flagged, we set all swap discard flags here in
+                * order to sustain backward compatibility with older
+                * swapon(8) releases.
+                */
+               p->flags |= (SWP_DISCARDABLE | SWP_AREA_DISCARD |
+                            SWP_PAGE_DISCARD);
 
-                       /*
-                        * By flagging sys_swapon, a sysadmin can tell us to
-                        * either do single-time area discards only, or to just
-                        * perform discards for released swap page-clusters.
-                        * Now it's time to adjust the p->flags accordingly.
-                        */
-                       if (swap_flags & SWAP_FLAG_DISCARD_ONCE)
-                               p->flags &= ~SWP_PAGE_DISCARD;
-                       else if (swap_flags & SWAP_FLAG_DISCARD_PAGES)
-                               p->flags &= ~SWP_AREA_DISCARD;
-
-                       /* issue a swapon-time discard if it's still required */
-                       if (p->flags & SWP_AREA_DISCARD) {
-                               int err = discard_swap(p);
-                               if (unlikely(err))
-                                       printk(KERN_ERR
-                                              "swapon: discard_swap(%p): %d\n",
-                                               p, err);
-                       }
+               /*
+                * By flagging sys_swapon, a sysadmin can tell us to
+                * either do single-time area discards only, or to just
+                * perform discards for released swap page-clusters.
+                * Now it's time to adjust the p->flags accordingly.
+                */
+               if (swap_flags & SWAP_FLAG_DISCARD_ONCE)
+                       p->flags &= ~SWP_PAGE_DISCARD;
+               else if (swap_flags & SWAP_FLAG_DISCARD_PAGES)
+                       p->flags &= ~SWP_AREA_DISCARD;
+
+               /* issue a swapon-time discard if it's still required */
+               if (p->flags & SWP_AREA_DISCARD) {
+                       int err = discard_swap(p);
+                       if (unlikely(err))
+                               pr_err("swapon: discard_swap(%p): %d\n",
+                                       p, err);
                }
        }
 
@@ -2190,9 +2509,9 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
        if (swap_flags & SWAP_FLAG_PREFER)
                prio =
                  (swap_flags & SWAP_FLAG_PRIO_MASK) >> SWAP_FLAG_PRIO_SHIFT;
-       enable_swap_info(p, prio, swap_map, frontswap_map);
+       enable_swap_info(p, prio, swap_map, cluster_info, frontswap_map);
 
-       printk(KERN_INFO "Adding %uk swap on %s.  "
+       pr_info("Adding %uk swap on %s.  "
                        "Priority:%d extents:%d across:%lluk %s%s%s%s%s\n",
                p->pages<<(PAGE_SHIFT-10), name->name, p->prio,
                nr_extents, (unsigned long long)span<<(PAGE_SHIFT-10),
@@ -2211,6 +2530,8 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
        error = 0;
        goto out;
 bad_swap:
+       free_percpu(p->percpu_cluster);
+       p->percpu_cluster = NULL;
        if (inode && S_ISBLK(inode->i_mode) && p->bdev) {
                set_blocksize(p->bdev, p->old_block_size);
                blkdev_put(p->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
@@ -2222,6 +2543,7 @@ bad_swap:
        p->flags = 0;
        spin_unlock(&swap_lock);
        vfree(swap_map);
+       vfree(cluster_info);
        if (swap_file) {
                if (inode && S_ISREG(inode->i_mode)) {
                        mutex_unlock(&inode->i_mutex);
@@ -2291,6 +2613,16 @@ static int __swap_duplicate(swp_entry_t entry, unsigned char usage)
                goto unlock_out;
 
        count = p->swap_map[offset];
+
+       /*
+        * swapin_readahead() doesn't check if a swap entry is valid, so the
+        * swap entry could be SWAP_MAP_BAD. Check here with lock held.
+        */
+       if (unlikely(swap_count(count) == SWAP_MAP_BAD)) {
+               err = -ENOENT;
+               goto unlock_out;
+       }
+
        has_cache = count & SWAP_HAS_CACHE;
        count &= ~SWAP_HAS_CACHE;
        err = 0;
@@ -2326,7 +2658,7 @@ out:
        return err;
 
 bad_file:
-       printk(KERN_ERR "swap_dup: %s%08lx\n", Bad_file, entry.val);
+       pr_err("swap_dup: %s%08lx\n", Bad_file, entry.val);
        goto out;
 }
 
index e2e8a8a..353b683 100644 (file)
@@ -567,7 +567,6 @@ EXPORT_SYMBOL_GPL(invalidate_inode_pages2);
 /**
  * truncate_pagecache - unmap and remove pagecache that has been truncated
  * @inode: inode
- * @oldsize: old file size
  * @newsize: new file size
  *
  * inode's new i_size must already be written before truncate_pagecache
@@ -580,7 +579,7 @@ EXPORT_SYMBOL_GPL(invalidate_inode_pages2);
  * situations such as writepage being called for a page that has already
  * had its underlying blocks deallocated.
  */
-void truncate_pagecache(struct inode *inode, loff_t oldsize, loff_t newsize)
+void truncate_pagecache(struct inode *inode, loff_t newsize)
 {
        struct address_space *mapping = inode->i_mapping;
        loff_t holebegin = round_up(newsize, PAGE_SIZE);
@@ -614,12 +613,8 @@ EXPORT_SYMBOL(truncate_pagecache);
  */
 void truncate_setsize(struct inode *inode, loff_t newsize)
 {
-       loff_t oldsize;
-
-       oldsize = inode->i_size;
        i_size_write(inode, newsize);
-
-       truncate_pagecache(inode, oldsize, newsize);
+       truncate_pagecache(inode, newsize);
 }
 EXPORT_SYMBOL(truncate_setsize);
 
index 7441c41..eaf63fc 100644 (file)
--- a/mm/util.c
+++ b/mm/util.c
@@ -388,15 +388,12 @@ struct address_space *page_mapping(struct page *page)
        struct address_space *mapping = page->mapping;
 
        VM_BUG_ON(PageSlab(page));
-#ifdef CONFIG_SWAP
        if (unlikely(PageSwapCache(page))) {
                swp_entry_t entry;
 
                entry.val = page_private(page);
                mapping = swap_address_space(entry);
-       } else
-#endif
-       if ((unsigned long)mapping & PAGE_MAPPING_ANON)
+       } else if ((unsigned long)mapping & PAGE_MAPPING_ANON)
                mapping = NULL;
        return mapping;
 }
index 13a5495..1074543 100644 (file)
@@ -752,7 +752,6 @@ struct vmap_block_queue {
 struct vmap_block {
        spinlock_t lock;
        struct vmap_area *va;
-       struct vmap_block_queue *vbq;
        unsigned long free, dirty;
        DECLARE_BITMAP(dirty_map, VMAP_BBMAP_BITS);
        struct list_head free_list;
@@ -830,7 +829,6 @@ static struct vmap_block *new_vmap_block(gfp_t gfp_mask)
        radix_tree_preload_end();
 
        vbq = &get_cpu_var(vmap_block_queue);
-       vb->vbq = vbq;
        spin_lock(&vbq->lock);
        list_add_rcu(&vb->free_list, &vbq->free);
        spin_unlock(&vbq->lock);
@@ -1018,15 +1016,16 @@ void vm_unmap_aliases(void)
 
                rcu_read_lock();
                list_for_each_entry_rcu(vb, &vbq->free, free_list) {
-                       int i;
+                       int i, j;
 
                        spin_lock(&vb->lock);
                        i = find_first_bit(vb->dirty_map, VMAP_BBMAP_BITS);
-                       while (i < VMAP_BBMAP_BITS) {
+                       if (i < VMAP_BBMAP_BITS) {
                                unsigned long s, e;
-                               int j;
-                               j = find_next_zero_bit(vb->dirty_map,
-                                       VMAP_BBMAP_BITS, i);
+
+                               j = find_last_bit(vb->dirty_map,
+                                                       VMAP_BBMAP_BITS);
+                               j = j + 1; /* need exclusive index */
 
                                s = vb->va->va_start + (i << PAGE_SHIFT);
                                e = vb->va->va_start + (j << PAGE_SHIFT);
@@ -1036,10 +1035,6 @@ void vm_unmap_aliases(void)
                                        start = s;
                                if (e > end)
                                        end = e;
-
-                               i = j;
-                               i = find_next_bit(vb->dirty_map,
-                                                       VMAP_BBMAP_BITS, i);
                        }
                        spin_unlock(&vb->lock);
                }
@@ -1263,7 +1258,7 @@ void unmap_kernel_range(unsigned long addr, unsigned long size)
 int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages)
 {
        unsigned long addr = (unsigned long)area->addr;
-       unsigned long end = addr + area->size - PAGE_SIZE;
+       unsigned long end = addr + get_vm_area_size(area);
        int err;
 
        err = vmap_page_range(addr, end, prot, *pages);
@@ -1558,7 +1553,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
        unsigned int nr_pages, array_size, i;
        gfp_t nested_gfp = (gfp_mask & GFP_RECLAIM_MASK) | __GFP_ZERO;
 
-       nr_pages = (area->size - PAGE_SIZE) >> PAGE_SHIFT;
+       nr_pages = get_vm_area_size(area) >> PAGE_SHIFT;
        array_size = (nr_pages * sizeof(struct page *));
 
        area->nr_pages = nr_pages;
@@ -1990,7 +1985,7 @@ long vread(char *buf, char *addr, unsigned long count)
 
                vm = va->vm;
                vaddr = (char *) vm->addr;
-               if (addr >= vaddr + vm->size - PAGE_SIZE)
+               if (addr >= vaddr + get_vm_area_size(vm))
                        continue;
                while (addr < vaddr) {
                        if (count == 0)
@@ -2000,7 +1995,7 @@ long vread(char *buf, char *addr, unsigned long count)
                        addr++;
                        count--;
                }
-               n = vaddr + vm->size - PAGE_SIZE - addr;
+               n = vaddr + get_vm_area_size(vm) - addr;
                if (n > count)
                        n = count;
                if (!(vm->flags & VM_IOREMAP))
@@ -2072,7 +2067,7 @@ long vwrite(char *buf, char *addr, unsigned long count)
 
                vm = va->vm;
                vaddr = (char *) vm->addr;
-               if (addr >= vaddr + vm->size - PAGE_SIZE)
+               if (addr >= vaddr + get_vm_area_size(vm))
                        continue;
                while (addr < vaddr) {
                        if (count == 0)
@@ -2081,7 +2076,7 @@ long vwrite(char *buf, char *addr, unsigned long count)
                        addr++;
                        count--;
                }
-               n = vaddr + vm->size - PAGE_SIZE - addr;
+               n = vaddr + get_vm_area_size(vm) - addr;
                if (n > count)
                        n = count;
                if (!(vm->flags & VM_IOREMAP)) {
index 2cff0d4..8ed1b77 100644 (file)
@@ -139,13 +139,44 @@ static bool global_reclaim(struct scan_control *sc)
 {
        return !sc->target_mem_cgroup;
 }
+
+static bool mem_cgroup_should_soft_reclaim(struct scan_control *sc)
+{
+       struct mem_cgroup *root = sc->target_mem_cgroup;
+       return !mem_cgroup_disabled() &&
+               mem_cgroup_soft_reclaim_eligible(root, root) != SKIP_TREE;
+}
 #else
 static bool global_reclaim(struct scan_control *sc)
 {
        return true;
 }
+
+static bool mem_cgroup_should_soft_reclaim(struct scan_control *sc)
+{
+       return false;
+}
 #endif
 
+unsigned long zone_reclaimable_pages(struct zone *zone)
+{
+       int nr;
+
+       nr = zone_page_state(zone, NR_ACTIVE_FILE) +
+            zone_page_state(zone, NR_INACTIVE_FILE);
+
+       if (get_nr_swap_pages() > 0)
+               nr += zone_page_state(zone, NR_ACTIVE_ANON) +
+                     zone_page_state(zone, NR_INACTIVE_ANON);
+
+       return nr;
+}
+
+bool zone_reclaimable(struct zone *zone)
+{
+       return zone->pages_scanned < zone_reclaimable_pages(zone) * 6;
+}
+
 static unsigned long get_lru_size(struct lruvec *lruvec, enum lru_list lru)
 {
        if (!mem_cgroup_disabled())
@@ -155,14 +186,31 @@ static unsigned long get_lru_size(struct lruvec *lruvec, enum lru_list lru)
 }
 
 /*
- * Add a shrinker callback to be called from the vm
+ * Add a shrinker callback to be called from the vm.
  */
-void register_shrinker(struct shrinker *shrinker)
+int register_shrinker(struct shrinker *shrinker)
 {
-       atomic_long_set(&shrinker->nr_in_batch, 0);
+       size_t size = sizeof(*shrinker->nr_deferred);
+
+       /*
+        * If we only have one possible node in the system anyway, save
+        * ourselves the trouble and disable NUMA aware behavior. This way we
+        * will save memory and some small loop time later.
+        */
+       if (nr_node_ids == 1)
+               shrinker->flags &= ~SHRINKER_NUMA_AWARE;
+
+       if (shrinker->flags & SHRINKER_NUMA_AWARE)
+               size *= nr_node_ids;
+
+       shrinker->nr_deferred = kzalloc(size, GFP_KERNEL);
+       if (!shrinker->nr_deferred)
+               return -ENOMEM;
+
        down_write(&shrinker_rwsem);
        list_add_tail(&shrinker->list, &shrinker_list);
        up_write(&shrinker_rwsem);
+       return 0;
 }
 EXPORT_SYMBOL(register_shrinker);
 
@@ -177,15 +225,102 @@ void unregister_shrinker(struct shrinker *shrinker)
 }
 EXPORT_SYMBOL(unregister_shrinker);
 
-static inline int do_shrinker_shrink(struct shrinker *shrinker,
-                                    struct shrink_control *sc,
-                                    unsigned long nr_to_scan)
-{
-       sc->nr_to_scan = nr_to_scan;
-       return (*shrinker->shrink)(shrinker, sc);
+#define SHRINK_BATCH 128
+
+static unsigned long
+shrink_slab_node(struct shrink_control *shrinkctl, struct shrinker *shrinker,
+                unsigned long nr_pages_scanned, unsigned long lru_pages)
+{
+       unsigned long freed = 0;
+       unsigned long long delta;
+       long total_scan;
+       long max_pass;
+       long nr;
+       long new_nr;
+       int nid = shrinkctl->nid;
+       long batch_size = shrinker->batch ? shrinker->batch
+                                         : SHRINK_BATCH;
+
+       max_pass = shrinker->count_objects(shrinker, shrinkctl);
+       if (max_pass == 0)
+               return 0;
+
+       /*
+        * copy the current shrinker scan count into a local variable
+        * and zero it so that other concurrent shrinker invocations
+        * don't also do this scanning work.
+        */
+       nr = atomic_long_xchg(&shrinker->nr_deferred[nid], 0);
+
+       total_scan = nr;
+       delta = (4 * nr_pages_scanned) / shrinker->seeks;
+       delta *= max_pass;
+       do_div(delta, lru_pages + 1);
+       total_scan += delta;
+       if (total_scan < 0) {
+               printk(KERN_ERR
+               "shrink_slab: %pF negative objects to delete nr=%ld\n",
+                      shrinker->scan_objects, total_scan);
+               total_scan = max_pass;
+       }
+
+       /*
+        * We need to avoid excessive windup on filesystem shrinkers
+        * due to large numbers of GFP_NOFS allocations causing the
+        * shrinkers to return -1 all the time. This results in a large
+        * nr being built up so when a shrink that can do some work
+        * comes along it empties the entire cache due to nr >>>
+        * max_pass.  This is bad for sustaining a working set in
+        * memory.
+        *
+        * Hence only allow the shrinker to scan the entire cache when
+        * a large delta change is calculated directly.
+        */
+       if (delta < max_pass / 4)
+               total_scan = min(total_scan, max_pass / 2);
+
+       /*
+        * Avoid risking looping forever due to too large nr value:
+        * never try to free more than twice the estimate number of
+        * freeable entries.
+        */
+       if (total_scan > max_pass * 2)
+               total_scan = max_pass * 2;
+
+       trace_mm_shrink_slab_start(shrinker, shrinkctl, nr,
+                               nr_pages_scanned, lru_pages,
+                               max_pass, delta, total_scan);
+
+       while (total_scan >= batch_size) {
+               unsigned long ret;
+
+               shrinkctl->nr_to_scan = batch_size;
+               ret = shrinker->scan_objects(shrinker, shrinkctl);
+               if (ret == SHRINK_STOP)
+                       break;
+               freed += ret;
+
+               count_vm_events(SLABS_SCANNED, batch_size);
+               total_scan -= batch_size;
+
+               cond_resched();
+       }
+
+       /*
+        * move the unused scan count back into the shrinker in a
+        * manner that handles concurrent updates. If we exhausted the
+        * scan, there is no need to do an update.
+        */
+       if (total_scan > 0)
+               new_nr = atomic_long_add_return(total_scan,
+                                               &shrinker->nr_deferred[nid]);
+       else
+               new_nr = atomic_long_read(&shrinker->nr_deferred[nid]);
+
+       trace_mm_shrink_slab_end(shrinker, freed, nr, new_nr);
+       return freed;
 }
 
-#define SHRINK_BATCH 128
 /*
  * Call the shrink functions to age shrinkable caches
  *
@@ -205,115 +340,45 @@ static inline int do_shrinker_shrink(struct shrinker *shrinker,
  *
  * Returns the number of slab objects which we shrunk.
  */
-unsigned long shrink_slab(struct shrink_control *shrink,
+unsigned long shrink_slab(struct shrink_control *shrinkctl,
                          unsigned long nr_pages_scanned,
                          unsigned long lru_pages)
 {
        struct shrinker *shrinker;
-       unsigned long ret = 0;
+       unsigned long freed = 0;
 
        if (nr_pages_scanned == 0)
                nr_pages_scanned = SWAP_CLUSTER_MAX;
 
        if (!down_read_trylock(&shrinker_rwsem)) {
-               /* Assume we'll be able to shrink next time */
-               ret = 1;
+               /*
+                * If we would return 0, our callers would understand that we
+                * have nothing else to shrink and give up trying. By returning
+                * 1 we keep it going and assume we'll be able to shrink next
+                * time.
+                */
+               freed = 1;
                goto out;
        }
 
        list_for_each_entry(shrinker, &shrinker_list, list) {
-               unsigned long long delta;
-               long total_scan;
-               long max_pass;
-               int shrink_ret = 0;
-               long nr;
-               long new_nr;
-               long batch_size = shrinker->batch ? shrinker->batch
-                                                 : SHRINK_BATCH;
-
-               max_pass = do_shrinker_shrink(shrinker, shrink, 0);
-               if (max_pass <= 0)
-                       continue;
-
-               /*
-                * copy the current shrinker scan count into a local variable
-                * and zero it so that other concurrent shrinker invocations
-                * don't also do this scanning work.
-                */
-               nr = atomic_long_xchg(&shrinker->nr_in_batch, 0);
-
-               total_scan = nr;
-               delta = (4 * nr_pages_scanned) / shrinker->seeks;
-               delta *= max_pass;
-               do_div(delta, lru_pages + 1);
-               total_scan += delta;
-               if (total_scan < 0) {
-                       printk(KERN_ERR "shrink_slab: %pF negative objects to "
-                              "delete nr=%ld\n",
-                              shrinker->shrink, total_scan);
-                       total_scan = max_pass;
-               }
-
-               /*
-                * We need to avoid excessive windup on filesystem shrinkers
-                * due to large numbers of GFP_NOFS allocations causing the
-                * shrinkers to return -1 all the time. This results in a large
-                * nr being built up so when a shrink that can do some work
-                * comes along it empties the entire cache due to nr >>>
-                * max_pass.  This is bad for sustaining a working set in
-                * memory.
-                *
-                * Hence only allow the shrinker to scan the entire cache when
-                * a large delta change is calculated directly.
-                */
-               if (delta < max_pass / 4)
-                       total_scan = min(total_scan, max_pass / 2);
-
-               /*
-                * Avoid risking looping forever due to too large nr value:
-                * never try to free more than twice the estimate number of
-                * freeable entries.
-                */
-               if (total_scan > max_pass * 2)
-                       total_scan = max_pass * 2;
-
-               trace_mm_shrink_slab_start(shrinker, shrink, nr,
-                                       nr_pages_scanned, lru_pages,
-                                       max_pass, delta, total_scan);
-
-               while (total_scan >= batch_size) {
-                       int nr_before;
+               for_each_node_mask(shrinkctl->nid, shrinkctl->nodes_to_scan) {
+                       if (!node_online(shrinkctl->nid))
+                               continue;
 
-                       nr_before = do_shrinker_shrink(shrinker, shrink, 0);
-                       shrink_ret = do_shrinker_shrink(shrinker, shrink,
-                                                       batch_size);
-                       if (shrink_ret == -1)
+                       if (!(shrinker->flags & SHRINKER_NUMA_AWARE) &&
+                           (shrinkctl->nid != 0))
                                break;
-                       if (shrink_ret < nr_before)
-                               ret += nr_before - shrink_ret;
-                       count_vm_events(SLABS_SCANNED, batch_size);
-                       total_scan -= batch_size;
-
-                       cond_resched();
-               }
 
-               /*
-                * move the unused scan count back into the shrinker in a
-                * manner that handles concurrent updates. If we exhausted the
-                * scan, there is no need to do an update.
-                */
-               if (total_scan > 0)
-                       new_nr = atomic_long_add_return(total_scan,
-                                       &shrinker->nr_in_batch);
-               else
-                       new_nr = atomic_long_read(&shrinker->nr_in_batch);
+                       freed += shrink_slab_node(shrinkctl, shrinker,
+                                nr_pages_scanned, lru_pages);
 
-               trace_mm_shrink_slab_end(shrinker, shrink_ret, nr, new_nr);
+               }
        }
        up_read(&shrinker_rwsem);
 out:
        cond_resched();
-       return ret;
+       return freed;
 }
 
 static inline int is_page_cache_freeable(struct page *page)
@@ -545,7 +610,7 @@ int remove_mapping(struct address_space *mapping, struct page *page)
  */
 void putback_lru_page(struct page *page)
 {
-       int lru;
+       bool is_unevictable;
        int was_unevictable = PageUnevictable(page);
 
        VM_BUG_ON(PageLRU(page));
@@ -560,14 +625,14 @@ redo:
                 * unevictable page on [in]active list.
                 * We know how to handle that.
                 */
-               lru = page_lru_base_type(page);
+               is_unevictable = false;
                lru_cache_add(page);
        } else {
                /*
                 * Put unevictable pages directly on zone's unevictable
                 * list.
                 */
-               lru = LRU_UNEVICTABLE;
+               is_unevictable = true;
                add_page_to_unevictable_list(page);
                /*
                 * When racing with an mlock or AS_UNEVICTABLE clearing
@@ -587,7 +652,7 @@ redo:
         * page is on unevictable list, it never be freed. To avoid that,
         * check after we added it to the list, again.
         */
-       if (lru == LRU_UNEVICTABLE && page_evictable(page)) {
+       if (is_unevictable && page_evictable(page)) {
                if (!isolate_lru_page(page)) {
                        put_page(page);
                        goto redo;
@@ -598,9 +663,9 @@ redo:
                 */
        }
 
-       if (was_unevictable && lru != LRU_UNEVICTABLE)
+       if (was_unevictable && !is_unevictable)
                count_vm_event(UNEVICTABLE_PGRESCUED);
-       else if (!was_unevictable && lru == LRU_UNEVICTABLE)
+       else if (!was_unevictable && is_unevictable)
                count_vm_event(UNEVICTABLE_PGCULLED);
 
        put_page(page);         /* drop ref from isolate */
@@ -1789,7 +1854,7 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
         * latencies, so it's better to scan a minimum amount there as
         * well.
         */
-       if (current_is_kswapd() && zone->all_unreclaimable)
+       if (current_is_kswapd() && !zone_reclaimable(zone))
                force_scan = true;
        if (!global_reclaim(sc))
                force_scan = true;
@@ -2111,9 +2176,11 @@ static inline bool should_continue_reclaim(struct zone *zone,
        }
 }
 
-static void shrink_zone(struct zone *zone, struct scan_control *sc)
+static int
+__shrink_zone(struct zone *zone, struct scan_control *sc, bool soft_reclaim)
 {
        unsigned long nr_reclaimed, nr_scanned;
+       int groups_scanned = 0;
 
        do {
                struct mem_cgroup *root = sc->target_mem_cgroup;
@@ -2121,15 +2188,17 @@ static void shrink_zone(struct zone *zone, struct scan_control *sc)
                        .zone = zone,
                        .priority = sc->priority,
                };
-               struct mem_cgroup *memcg;
+               struct mem_cgroup *memcg = NULL;
+               mem_cgroup_iter_filter filter = (soft_reclaim) ?
+                       mem_cgroup_soft_reclaim_eligible : NULL;
 
                nr_reclaimed = sc->nr_reclaimed;
                nr_scanned = sc->nr_scanned;
 
-               memcg = mem_cgroup_iter(root, NULL, &reclaim);
-               do {
+               while ((memcg = mem_cgroup_iter_cond(root, memcg, &reclaim, filter))) {
                        struct lruvec *lruvec;
 
+                       groups_scanned++;
                        lruvec = mem_cgroup_zone_lruvec(zone, memcg);
 
                        shrink_lruvec(lruvec, sc);
@@ -2149,8 +2218,7 @@ static void shrink_zone(struct zone *zone, struct scan_control *sc)
                                mem_cgroup_iter_break(root, memcg);
                                break;
                        }
-                       memcg = mem_cgroup_iter(root, memcg, &reclaim);
-               } while (memcg);
+               }
 
                vmpressure(sc->gfp_mask, sc->target_mem_cgroup,
                           sc->nr_scanned - nr_scanned,
@@ -2158,6 +2226,37 @@ static void shrink_zone(struct zone *zone, struct scan_control *sc)
 
        } while (should_continue_reclaim(zone, sc->nr_reclaimed - nr_reclaimed,
                                         sc->nr_scanned - nr_scanned, sc));
+
+       return groups_scanned;
+}
+
+
+static void shrink_zone(struct zone *zone, struct scan_control *sc)
+{
+       bool do_soft_reclaim = mem_cgroup_should_soft_reclaim(sc);
+       unsigned long nr_scanned = sc->nr_scanned;
+       int scanned_groups;
+
+       scanned_groups = __shrink_zone(zone, sc, do_soft_reclaim);
+       /*
+        * The memcg iterator might race with another reclaimer, or start
+        * from an incomplete tree walk, so the walk in __shrink_zone might
+        * have missed groups that are above the soft limit. Try another
+        * loop to catch up with the others. Do it just once to prevent
+        * reclaim latencies when other reclaimers always preempt this
+        * one.
+        */
+       if (do_soft_reclaim && !scanned_groups)
+               __shrink_zone(zone, sc, do_soft_reclaim);
+
+       /*
+        * No group is over the soft limit, or the groups that are over it
+        * have no pages in the zone we are reclaiming, so reclaim everybody.
+        */
+       if (do_soft_reclaim && (sc->nr_scanned == nr_scanned)) {
+               __shrink_zone(zone, sc, false);
+               return;
+       }
 }
 
 /* Returns true if compaction should go ahead for a high-order request */
@@ -2221,8 +2320,6 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
 {
        struct zoneref *z;
        struct zone *zone;
-       unsigned long nr_soft_reclaimed;
-       unsigned long nr_soft_scanned;
        bool aborted_reclaim = false;
 
        /*
@@ -2244,8 +2341,8 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
                if (global_reclaim(sc)) {
                        if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
                                continue;
-                       if (zone->all_unreclaimable &&
-                                       sc->priority != DEF_PRIORITY)
+                       if (sc->priority != DEF_PRIORITY &&
+                           !zone_reclaimable(zone))
                                continue;       /* Let kswapd poll it */
                        if (IS_ENABLED(CONFIG_COMPACTION)) {
                                /*
@@ -2262,18 +2359,6 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
                                        continue;
                                }
                        }
-                       /*
-                        * This steals pages from memory cgroups over softlimit
-                        * and returns the number of reclaimed pages and
-                        * scanned pages. This works for global memory pressure
-                        * and balancing, not for a memcg's limit.
-                        */
-                       nr_soft_scanned = 0;
-                       nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone,
-                                               sc->order, sc->gfp_mask,
-                                               &nr_soft_scanned);
-                       sc->nr_reclaimed += nr_soft_reclaimed;
-                       sc->nr_scanned += nr_soft_scanned;
                        /* need some check for avoid more shrink_zone() */
                }
 
@@ -2283,11 +2368,6 @@ static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
        return aborted_reclaim;
 }
 
-static bool zone_reclaimable(struct zone *zone)
-{
-       return zone->pages_scanned < zone_reclaimable_pages(zone) * 6;
-}
-
 /* All zones in zonelist are unreclaimable? */
 static bool all_unreclaimable(struct zonelist *zonelist,
                struct scan_control *sc)
@@ -2301,7 +2381,7 @@ static bool all_unreclaimable(struct zonelist *zonelist,
                        continue;
                if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
                        continue;
-               if (!zone->all_unreclaimable)
+               if (zone_reclaimable(zone))
                        return false;
        }
 
@@ -2354,12 +2434,16 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
                 */
                if (global_reclaim(sc)) {
                        unsigned long lru_pages = 0;
+
+                       nodes_clear(shrink->nodes_to_scan);
                        for_each_zone_zonelist(zone, z, zonelist,
                                        gfp_zone(sc->gfp_mask)) {
                                if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
                                        continue;
 
                                lru_pages += zone_reclaimable_pages(zone);
+                               node_set(zone_to_nid(zone),
+                                        shrink->nodes_to_scan);
                        }
 
                        shrink_slab(shrink, sc->nr_scanned, lru_pages);
@@ -2712,7 +2796,7 @@ static bool pgdat_balanced(pg_data_t *pgdat, int order, int classzone_idx)
                 * DEF_PRIORITY. Effectively, it considers them balanced so
                 * they must be considered balanced here as well!
                 */
-               if (zone->all_unreclaimable) {
+               if (!zone_reclaimable(zone)) {
                        balanced_pages += zone->managed_pages;
                        continue;
                }
@@ -2773,7 +2857,6 @@ static bool kswapd_shrink_zone(struct zone *zone,
                               unsigned long lru_pages,
                               unsigned long *nr_attempted)
 {
-       unsigned long nr_slab;
        int testorder = sc->order;
        unsigned long balance_gap;
        struct reclaim_state *reclaim_state = current->reclaim_state;
@@ -2816,17 +2899,16 @@ static bool kswapd_shrink_zone(struct zone *zone,
                return true;
 
        shrink_zone(zone, sc);
+       nodes_clear(shrink.nodes_to_scan);
+       node_set(zone_to_nid(zone), shrink.nodes_to_scan);
 
        reclaim_state->reclaimed_slab = 0;
-       nr_slab = shrink_slab(&shrink, sc->nr_scanned, lru_pages);
+       shrink_slab(&shrink, sc->nr_scanned, lru_pages);
        sc->nr_reclaimed += reclaim_state->reclaimed_slab;
 
        /* Account for the number of pages attempted to reclaim */
        *nr_attempted += sc->nr_to_reclaim;
 
-       if (nr_slab == 0 && !zone_reclaimable(zone))
-               zone->all_unreclaimable = 1;
-
        zone_clear_flag(zone, ZONE_WRITEBACK);
 
        /*
@@ -2835,7 +2917,7 @@ static bool kswapd_shrink_zone(struct zone *zone,
         * BDIs but as pressure is relieved, speculatively avoid congestion
         * waits.
         */
-       if (!zone->all_unreclaimable &&
+       if (zone_reclaimable(zone) &&
            zone_balanced(zone, testorder, 0, classzone_idx)) {
                zone_clear_flag(zone, ZONE_CONGESTED);
                zone_clear_flag(zone, ZONE_TAIL_LRU_DIRTY);
@@ -2870,8 +2952,6 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
 {
        int i;
        int end_zone = 0;       /* Inclusive.  0 = ZONE_DMA */
-       unsigned long nr_soft_reclaimed;
-       unsigned long nr_soft_scanned;
        struct scan_control sc = {
                .gfp_mask = GFP_KERNEL,
                .priority = DEF_PRIORITY,
@@ -2901,8 +2981,8 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
                        if (!populated_zone(zone))
                                continue;
 
-                       if (zone->all_unreclaimable &&
-                           sc.priority != DEF_PRIORITY)
+                       if (sc.priority != DEF_PRIORITY &&
+                           !zone_reclaimable(zone))
                                continue;
 
                        /*
@@ -2980,21 +3060,12 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
                        if (!populated_zone(zone))
                                continue;
 
-                       if (zone->all_unreclaimable &&
-                           sc.priority != DEF_PRIORITY)
+                       if (sc.priority != DEF_PRIORITY &&
+                           !zone_reclaimable(zone))
                                continue;
 
                        sc.nr_scanned = 0;
 
-                       nr_soft_scanned = 0;
-                       /*
-                        * Call soft limit reclaim before calling shrink_zone.
-                        */
-                       nr_soft_reclaimed = mem_cgroup_soft_limit_reclaim(zone,
-                                                       order, sc.gfp_mask,
-                                                       &nr_soft_scanned);
-                       sc.nr_reclaimed += nr_soft_reclaimed;
-
                        /*
                         * There should be no need to raise the scanning
                         * priority if enough pages are already being scanned
@@ -3237,7 +3308,7 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx)
        }
        if (!waitqueue_active(&pgdat->kswapd_wait))
                return;
-       if (zone_watermark_ok_safe(zone, order, low_wmark_pages(zone), 0, 0))
+       if (zone_balanced(zone, order, 0, 0))
                return;
 
        trace_mm_vmscan_wakeup_kswapd(pgdat->node_id, zone_idx(zone), order);
@@ -3265,20 +3336,6 @@ unsigned long global_reclaimable_pages(void)
        return nr;
 }
 
-unsigned long zone_reclaimable_pages(struct zone *zone)
-{
-       int nr;
-
-       nr = zone_page_state(zone, NR_ACTIVE_FILE) +
-            zone_page_state(zone, NR_INACTIVE_FILE);
-
-       if (get_nr_swap_pages() > 0)
-               nr += zone_page_state(zone, NR_ACTIVE_ANON) +
-                     zone_page_state(zone, NR_INACTIVE_ANON);
-
-       return nr;
-}
-
 #ifdef CONFIG_HIBERNATION
 /*
  * Try to free `nr_to_reclaim' of memory, system-wide, and return the number of
@@ -3524,10 +3581,9 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
                 * number of slab pages and shake the slab until it is reduced
                 * by the same nr_pages that we used for reclaiming unmapped
                 * pages.
-                *
-                * Note that shrink_slab will free memory on all zones and may
-                * take a long time.
                 */
+               nodes_clear(shrink.nodes_to_scan);
+               node_set(zone_to_nid(zone), shrink.nodes_to_scan);
                for (;;) {
                        unsigned long lru_pages = zone_reclaimable_pages(zone);
 
@@ -3576,7 +3632,7 @@ int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
            zone_page_state(zone, NR_SLAB_RECLAIMABLE) <= zone->min_slab_pages)
                return ZONE_RECLAIM_FULL;
 
-       if (zone->all_unreclaimable)
+       if (!zone_reclaimable(zone))
                return ZONE_RECLAIM_FULL;
 
        /*
index 20c2ef4..9bb3145 100644 (file)
@@ -19,6 +19,9 @@
 #include <linux/math64.h>
 #include <linux/writeback.h>
 #include <linux/compaction.h>
+#include <linux/mm_inline.h>
+
+#include "internal.h"
 
 #ifdef CONFIG_VM_EVENT_COUNTERS
 DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{0}};
@@ -414,12 +417,17 @@ void dec_zone_page_state(struct page *page, enum zone_stat_item item)
 EXPORT_SYMBOL(dec_zone_page_state);
 #endif
 
+static inline void fold_diff(int *diff)
+{
+       int i;
+
+       for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
+               if (diff[i])
+                       atomic_long_add(diff[i], &vm_stat[i]);
+}
+
 /*
- * Update the zone counters for one cpu.
- *
- * The cpu specified must be either the current cpu or a processor that
- * is not online. If it is the current cpu then the execution thread must
- * be pinned to the current cpu.
+ * Update the zone counters for the current cpu.
  *
  * Note that refresh_cpu_vm_stats strives to only access
  * node local memory. The per cpu pagesets on remote zones are placed
@@ -432,33 +440,29 @@ EXPORT_SYMBOL(dec_zone_page_state);
  * with the global counters. These could cause remote node cache line
  * bouncing and will have to be only done when necessary.
  */
-void refresh_cpu_vm_stats(int cpu)
+static void refresh_cpu_vm_stats(void)
 {
        struct zone *zone;
        int i;
        int global_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
 
        for_each_populated_zone(zone) {
-               struct per_cpu_pageset *p;
+               struct per_cpu_pageset __percpu *p = zone->pageset;
 
-               p = per_cpu_ptr(zone->pageset, cpu);
+               for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
+                       int v;
 
-               for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
-                       if (p->vm_stat_diff[i]) {
-                               unsigned long flags;
-                               int v;
+                       v = this_cpu_xchg(p->vm_stat_diff[i], 0);
+                       if (v) {
 
-                               local_irq_save(flags);
-                               v = p->vm_stat_diff[i];
-                               p->vm_stat_diff[i] = 0;
-                               local_irq_restore(flags);
                                atomic_long_add(v, &zone->vm_stat[i]);
                                global_diff[i] += v;
 #ifdef CONFIG_NUMA
                                /* 3 seconds idle till flush */
-                               p->expire = 3;
+                               __this_cpu_write(p->expire, 3);
 #endif
                        }
+               }
                cond_resched();
 #ifdef CONFIG_NUMA
                /*
@@ -468,29 +472,57 @@ void refresh_cpu_vm_stats(int cpu)
                 * Check if there are pages remaining in this pageset
                 * if not then there is nothing to expire.
                 */
-               if (!p->expire || !p->pcp.count)
+               if (!__this_cpu_read(p->expire) ||
+                              !__this_cpu_read(p->pcp.count))
                        continue;
 
                /*
                 * We never drain zones local to this processor.
                 */
                if (zone_to_nid(zone) == numa_node_id()) {
-                       p->expire = 0;
+                       __this_cpu_write(p->expire, 0);
                        continue;
                }
 
-               p->expire--;
-               if (p->expire)
+
+               if (__this_cpu_dec_return(p->expire))
                        continue;
 
-               if (p->pcp.count)
-                       drain_zone_pages(zone, &p->pcp);
+               if (__this_cpu_read(p->pcp.count))
+                       drain_zone_pages(zone, __this_cpu_ptr(&p->pcp));
 #endif
        }
+       fold_diff(global_diff);
+}
 
-       for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
-               if (global_diff[i])
-                       atomic_long_add(global_diff[i], &vm_stat[i]);
+/*
+ * Fold the data for an offline cpu into the global array.
+ * There cannot be any access by the offline cpu and therefore
+ * synchronization is simplified.
+ */
+void cpu_vm_stats_fold(int cpu)
+{
+       struct zone *zone;
+       int i;
+       int global_diff[NR_VM_ZONE_STAT_ITEMS] = { 0, };
+
+       for_each_populated_zone(zone) {
+               struct per_cpu_pageset *p;
+
+               p = per_cpu_ptr(zone->pageset, cpu);
+
+               for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++)
+                       if (p->vm_stat_diff[i]) {
+                               int v;
+
+                               v = p->vm_stat_diff[i];
+                               p->vm_stat_diff[i] = 0;
+                               atomic_long_add(v, &zone->vm_stat[i]);
+                               global_diff[i] += v;
+                       }
+       }
+
+       fold_diff(global_diff);
 }
 
 /*
@@ -703,6 +735,7 @@ static void walk_zones_in_node(struct seq_file *m, pg_data_t *pgdat,
 const char * const vmstat_text[] = {
        /* Zoned VM counters */
        "nr_free_pages",
+       "nr_alloc_batch",
        "nr_inactive_anon",
        "nr_active_anon",
        "nr_inactive_file",
@@ -817,6 +850,12 @@ const char * const vmstat_text[] = {
        "thp_zero_page_alloc",
        "thp_zero_page_alloc_failed",
 #endif
+#ifdef CONFIG_SMP
+       "nr_tlb_remote_flush",
+       "nr_tlb_remote_flush_received",
+#endif
+       "nr_tlb_local_flush_all",
+       "nr_tlb_local_flush_one",
 
 #endif /* CONFIG_VM_EVENTS_COUNTERS */
 };
@@ -1052,7 +1091,7 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
                   "\n  all_unreclaimable: %u"
                   "\n  start_pfn:         %lu"
                   "\n  inactive_ratio:    %u",
-                  zone->all_unreclaimable,
+                  !zone_reclaimable(zone),
                   zone->zone_start_pfn,
                   zone->inactive_ratio);
        seq_putc(m, '\n');
@@ -1177,7 +1216,7 @@ int sysctl_stat_interval __read_mostly = HZ;
 
 static void vmstat_update(struct work_struct *w)
 {
-       refresh_cpu_vm_stats(smp_processor_id());
+       refresh_cpu_vm_stats();
        schedule_delayed_work(&__get_cpu_var(vmstat_work),
                round_jiffies_relative(sysctl_stat_interval));
 }
index ad1e781..9451361 100644 (file)
--- a/mm/zbud.c
+++ b/mm/zbud.c
@@ -16,7 +16,7 @@
  *
  * zbud works by storing compressed pages, or "zpages", together in pairs in a
  * single memory page called a "zbud page".  The first buddy is "left
- * justifed" at the beginning of the zbud page, and the last buddy is "right
+ * justified" at the beginning of the zbud page, and the last buddy is "right
  * justified" at the end of the zbud page.  The benefit is that if either
  * buddy is freed, the freed buddy space, coalesced with whatever slack space
  * that existed between the buddies, results in the largest possible free region
@@ -243,7 +243,7 @@ void zbud_destroy_pool(struct zbud_pool *pool)
  * gfp should not set __GFP_HIGHMEM as highmem pages cannot be used
  * as zbud pool pages.
  *
- * Return: 0 if success and handle is set, otherwise -EINVAL is the size or
+ * Return: 0 if success and handle is set, otherwise -EINVAL if the size or
  * gfp arguments are invalid or -ENOMEM if the pool was unable to allocate
  * a new page.
  */
index deda2b6..841e35f 100644 (file)
@@ -409,7 +409,7 @@ static int zswap_get_swap_cache_page(swp_entry_t entry,
                                struct page **retpage)
 {
        struct page *found_page, *new_page = NULL;
-       struct address_space *swapper_space = &swapper_spaces[swp_type(entry)];
+       struct address_space *swapper_space = swap_address_space(entry);
        int err;
 
        *retpage = NULL;
@@ -790,26 +790,14 @@ static void zswap_frontswap_invalidate_page(unsigned type, pgoff_t offset)
 static void zswap_frontswap_invalidate_area(unsigned type)
 {
        struct zswap_tree *tree = zswap_trees[type];
-       struct rb_node *node;
-       struct zswap_entry *entry;
+       struct zswap_entry *entry, *n;
 
        if (!tree)
                return;
 
        /* walk the tree and free everything */
        spin_lock(&tree->lock);
-       /*
-        * TODO: Even though this code should not be executed because
-        * the try_to_unuse() in swapoff should have emptied the tree,
-        * it is very wasteful to rebalance the tree after every
-        * removal when we are freeing the whole tree.
-        *
-        * If post-order traversal code is ever added to the rbtree
-        * implementation, it should be used here.
-        */
-       while ((node = rb_first(&tree->rbroot))) {
-               entry = rb_entry(node, struct zswap_entry, rbnode);
-               rb_erase(&entry->rbnode, &tree->rbroot);
+       rbtree_postorder_for_each_entry_safe(entry, n, &tree->rbroot, rbnode) {
                zbud_free(tree->pool, entry->handle);
                zswap_entry_cache_free(entry);
                atomic_dec(&zswap_stored_pages);
index ba93bda..ee8fd6b 100644 (file)
@@ -987,6 +987,7 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
 {
        int err;
        struct p9_client *clnt;
+       char *client_id;
 
        err = 0;
        clnt = kmalloc(sizeof(struct p9_client), GFP_KERNEL);
@@ -995,6 +996,10 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
 
        clnt->trans_mod = NULL;
        clnt->trans = NULL;
+
+       client_id = utsname()->nodename;
+       memcpy(clnt->name, client_id, strlen(client_id) + 1);
+
        spin_lock_init(&clnt->lock);
        INIT_LIST_HEAD(&clnt->fidlist);
 
index e1c26b1..990afab 100644 (file)
@@ -577,6 +577,10 @@ static int p9_virtio_probe(struct virtio_device *vdev)
        mutex_lock(&virtio_9p_lock);
        list_add_tail(&chan->chan_list, &virtio_chan_list);
        mutex_unlock(&virtio_9p_lock);
+
+       /* Let udev rules use the new mount_tag attribute. */
+       kobject_uevent(&(vdev->dev.kobj), KOBJ_CHANGE);
+
        return 0;
 
 out_free_tag:
@@ -654,6 +658,7 @@ static void p9_virtio_remove(struct virtio_device *vdev)
        list_del(&chan->chan_list);
        mutex_unlock(&virtio_9p_lock);
        sysfs_remove_file(&(vdev->dev.kobj), &dev_attr_mount_tag.attr);
+       kobject_uevent(&(vdev->dev.kobj), KOBJ_CHANGE);
        kfree(chan->tag);
        kfree(chan->vc_wq);
        kfree(chan);
index ee02136..b50dacc 100644 (file)
@@ -228,7 +228,7 @@ config RPS
 
 config RFS_ACCEL
        boolean
-       depends on RPS && GENERIC_HARDIRQS
+       depends on RPS
        select CPU_RMAP
        default y
 
index 0ff42f0..1929af8 100644 (file)
@@ -352,7 +352,7 @@ u16 __netdev_pick_tx(struct net_device *dev, struct sk_buff *skb)
 
                if (queue_index != new_index && sk &&
                    rcu_access_pointer(sk->sk_dst_cache))
-                       sk_tx_queue_set(sk, queue_index);
+                       sk_tx_queue_set(sk, new_index);
 
                queue_index = new_index;
        }
index 8a57d79..559d4ae 100644 (file)
@@ -87,8 +87,8 @@ static int tcp_update_limit(struct mem_cgroup *memcg, u64 val)
        if (!cg_proto)
                return -EINVAL;
 
-       if (val > RESOURCE_MAX)
-               val = RESOURCE_MAX;
+       if (val > RES_COUNTER_MAX)
+               val = RES_COUNTER_MAX;
 
        tcp = tcp_from_cgproto(cg_proto);
 
@@ -101,9 +101,9 @@ static int tcp_update_limit(struct mem_cgroup *memcg, u64 val)
                tcp->tcp_prot_mem[i] = min_t(long, val >> PAGE_SHIFT,
                                             net->ipv4.sysctl_tcp_mem[i]);
 
-       if (val == RESOURCE_MAX)
+       if (val == RES_COUNTER_MAX)
                clear_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags);
-       else if (val != RESOURCE_MAX) {
+       else if (val != RES_COUNTER_MAX) {
                /*
                 * The active bit needs to be written after the static_key
                 * update. This is what guarantees that the socket activation
@@ -187,7 +187,7 @@ static u64 tcp_cgroup_read(struct cgroup_subsys_state *css, struct cftype *cft)
 
        switch (cft->private) {
        case RES_LIMIT:
-               val = tcp_read_stat(memcg, RES_LIMIT, RESOURCE_MAX);
+               val = tcp_read_stat(memcg, RES_LIMIT, RES_COUNTER_MAX);
                break;
        case RES_USAGE:
                val = tcp_read_usage(memcg);
index 136fe55..7c96100 100644 (file)
@@ -915,6 +915,9 @@ static int __init inet6_init(void)
        err = ip6_route_init();
        if (err)
                goto ip6_route_fail;
+       err = ndisc_late_init();
+       if (err)
+               goto ndisc_late_fail;
        err = ip6_flowlabel_init();
        if (err)
                goto ip6_flowlabel_fail;
@@ -981,6 +984,8 @@ ipv6_exthdrs_fail:
 addrconf_fail:
        ip6_flowlabel_cleanup();
 ip6_flowlabel_fail:
+       ndisc_late_cleanup();
+ndisc_late_fail:
        ip6_route_cleanup();
 ip6_route_fail:
 #ifdef CONFIG_PROC_FS
@@ -1043,6 +1048,7 @@ static void __exit inet6_exit(void)
        ipv6_exthdrs_exit();
        addrconf_cleanup();
        ip6_flowlabel_cleanup();
+       ndisc_late_cleanup();
        ip6_route_cleanup();
 #ifdef CONFIG_PROC_FS
 
index 07a7d65..8d67900 100644 (file)
@@ -162,12 +162,6 @@ static bool ip6_parse_tlv(const struct tlvtype_proc *procs, struct sk_buff *skb)
                off += optlen;
                len -= optlen;
        }
-       /* This case will not be caught by above check since its padding
-        * length is smaller than 7:
-        * 1 byte NH + 1 byte Length + 6 bytes Padding
-        */
-       if ((padlen == 6) && ((off - skb_network_header_len(skb)) == 8))
-               goto bad;
 
        if (len == 0)
                return true;
index a6c58ce..e275916 100644 (file)
@@ -138,8 +138,8 @@ static bool fib6_rule_suppress(struct fib_rule *rule, struct fib_lookup_arg *arg
        return false;
 
 suppress_route:
-               ip6_rt_put(rt);
-               return true;
+       ip6_rt_put(rt);
+       return true;
 }
 
 static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
index 73db48e..5bec666 100644 (file)
@@ -825,9 +825,9 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info)
        fn = fib6_add_1(root, &rt->rt6i_dst.addr, rt->rt6i_dst.plen,
                        offsetof(struct rt6_info, rt6i_dst), allow_create,
                        replace_required);
-
        if (IS_ERR(fn)) {
                err = PTR_ERR(fn);
+               fn = NULL;
                goto out;
        }
 
index 1217945..f8a55ff 100644 (file)
@@ -1727,24 +1727,28 @@ int __init ndisc_init(void)
        if (err)
                goto out_unregister_pernet;
 #endif
-       err = register_netdevice_notifier(&ndisc_netdev_notifier);
-       if (err)
-               goto out_unregister_sysctl;
 out:
        return err;
 
-out_unregister_sysctl:
 #ifdef CONFIG_SYSCTL
-       neigh_sysctl_unregister(&nd_tbl.parms);
 out_unregister_pernet:
-#endif
        unregister_pernet_subsys(&ndisc_net_ops);
        goto out;
+#endif
 }
 
-void ndisc_cleanup(void)
+int __init ndisc_late_init(void)
+{
+       return register_netdevice_notifier(&ndisc_netdev_notifier);
+}
+
+void ndisc_late_cleanup(void)
 {
        unregister_netdevice_notifier(&ndisc_netdev_notifier);
+}
+
+void ndisc_cleanup(void)
+{
 #ifdef CONFIG_SYSCTL
        neigh_sysctl_unregister(&nd_tbl.parms);
 #endif
index fb36f85..410db90 100644 (file)
@@ -1178,6 +1178,7 @@ static int __parse_flow_nlattrs(const struct nlattr *attr,
                if (type > OVS_KEY_ATTR_MAX) {
                        OVS_NLERR("Unknown key attribute (type=%d, max=%d).\n",
                                  type, OVS_KEY_ATTR_MAX);
+                       return -EINVAL;
                }
 
                if (attrs & (1 << type)) {
index c2178b1..863846c 100644 (file)
@@ -1495,7 +1495,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
        psched_ratecfg_precompute(&cl->ceil, &hopt->ceil);
 
        cl->buffer = PSCHED_TICKS2NS(hopt->buffer);
-       cl->cbuffer = PSCHED_TICKS2NS(hopt->buffer);
+       cl->cbuffer = PSCHED_TICKS2NS(hopt->cbuffer);
 
        sch_tree_unlock(sch);
 
index d5d5882..911b71b 100644 (file)
@@ -806,6 +806,9 @@ static int sctp_send_asconf_del_ip(struct sock              *sk,
                        goto skip_mkasconf;
                }
 
+               if (laddr == NULL)
+                       return -EINVAL;
+
                /* We do not need RCU protection throughout this loop
                 * because this is done under a socket lock from the
                 * setsockopt call.
@@ -6176,7 +6179,7 @@ unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait)
        /* Is there any exceptional events?  */
        if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
                mask |= POLLERR |
-                       sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0;
+                       (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
        if (sk->sk_shutdown & RCV_SHUTDOWN)
                mask |= POLLRDHUP | POLLIN | POLLRDNORM;
        if (sk->sk_shutdown == SHUTDOWN_MASK)
index b2d7c62..ebed4b6 100644 (file)
@@ -854,11 +854,6 @@ int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
 }
 EXPORT_SYMBOL(kernel_recvmsg);
 
-static void sock_aio_dtor(struct kiocb *iocb)
-{
-       kfree(iocb->private);
-}
-
 static ssize_t sock_sendpage(struct file *file, struct page *page,
                             int offset, size_t size, loff_t *ppos, int more)
 {
@@ -889,12 +884,8 @@ static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
 static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb,
                                         struct sock_iocb *siocb)
 {
-       if (!is_sync_kiocb(iocb)) {
-               siocb = kmalloc(sizeof(*siocb), GFP_KERNEL);
-               if (!siocb)
-                       return NULL;
-               iocb->ki_dtor = sock_aio_dtor;
-       }
+       if (!is_sync_kiocb(iocb))
+               BUG();
 
        siocb->kiocb = iocb;
        iocb->private = siocb;
@@ -931,7 +922,7 @@ static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov,
        if (pos != 0)
                return -ESPIPE;
 
-       if (iocb->ki_left == 0) /* Match SYS5 behaviour */
+       if (iocb->ki_nbytes == 0)       /* Match SYS5 behaviour */
                return 0;
 
 
@@ -3072,12 +3063,12 @@ static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
 
        uifmap32 = &uifr32->ifr_ifru.ifru_map;
        err = copy_from_user(&ifr, uifr32, sizeof(ifr.ifr_name));
-       err |= __get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
-       err |= __get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
-       err |= __get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
-       err |= __get_user(ifr.ifr_map.irq, &uifmap32->irq);
-       err |= __get_user(ifr.ifr_map.dma, &uifmap32->dma);
-       err |= __get_user(ifr.ifr_map.port, &uifmap32->port);
+       err |= get_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
+       err |= get_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
+       err |= get_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
+       err |= get_user(ifr.ifr_map.irq, &uifmap32->irq);
+       err |= get_user(ifr.ifr_map.dma, &uifmap32->dma);
+       err |= get_user(ifr.ifr_map.port, &uifmap32->port);
        if (err)
                return -EFAULT;
 
@@ -3088,12 +3079,12 @@ static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
 
        if (cmd == SIOCGIFMAP && !err) {
                err = copy_to_user(uifr32, &ifr, sizeof(ifr.ifr_name));
-               err |= __put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
-               err |= __put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
-               err |= __put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
-               err |= __put_user(ifr.ifr_map.irq, &uifmap32->irq);
-               err |= __put_user(ifr.ifr_map.dma, &uifmap32->dma);
-               err |= __put_user(ifr.ifr_map.port, &uifmap32->port);
+               err |= put_user(ifr.ifr_map.mem_start, &uifmap32->mem_start);
+               err |= put_user(ifr.ifr_map.mem_end, &uifmap32->mem_end);
+               err |= put_user(ifr.ifr_map.base_addr, &uifmap32->base_addr);
+               err |= put_user(ifr.ifr_map.irq, &uifmap32->irq);
+               err |= put_user(ifr.ifr_map.dma, &uifmap32->dma);
+               err |= put_user(ifr.ifr_map.port, &uifmap32->port);
                if (err)
                        err = -EFAULT;
        }
@@ -3167,25 +3158,25 @@ static int routing_ioctl(struct net *net, struct socket *sock,
                struct in6_rtmsg32 __user *ur6 = argp;
                ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
                        3 * sizeof(struct in6_addr));
-               ret |= __get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
-               ret |= __get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
-               ret |= __get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
-               ret |= __get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
-               ret |= __get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
-               ret |= __get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
-               ret |= __get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
+               ret |= get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
+               ret |= get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
+               ret |= get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
+               ret |= get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
+               ret |= get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
+               ret |= get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
+               ret |= get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
 
                r = (void *) &r6;
        } else { /* ipv4 */
                struct rtentry32 __user *ur4 = argp;
                ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
                                        3 * sizeof(struct sockaddr));
-               ret |= __get_user(r4.rt_flags, &(ur4->rt_flags));
-               ret |= __get_user(r4.rt_metric, &(ur4->rt_metric));
-               ret |= __get_user(r4.rt_mtu, &(ur4->rt_mtu));
-               ret |= __get_user(r4.rt_window, &(ur4->rt_window));
-               ret |= __get_user(r4.rt_irtt, &(ur4->rt_irtt));
-               ret |= __get_user(rtdev, &(ur4->rt_dev));
+               ret |= get_user(r4.rt_flags, &(ur4->rt_flags));
+               ret |= get_user(r4.rt_metric, &(ur4->rt_metric));
+               ret |= get_user(r4.rt_mtu, &(ur4->rt_mtu));
+               ret |= get_user(r4.rt_window, &(ur4->rt_window));
+               ret |= get_user(r4.rt_irtt, &(ur4->rt_irtt));
+               ret |= get_user(rtdev, &(ur4->rt_dev));
                if (rtdev) {
                        ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
                        r4.rt_dev = (char __user __force *)devname;
index 4151590..5285ead 100644 (file)
@@ -434,12 +434,13 @@ EXPORT_SYMBOL_GPL(rpcauth_destroy_credcache);
 /*
  * Remove stale credentials. Avoid sleeping inside the loop.
  */
-static int
+static long
 rpcauth_prune_expired(struct list_head *free, int nr_to_scan)
 {
        spinlock_t *cache_lock;
        struct rpc_cred *cred, *next;
        unsigned long expired = jiffies - RPC_AUTH_EXPIRY_MORATORIUM;
+       long freed = 0;
 
        list_for_each_entry_safe(cred, next, &cred_unused, cr_lru) {
 
@@ -451,10 +452,11 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan)
                 */
                if (time_in_range(cred->cr_expire, expired, jiffies) &&
                    test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0)
-                       return 0;
+                       break;
 
                list_del_init(&cred->cr_lru);
                number_cred_unused--;
+               freed++;
                if (atomic_read(&cred->cr_count) != 0)
                        continue;
 
@@ -467,29 +469,39 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan)
                }
                spin_unlock(cache_lock);
        }
-       return (number_cred_unused / 100) * sysctl_vfs_cache_pressure;
+       return freed;
 }
 
 /*
  * Run memory cache shrinker.
  */
-static int
-rpcauth_cache_shrinker(struct shrinker *shrink, struct shrink_control *sc)
+static unsigned long
+rpcauth_cache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
+
 {
        LIST_HEAD(free);
-       int res;
-       int nr_to_scan = sc->nr_to_scan;
-       gfp_t gfp_mask = sc->gfp_mask;
+       unsigned long freed;
+
+       if ((sc->gfp_mask & GFP_KERNEL) != GFP_KERNEL)
+               return SHRINK_STOP;
 
-       if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL)
-               return (nr_to_scan == 0) ? 0 : -1;
+       /* nothing left, don't come back */
        if (list_empty(&cred_unused))
-               return 0;
+               return SHRINK_STOP;
+
        spin_lock(&rpc_credcache_lock);
-       res = rpcauth_prune_expired(&free, nr_to_scan);
+       freed = rpcauth_prune_expired(&free, sc->nr_to_scan);
        spin_unlock(&rpc_credcache_lock);
        rpcauth_destroy_credlist(&free);
-       return res;
+
+       return freed;
+}
+
+static unsigned long
+rpcauth_cache_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
+
+{
+       return (number_cred_unused / 100) * sysctl_vfs_cache_pressure;
 }
 
 /*
@@ -805,7 +817,8 @@ rpcauth_uptodatecred(struct rpc_task *task)
 }
 
 static struct shrinker rpc_cred_shrinker = {
-       .shrink = rpcauth_cache_shrinker,
+       .count_objects = rpcauth_cache_shrink_count,
+       .scan_objects = rpcauth_cache_shrink_scan,
        .seeks = DEFAULT_SEEKS,
 };
 
index f6d84be..ed04869 100644 (file)
@@ -239,7 +239,7 @@ generic_key_timeout(struct rpc_auth *auth, struct rpc_cred *cred)
                if (test_and_clear_bit(RPC_CRED_KEY_EXPIRE_SOON,
                                        &acred->ac_flags))
                        dprintk("RPC:        UID %d Credential key reset\n",
-                               tcred->cr_uid);
+                               from_kuid(&init_user_ns, tcred->cr_uid));
                /* set up fasttrack for the normal case */
                set_bit(RPC_CRED_NOTIFY_TIMEOUT, &acred->ac_flags);
        }
index 30eb502..fcac5d1 100644 (file)
@@ -75,7 +75,7 @@ static unsigned int gss_key_expire_timeo = GSS_KEY_EXPIRE_TIMEO;
  * using integrity (two 4-byte integers): */
 #define GSS_VERF_SLACK         100
 
-static DEFINE_HASHTABLE(gss_auth_hash_table, 16);
+static DEFINE_HASHTABLE(gss_auth_hash_table, 4);
 static DEFINE_SPINLOCK(gss_auth_hash_lock);
 
 struct gss_pipe {
index 2ee9eb7..47016c3 100755 (executable)
@@ -31,12 +31,16 @@ my $show_types = 0;
 my $fix = 0;
 my $root;
 my %debug;
-my %ignore_type = ();
 my %camelcase = ();
+my %use_type = ();
+my @use = ();
+my %ignore_type = ();
 my @ignore = ();
 my $help = 0;
 my $configuration_file = ".checkpatch.conf";
 my $max_line_length = 80;
+my $ignore_perl_version = 0;
+my $minimum_perl_version = 5.10.0;
 
 sub help {
        my ($exitcode) = @_;
@@ -54,6 +58,7 @@ Options:
   --terse                    one line per report
   -f, --file                 treat FILE as regular source file
   --subjective, --strict     enable more subjective tests
+  --types TYPE(,TYPE2...)    show only these comma separated message types
   --ignore TYPE(,TYPE2...)   ignore various comma separated message types
   --max-line-length=n        set the maximum line length, if exceeded, warn
   --show-types               show the message "types" in the output
@@ -71,6 +76,8 @@ Options:
                              "<inputfile>.EXPERIMENTAL-checkpatch-fixes"
                              with potential errors corrected to the preferred
                              checkpatch style
+  --ignore-perl-version      override checking of perl version.  expect
+                             runtime errors.
   -h, --help, --version      display this help and exit
 
 When FILE is - read standard input.
@@ -116,6 +123,7 @@ GetOptions(
        'subjective!'   => \$check,
        'strict!'       => \$check,
        'ignore=s'      => \@ignore,
+       'types=s'       => \@use,
        'show-types!'   => \$show_types,
        'max-line-length=i' => \$max_line_length,
        'root=s'        => \$root,
@@ -123,6 +131,7 @@ GetOptions(
        'mailback!'     => \$mailback,
        'summary-file!' => \$summary_file,
        'fix!'          => \$fix,
+       'ignore-perl-version!' => \$ignore_perl_version,
        'debug=s'       => \%debug,
        'test-only=s'   => \$tst_only,
        'h|help'        => \$help,
@@ -133,24 +142,50 @@ help(0) if ($help);
 
 my $exit = 0;
 
+if ($^V && $^V lt $minimum_perl_version) {
+       printf "$P: requires at least perl version %vd\n", $minimum_perl_version;
+       if (!$ignore_perl_version) {
+               exit(1);
+       }
+}
+
 if ($#ARGV < 0) {
        print "$P: no input files\n";
        exit(1);
 }
 
-@ignore = split(/,/, join(',',@ignore));
-foreach my $word (@ignore) {
-       $word =~ s/\s*\n?$//g;
-       $word =~ s/^\s*//g;
-       $word =~ s/\s+/ /g;
-       $word =~ tr/[a-z]/[A-Z]/;
+sub hash_save_array_words {
+       my ($hashRef, $arrayRef) = @_;
+
+       my @array = split(/,/, join(',', @$arrayRef));
+       foreach my $word (@array) {
+               $word =~ s/\s*\n?$//g;
+               $word =~ s/^\s*//g;
+               $word =~ s/\s+/ /g;
+               $word =~ tr/[a-z]/[A-Z]/;
+
+               next if ($word =~ m/^\s*#/);
+               next if ($word =~ m/^\s*$/);
 
-       next if ($word =~ m/^\s*#/);
-       next if ($word =~ m/^\s*$/);
+               $hashRef->{$word}++;
+       }
+}
 
-       $ignore_type{$word}++;
+sub hash_show_words {
+       my ($hashRef, $prefix) = @_;
+
+       if ($quiet == 0 && keys %$hashRef) {
+               print "NOTE: $prefix message types:";
+               foreach my $word (sort keys %$hashRef) {
+                       print " $word";
+               }
+               print "\n\n";
+       }
 }
 
+hash_save_array_words(\%ignore_type, \@ignore);
+hash_save_array_words(\%use_type, \@use);
+
 my $dbg_values = 0;
 my $dbg_possible = 0;
 my $dbg_type = 0;
@@ -207,6 +242,8 @@ our $Sparse = qr{
                        __rcu
                }x;
 
+our $InitAttribute = qr{__(?:mem|cpu|dev|net_|)(?:initdata|initconst|init\b)};
+
 # Notes to $Attribute:
 # We need \b after 'init' otherwise 'initconst' will cause a false positive in a check
 our $Attribute = qr{
@@ -227,7 +264,7 @@ our $Attribute      = qr{
                        __deprecated|
                        __read_mostly|
                        __kprobes|
-                       __(?:mem|cpu|dev|)(?:initdata|initconst|init\b)|
+                       $InitAttribute|
                        ____cacheline_aligned|
                        ____cacheline_aligned_in_smp|
                        ____cacheline_internodealigned_in_smp|
@@ -257,6 +294,7 @@ our $Operators      = qr{
                  }x;
 
 our $NonptrType;
+our $NonptrTypeWithAttr;
 our $Type;
 our $Declare;
 
@@ -319,6 +357,12 @@ our @typeList = (
        qr{${Ident}_handler},
        qr{${Ident}_handler_fn},
 );
+our @typeListWithAttr = (
+       @typeList,
+       qr{struct\s+$InitAttribute\s+$Ident},
+       qr{union\s+$InitAttribute\s+$Ident},
+);
+
 our @modifierList = (
        qr{fastcall},
 );
@@ -332,6 +376,7 @@ our $allowed_asm_includes = qr{(?x:
 sub build_types {
        my $mods = "(?x:  \n" . join("|\n  ", @modifierList) . "\n)";
        my $all = "(?x:  \n" . join("|\n  ", @typeList) . "\n)";
+       my $allWithAttr = "(?x:  \n" . join("|\n  ", @typeListWithAttr) . "\n)";
        $Modifier       = qr{(?:$Attribute|$Sparse|$mods)};
        $NonptrType     = qr{
                        (?:$Modifier\s+|const\s+)*
@@ -342,6 +387,15 @@ sub build_types {
                        )
                        (?:\s+$Modifier|\s+const)*
                  }x;
+       $NonptrTypeWithAttr     = qr{
+                       (?:$Modifier\s+|const\s+)*
+                       (?:
+                               (?:typeof|__typeof__)\s*\([^\)]*\)|
+                               (?:$typeTypedefs\b)|
+                               (?:${allWithAttr}\b)
+                       )
+                       (?:\s+$Modifier|\s+const)*
+                 }x;
        $Type   = qr{
                        $NonptrType
                        (?:(?:\s|\*|\[\])+\s*const|(?:\s|\*|\[\])+|(?:\s*\[\s*\])+)?
@@ -1355,7 +1409,9 @@ sub possible {
 my $prefix = '';
 
 sub show_type {
-       return !defined $ignore_type{$_[0]};
+       return defined $use_type{$_[0]} if (scalar keys %use_type > 0);
+
+       return !defined $ignore_type{$_[0]};
 }
 
 sub report {
@@ -1435,7 +1491,23 @@ sub check_absolute_file {
 sub trim {
        my ($string) = @_;
 
-       $string =~ s/(^\s+|\s+$)//g;
+       $string =~ s/^\s+|\s+$//g;
+
+       return $string;
+}
+
+sub ltrim {
+       my ($string) = @_;
+
+       $string =~ s/^\s+//;
+
+       return $string;
+}
+
+sub rtrim {
+       my ($string) = @_;
+
+       $string =~ s/\s+$//;
 
        return $string;
 }
@@ -1532,6 +1604,7 @@ sub process {
        my %suppress_export;
        my $suppress_statement = 0;
 
+       my %signatures = ();
 
        # Pre-scan the patch sanitizing the lines.
        # Pre-scan the patch looking for any __setup documentation.
@@ -1624,6 +1697,8 @@ sub process {
        $linenr = 0;
        foreach my $line (@lines) {
                $linenr++;
+               my $sline = $line;      #copy of $line
+               $sline =~ s/$;/ /g;     #with comments as spaces
 
                my $rawline = $rawlines[$linenr - 1];
 
@@ -1781,6 +1856,17 @@ sub process {
                                             "email address '$email' might be better as '$suggested_email$comment'\n" . $herecurr);
                                }
                        }
+
+# Check for duplicate signatures
+                       my $sig_nospace = $line;
+                       $sig_nospace =~ s/\s//g;
+                       $sig_nospace = lc($sig_nospace);
+                       if (defined $signatures{$sig_nospace}) {
+                               WARN("BAD_SIGN_OFF",
+                                    "Duplicate signature\n" . $herecurr);
+                       } else {
+                               $signatures{$sig_nospace} = 1;
+                       }
                }
 
 # Check for wrappage within a valid hunk of the file
@@ -1845,15 +1931,17 @@ sub process {
 #trailing whitespace
                if ($line =~ /^\+.*\015/) {
                        my $herevet = "$here\n" . cat_vet($rawline) . "\n";
-                       ERROR("DOS_LINE_ENDINGS",
-                             "DOS line endings\n" . $herevet);
-
+                       if (ERROR("DOS_LINE_ENDINGS",
+                                 "DOS line endings\n" . $herevet) &&
+                           $fix) {
+                               $fixed[$linenr - 1] =~ s/[\s\015]+$//;
+                       }
                } elsif ($rawline =~ /^\+.*\S\s+$/ || $rawline =~ /^\+\s+$/) {
                        my $herevet = "$here\n" . cat_vet($rawline) . "\n";
                        if (ERROR("TRAILING_WHITESPACE",
                                  "trailing whitespace\n" . $herevet) &&
                            $fix) {
-                               $fixed[$linenr - 1] =~ s/^(\+.*?)\s+$/$1/;
+                               $fixed[$linenr - 1] =~ s/\s+$//;
                        }
 
                        $rpt_cleaners = 1;
@@ -2060,6 +2148,7 @@ sub process {
                if ($realfile =~ m@^(drivers/net/|net/)@ &&
                    $prevrawline =~ /^\+[ \t]*\/\*/ &&          #starting /*
                    $prevrawline !~ /\*\/[ \t]*$/ &&            #no trailing */
+                   $rawline =~ /^\+/ &&                        #line is new
                    $rawline !~ /^\+[ \t]*\*/) {                #no leading *
                        WARN("NETWORKING_BLOCK_COMMENT_STYLE",
                             "networking block comments start with * on subsequent lines\n" . $hereprev);
@@ -2126,7 +2215,7 @@ sub process {
                    $realline_next);
 #print "LINE<$line>\n";
                if ($linenr >= $suppress_statement &&
-                   $realcnt && $line =~ /.\s*\S/) {
+                   $realcnt && $sline =~ /.\s*\S/) {
                        ($stat, $cond, $line_nr_next, $remain_next, $off_next) =
                                ctx_statement_block($linenr, $realcnt, 0);
                        $stat =~ s/\n./\n /g;
@@ -2486,16 +2575,22 @@ sub process {
                }
 
 # check for global initialisers.
-               if ($line =~ /^.$Type\s*$Ident\s*(?:\s+$Modifier)*\s*=\s*(0|NULL|false)\s*;/) {
-                       ERROR("GLOBAL_INITIALISERS",
-                             "do not initialise globals to 0 or NULL\n" .
-                               $herecurr);
+               if ($line =~ /^\+(\s*$Type\s*$Ident\s*(?:\s+$Modifier))*\s*=\s*(0|NULL|false)\s*;/) {
+                       if (ERROR("GLOBAL_INITIALISERS",
+                                 "do not initialise globals to 0 or NULL\n" .
+                                     $herecurr) &&
+                           $fix) {
+                               $fixed[$linenr - 1] =~ s/($Type\s*$Ident\s*(?:\s+$Modifier))*\s*=\s*(0|NULL|false)\s*;/$1;/;
+                       }
                }
 # check for static initialisers.
-               if ($line =~ /\bstatic\s.*=\s*(0|NULL|false)\s*;/) {
-                       ERROR("INITIALISED_STATIC",
-                             "do not initialise statics to 0 or NULL\n" .
-                               $herecurr);
+               if ($line =~ /^\+.*\bstatic\s.*=\s*(0|NULL|false)\s*;/) {
+                       if (ERROR("INITIALISED_STATIC",
+                                 "do not initialise statics to 0 or NULL\n" .
+                                     $herecurr) &&
+                           $fix) {
+                               $fixed[$linenr - 1] =~ s/(\bstatic\s.*?)\s*=\s*(0|NULL|false)\s*;/$1;/;
+                       }
                }
 
 # check for static const char * arrays.
@@ -2638,8 +2733,12 @@ sub process {
                }
 
                if ($line =~ /\bpr_warning\s*\(/) {
-                       WARN("PREFER_PR_LEVEL",
-                            "Prefer pr_warn(... to pr_warning(...\n" . $herecurr);
+                       if (WARN("PREFER_PR_LEVEL",
+                                "Prefer pr_warn(... to pr_warning(...\n" . $herecurr) &&
+                           $fix) {
+                               $fixed[$linenr - 1] =~
+                                   s/\bpr_warning\b/pr_warn/;
+                       }
                }
 
                if ($line =~ /\bdev_printk\s*\(\s*KERN_([A-Z]+)/) {
@@ -2759,6 +2858,7 @@ sub process {
                        $off = 0;
 
                        my $blank = copy_spacing($opline);
+                       my $last_after = -1;
 
                        for (my $n = 0; $n < $#elements; $n += 2) {
 
@@ -2824,7 +2924,7 @@ sub process {
                                            $cc !~ /^\\/ && $cc !~ /^;/) {
                                                if (ERROR("SPACING",
                                                          "space required after that '$op' $at\n" . $hereptr)) {
-                                                       $good = trim($fix_elements[$n]) . " " . trim($fix_elements[$n + 1]) . " ";
+                                                       $good = $fix_elements[$n] . trim($fix_elements[$n + 1]) . " ";
                                                        $line_fixed = 1;
                                                }
                                        }
@@ -2839,11 +2939,11 @@ sub process {
                                        if ($ctx =~ /Wx.|.xW/) {
                                                if (ERROR("SPACING",
                                                          "spaces prohibited around that '$op' $at\n" . $hereptr)) {
-                                                       $good = trim($fix_elements[$n]) . trim($fix_elements[$n + 1]);
-                                                       $line_fixed = 1;
+                                                       $good = rtrim($fix_elements[$n]) . trim($fix_elements[$n + 1]);
                                                        if (defined $fix_elements[$n + 2]) {
                                                                $fix_elements[$n + 2] =~ s/^\s+//;
                                                        }
+                                                       $line_fixed = 1;
                                                }
                                        }
 
@@ -2852,8 +2952,9 @@ sub process {
                                        if ($ctx !~ /.x[WEC]/ && $cc !~ /^}/) {
                                                if (ERROR("SPACING",
                                                          "space required after that '$op' $at\n" . $hereptr)) {
-                                                       $good = trim($fix_elements[$n]) . trim($fix_elements[$n + 1]) . " ";
+                                                       $good = $fix_elements[$n] . trim($fix_elements[$n + 1]) . " ";
                                                        $line_fixed = 1;
+                                                       $last_after = $n;
                                                }
                                        }
 
@@ -2870,8 +2971,10 @@ sub process {
                                        if ($ctx !~ /[WEBC]x./ && $ca !~ /(?:\)|!|~|\*|-|\&|\||\+\+|\-\-|\{)$/) {
                                                if (ERROR("SPACING",
                                                          "space required before that '$op' $at\n" . $hereptr)) {
-                                                       $good = trim($fix_elements[$n]) . " " . trim($fix_elements[$n + 1]);
-                                                       $line_fixed = 1;
+                                                       if ($n != $last_after + 2) {
+                                                               $good = $fix_elements[$n] . " " . ltrim($fix_elements[$n + 1]);
+                                                               $line_fixed = 1;
+                                                       }
                                                }
                                        }
                                        if ($op eq '*' && $cc =~/\s*$Modifier\b/) {
@@ -2880,12 +2983,11 @@ sub process {
                                        } elsif ($ctx =~ /.xW/) {
                                                if (ERROR("SPACING",
                                                          "space prohibited after that '$op' $at\n" . $hereptr)) {
-                                                       $fixed_line =~ s/\s+$//;
-                                                       $good = trim($fix_elements[$n]) . trim($fix_elements[$n + 1]);
-                                                       $line_fixed = 1;
+                                                       $good = $fix_elements[$n] . rtrim($fix_elements[$n + 1]);
                                                        if (defined $fix_elements[$n + 2]) {
                                                                $fix_elements[$n + 2] =~ s/^\s+//;
                                                        }
+                                                       $line_fixed = 1;
                                                }
                                        }
 
@@ -2894,8 +2996,7 @@ sub process {
                                        if ($ctx !~ /[WEOBC]x[^W]/ && $ctx !~ /[^W]x[WOBEC]/) {
                                                if (ERROR("SPACING",
                                                          "space required one side of that '$op' $at\n" . $hereptr)) {
-                                                       $fixed_line =~ s/\s+$//;
-                                                       $good = trim($fix_elements[$n]) . trim($fix_elements[$n + 1]) . " ";
+                                                       $good = $fix_elements[$n] . trim($fix_elements[$n + 1]) . " ";
                                                        $line_fixed = 1;
                                                }
                                        }
@@ -2903,20 +3004,18 @@ sub process {
                                            ($ctx =~ /Wx./ && $cc =~ /^;/)) {
                                                if (ERROR("SPACING",
                                                          "space prohibited before that '$op' $at\n" . $hereptr)) {
-                                                       $fixed_line =~ s/\s+$//;
-                                                       $good = trim($fix_elements[$n]) . trim($fix_elements[$n + 1]);
+                                                       $good = rtrim($fix_elements[$n]) . trim($fix_elements[$n + 1]);
                                                        $line_fixed = 1;
                                                }
                                        }
                                        if ($ctx =~ /ExW/) {
                                                if (ERROR("SPACING",
                                                          "space prohibited after that '$op' $at\n" . $hereptr)) {
-                                                       $fixed_line =~ s/\s+$//;
-                                                       $good = trim($fix_elements[$n]) . trim($fix_elements[$n + 1]);
-                                                       $line_fixed = 1;
+                                                       $good = $fix_elements[$n] . trim($fix_elements[$n + 1]);
                                                        if (defined $fix_elements[$n + 2]) {
                                                                $fix_elements[$n + 2] =~ s/^\s+//;
                                                        }
+                                                       $line_fixed = 1;
                                                }
                                        }
 
@@ -2930,8 +3029,10 @@ sub process {
                                        if ($ctx =~ /Wx[^WCE]|[^WCE]xW/) {
                                                if (ERROR("SPACING",
                                                          "need consistent spacing around '$op' $at\n" . $hereptr)) {
-                                                       $fixed_line =~ s/\s+$//;
-                                                       $good = trim($fix_elements[$n]) . " " . trim($fix_elements[$n + 1]) . " ";
+                                                       $good = rtrim($fix_elements[$n]) . " " . trim($fix_elements[$n + 1]) . " ";
+                                                       if (defined $fix_elements[$n + 2]) {
+                                                               $fix_elements[$n + 2] =~ s/^\s+//;
+                                                       }
                                                        $line_fixed = 1;
                                                }
                                        }
@@ -2942,7 +3043,7 @@ sub process {
                                        if ($ctx =~ /Wx./) {
                                                if (ERROR("SPACING",
                                                          "space prohibited before that '$op' $at\n" . $hereptr)) {
-                                                       $good = trim($fix_elements[$n]) . trim($fix_elements[$n + 1]);
+                                                       $good = rtrim($fix_elements[$n]) . trim($fix_elements[$n + 1]);
                                                        $line_fixed = 1;
                                                }
                                        }
@@ -2969,8 +3070,10 @@ sub process {
                                        if ($ok == 0) {
                                                if (ERROR("SPACING",
                                                          "spaces required around that '$op' $at\n" . $hereptr)) {
-                                                       $good = trim($fix_elements[$n]) . " " . trim($fix_elements[$n + 1]) . " ";
-                                                       $good = $fix_elements[$n] . " " . trim($fix_elements[$n + 1]) . " ";
+                                                       $good = rtrim($fix_elements[$n]) . " " . trim($fix_elements[$n + 1]) . " ";
+                                                       if (defined $fix_elements[$n + 2]) {
+                                                               $fix_elements[$n + 2] =~ s/^\s+//;
+                                                       }
                                                        $line_fixed = 1;
                                                }
                                        }
@@ -3031,8 +3134,7 @@ sub process {
                        if (ERROR("SPACING",
                                  "space required before the open brace '{'\n" . $herecurr) &&
                            $fix) {
-                               $fixed[$linenr - 1] =~
-                                   s/^(\+.*(?:do|\))){/$1 {/;
+                               $fixed[$linenr - 1] =~ s/^(\+.*(?:do|\))){/$1 {/;
                        }
                }
 
@@ -3047,8 +3149,12 @@ sub process {
 # closing brace should have a space following it when it has anything
 # on the line
                if ($line =~ /}(?!(?:,|;|\)))\S/) {
-                       ERROR("SPACING",
-                             "space required after that close brace '}'\n" . $herecurr);
+                       if (ERROR("SPACING",
+                                 "space required after that close brace '}'\n" . $herecurr) &&
+                           $fix) {
+                               $fixed[$linenr - 1] =~
+                                   s/}((?!(?:,|;|\)))\S)/} $1/;
+                       }
                }
 
 # check spacing on square brackets
@@ -3271,8 +3377,13 @@ sub process {
 
 #gcc binary extension
                        if ($var =~ /^$Binary$/) {
-                               WARN("GCC_BINARY_CONSTANT",
-                                    "Avoid gcc v4.3+ binary constant extension: <$var>\n" . $herecurr);
+                               if (WARN("GCC_BINARY_CONSTANT",
+                                        "Avoid gcc v4.3+ binary constant extension: <$var>\n" . $herecurr) &&
+                                   $fix) {
+                                       my $hexval = sprintf("0x%x", oct($var));
+                                       $fixed[$linenr - 1] =~
+                                           s/\b$var\b/$hexval/;
+                               }
                        }
 
 #CamelCase
@@ -3282,19 +3393,26 @@ sub process {
                            $var !~ /^(?:Clear|Set|TestClear|TestSet|)Page[A-Z]/ &&
 #Ignore SI style variants like nS, mV and dB (ie: max_uV, regulator_min_uA_show)
                            $var !~ /^(?:[a-z_]*?)_?[a-z][A-Z](?:_[a-z_]+)?$/) {
-                               seed_camelcase_includes() if ($check);
-                               if (!defined $camelcase{$var}) {
-                                       $camelcase{$var} = 1;
-                                       CHK("CAMELCASE",
-                                           "Avoid CamelCase: <$var>\n" . $herecurr);
+                               while ($var =~ m{($Ident)}g) {
+                                       my $word = $1;
+                                       next if ($word !~ /[A-Z][a-z]|[a-z][A-Z]/);
+                                       seed_camelcase_includes() if ($check);
+                                       if (!defined $camelcase{$word}) {
+                                               $camelcase{$word} = 1;
+                                               CHK("CAMELCASE",
+                                                   "Avoid CamelCase: <$word>\n" . $herecurr);
+                                       }
                                }
                        }
                }
 
 #no spaces allowed after \ in define
-               if ($line=~/\#\s*define.*\\\s$/) {
-                       WARN("WHITESPACE_AFTER_LINE_CONTINUATION",
-                            "Whitepspace after \\ makes next lines useless\n" . $herecurr);
+               if ($line =~ /\#\s*define.*\\\s+$/) {
+                       if (WARN("WHITESPACE_AFTER_LINE_CONTINUATION",
+                                "Whitespace after \\ makes next lines useless\n" . $herecurr) &&
+                           $fix) {
+                               $fixed[$linenr - 1] =~ s/\s+$//;
+                       }
                }
 
 #warn if <asm/foo.h> is #included and <linux/foo.h> is available (uses RAW line)
@@ -3374,7 +3492,8 @@ sub process {
                            $dstat !~ /^for\s*$Constant$/ &&                            # for (...)
                            $dstat !~ /^for\s*$Constant\s+(?:$Ident|-?$Constant)$/ &&   # for (...) bar()
                            $dstat !~ /^do\s*{/ &&                                      # do {...
-                           $dstat !~ /^\({/)                                           # ({...
+                           $dstat !~ /^\({/ &&                                         # ({...
+                           $ctx !~ /^.\s*#\s*define\s+TRACE_(?:SYSTEM|INCLUDE_FILE|INCLUDE_PATH)\b/)
                        {
                                $ctx =~ s/\n*$//;
                                my $herectx = $here . "\n";
@@ -3606,6 +3725,32 @@ sub process {
                        }
                }
 
+sub string_find_replace {
+       my ($string, $find, $replace) = @_;
+
+       $string =~ s/$find/$replace/g;
+
+       return $string;
+}
+
+# check for bad placement of section $InitAttribute (e.g.: __initdata)
+               if ($line =~ /(\b$InitAttribute\b)/) {
+                       my $attr = $1;
+                       if ($line =~ /^\+\s*static\s+(?:const\s+)?(?:$attr\s+)?($NonptrTypeWithAttr)\s+(?:$attr\s+)?($Ident(?:\[[^]]*\])?)\s*[=;]/) {
+                               my $ptr = $1;
+                               my $var = $2;
+                               if ((($ptr =~ /\b(union|struct)\s+$attr\b/ &&
+                                     ERROR("MISPLACED_INIT",
+                                           "$attr should be placed after $var\n" . $herecurr)) ||
+                                    ($ptr !~ /\b(union|struct)\s+$attr\b/ &&
+                                     WARN("MISPLACED_INIT",
+                                          "$attr should be placed after $var\n" . $herecurr))) &&
+                                   $fix) {
+                                       $fixed[$linenr - 1] =~ s/(\bstatic\s+(?:const\s+)?)(?:$attr\s+)?($NonptrTypeWithAttr)\s+(?:$attr\s+)?($Ident(?:\[[^]]*\])?)\s*([=;])\s*/"$1" . trim(string_find_replace($2, "\\s*$attr\\s*", " ")) . " " . trim(string_find_replace($3, "\\s*$attr\\s*", "")) . " $attr" . ("$4" eq ";" ? ";" : " = ")/e;
+                               }
+                       }
+               }
+
 # prefer usleep_range over udelay
                if ($line =~ /\budelay\s*\(\s*(\d+)\s*\)/) {
                        # ignore udelay's < 10, however
@@ -3691,8 +3836,12 @@ sub process {
 
 # Check for __inline__ and __inline, prefer inline
                if ($line =~ /\b(__inline__|__inline)\b/) {
-                       WARN("INLINE",
-                            "plain inline is preferred over $1\n" . $herecurr);
+                       if (WARN("INLINE",
+                                "plain inline is preferred over $1\n" . $herecurr) &&
+                           $fix) {
+                               $fixed[$linenr - 1] =~ s/\b(__inline__|__inline)\b/inline/;
+
+                       }
                }
 
 # Check for __attribute__ packed, prefer __packed
@@ -3709,14 +3858,21 @@ sub process {
 
 # Check for __attribute__ format(printf, prefer __printf
                if ($line =~ /\b__attribute__\s*\(\s*\(\s*format\s*\(\s*printf/) {
-                       WARN("PREFER_PRINTF",
-                            "__printf(string-index, first-to-check) is preferred over __attribute__((format(printf, string-index, first-to-check)))\n" . $herecurr);
+                       if (WARN("PREFER_PRINTF",
+                                "__printf(string-index, first-to-check) is preferred over __attribute__((format(printf, string-index, first-to-check)))\n" . $herecurr) &&
+                           $fix) {
+                               $fixed[$linenr - 1] =~ s/\b__attribute__\s*\(\s*\(\s*format\s*\(\s*printf\s*,\s*(.*)\)\s*\)\s*\)/"__printf(" . trim($1) . ")"/ex;
+
+                       }
                }
 
 # Check for __attribute__ format(scanf, prefer __scanf
                if ($line =~ /\b__attribute__\s*\(\s*\(\s*format\s*\(\s*scanf\b/) {
-                       WARN("PREFER_SCANF",
-                            "__scanf(string-index, first-to-check) is preferred over __attribute__((format(scanf, string-index, first-to-check)))\n" . $herecurr);
+                       if (WARN("PREFER_SCANF",
+                                "__scanf(string-index, first-to-check) is preferred over __attribute__((format(scanf, string-index, first-to-check)))\n" . $herecurr) &&
+                           $fix) {
+                               $fixed[$linenr - 1] =~ s/\b__attribute__\s*\(\s*\(\s*format\s*\(\s*scanf\s*,\s*(.*)\)\s*\)\s*\)/"__scanf(" . trim($1) . ")"/ex;
+                       }
                }
 
 # check for sizeof(&)
@@ -3727,8 +3883,11 @@ sub process {
 
 # check for sizeof without parenthesis
                if ($line =~ /\bsizeof\s+((?:\*\s*|)$Lval|$Type(?:\s+$Lval|))/) {
-                       WARN("SIZEOF_PARENTHESIS",
-                            "sizeof $1 should be sizeof($1)\n" . $herecurr);
+                       if (WARN("SIZEOF_PARENTHESIS",
+                                "sizeof $1 should be sizeof($1)\n" . $herecurr) &&
+                           $fix) {
+                               $fixed[$linenr - 1] =~ s/\bsizeof\s+((?:\*\s*|)$Lval|$Type(?:\s+$Lval|))/"sizeof(" . trim($1) . ")"/ex;
+                       }
                }
 
 # check for line continuations in quoted strings with odd counts of "
@@ -3747,8 +3906,11 @@ sub process {
                if ($line =~ /\bseq_printf\s*\(/) {
                        my $fmt = get_quoted_string($line, $rawline);
                        if ($fmt !~ /[^\\]\%/) {
-                               WARN("PREFER_SEQ_PUTS",
-                                    "Prefer seq_puts to seq_printf\n" . $herecurr);
+                               if (WARN("PREFER_SEQ_PUTS",
+                                        "Prefer seq_puts to seq_printf\n" . $herecurr) &&
+                                   $fix) {
+                                       $fixed[$linenr - 1] =~ s/\bseq_printf\b/seq_puts/;
+                               }
                        }
                }
 
@@ -3810,6 +3972,16 @@ sub process {
                        }
                }
 
+# check for new externs in .h files.
+               if ($realfile =~ /\.h$/ &&
+                   $line =~ /^\+\s*(extern\s+)$Type\s*$Ident\s*\(/s) {
+                       if (WARN("AVOID_EXTERNS",
+                                "extern prototypes should be avoided in .h files\n" . $herecurr) &&
+                           $fix) {
+                               $fixed[$linenr - 1] =~ s/(.*)\bextern\b\s*(.*)/$1$2/;
+                       }
+               }
+
 # check for new externs in .c files.
                if ($realfile =~ /\.c$/ && defined $stat &&
                    $stat =~ /^.\s*(?:extern\s+)?$Type\s+($Ident)(\s*)\(/s)
@@ -3879,8 +4051,11 @@ sub process {
 
 # check for multiple semicolons
                if ($line =~ /;\s*;\s*$/) {
-                       WARN("ONE_SEMICOLON",
-                            "Statements terminations use 1 semicolon\n" . $herecurr);
+                       if (WARN("ONE_SEMICOLON",
+                                "Statements terminations use 1 semicolon\n" . $herecurr) &&
+                           $fix) {
+                               $fixed[$linenr - 1] =~ s/(\s*;\s*){2,}$/;/g;
+                       }
                }
 
 # check for switch/default statements without a break;
@@ -3898,9 +4073,12 @@ sub process {
                }
 
 # check for gcc specific __FUNCTION__
-               if ($line =~ /__FUNCTION__/) {
-                       WARN("USE_FUNC",
-                            "__func__ should be used instead of gcc specific __FUNCTION__\n"  . $herecurr);
+               if ($line =~ /\b__FUNCTION__\b/) {
+                       if (WARN("USE_FUNC",
+                                "__func__ should be used instead of gcc specific __FUNCTION__\n"  . $herecurr) &&
+                           $fix) {
+                               $fixed[$linenr - 1] =~ s/\b__FUNCTION__\b/__func__/g;
+                       }
                }
 
 # check for use of yield()
@@ -4105,13 +4283,8 @@ sub process {
                }
        }
 
-       if ($quiet == 0 && keys %ignore_type) {
-           print "NOTE: Ignored message types:";
-           foreach my $ignore (sort keys %ignore_type) {
-               print " $ignore";
-           }
-           print "\n\n";
-       }
+       hash_show_words(\%use_type, "Used");
+       hash_show_words(\%ignore_type, "Ignored");
 
        if ($clean == 0 && $fix && "@rawlines" ne "@fixed") {
                my $newfile = $filename . ".EXPERIMENTAL-checkpatch-fixes";
index 2283be2..6804179 100755 (executable)
@@ -82,7 +82,7 @@ txt_subst() {
        local infile="$3"
        local tmpfile="$infile.swp"
 
-       sed -e "s/$before/$after/" "$infile" >"$tmpfile"
+       sed -e "s:$before:$after:" "$infile" >"$tmpfile"
        # replace original file with the edited one
        mv "$tmpfile" "$infile"
 }
index cdd9bb9..aa22f94 100644 (file)
@@ -87,6 +87,27 @@ case "${ARCH}" in
                [ -f "${objtree}/vmlinux.SYS" ] && cp -v -- "${objtree}/vmlinux.SYS" "${tmpdir}/boot/vmlinux-${KERNELRELEASE}.SYS"
                [ -f "${objtree}/vmlinux.dsk" ] && cp -v -- "${objtree}/vmlinux.dsk" "${tmpdir}/boot/vmlinux-${KERNELRELEASE}.dsk"
                ;;
+       mips)
+               if [ -f "${objtree}/arch/mips/boot/compressed/vmlinux.bin" ]; then
+                       cp -v -- "${objtree}/arch/mips/boot/compressed/vmlinux.bin" "${tmpdir}/boot/vmlinuz-${KERNELRELEASE}"
+               elif [ -f "${objtree}/arch/mips/boot/compressed/vmlinux.ecoff" ]; then
+                       cp -v -- "${objtree}/arch/mips/boot/compressed/vmlinux.ecoff" "${tmpdir}/boot/vmlinuz-${KERNELRELEASE}"
+               elif [ -f "${objtree}/arch/mips/boot/compressed/vmlinux.srec" ]; then
+                       cp -v -- "${objtree}/arch/mips/boot/compressed/vmlinux.srec" "${tmpdir}/boot/vmlinuz-${KERNELRELEASE}"
+               elif [ -f "${objtree}/vmlinux.32" ]; then
+                       cp -v -- "${objtree}/vmlinux.32" "${tmpdir}/boot/vmlinux-${KERNELRELEASE}"
+               elif [ -f "${objtree}/vmlinux.64" ]; then
+                       cp -v -- "${objtree}/vmlinux.64" "${tmpdir}/boot/vmlinux-${KERNELRELEASE}"
+               elif [ -f "${objtree}/arch/mips/boot/vmlinux.bin" ]; then
+                       cp -v -- "${objtree}/arch/mips/boot/vmlinux.bin" "${tmpdir}/boot/vmlinux-${KERNELRELEASE}"
+               elif [ -f "${objtree}/arch/mips/boot/vmlinux.ecoff" ]; then
+                       cp -v -- "${objtree}/arch/mips/boot/vmlinux.ecoff" "${tmpdir}/boot/vmlinux-${KERNELRELEASE}"
+               elif [ -f "${objtree}/arch/mips/boot/vmlinux.srec" ]; then
+                       cp -v -- "${objtree}/arch/mips/boot/vmlinux.srec" "${tmpdir}/boot/vmlinux-${KERNELRELEASE}"
+               elif [ -f "${objtree}/vmlinux" ]; then
+                       cp -v -- "${objtree}/vmlinux" "${tmpdir}/boot/vmlinux-${KERNELRELEASE}"
+               fi
+               ;;
        *)
                [ -f "${KBUILD_IMAGE}" ] && cp -v -- "${KBUILD_IMAGE}" "${tmpdir}/boot/vmlinux-kbuild-${KERNELRELEASE}"
                echo "" >&2
index e54ebd5..6e61a01 100644 (file)
@@ -3428,6 +3428,7 @@ static struct snd_pci_quirk msi_black_list[] = {
        SND_PCI_QUIRK(0x1043, 0x81f2, "ASUS", 0), /* Athlon64 X2 + nvidia */
        SND_PCI_QUIRK(0x1043, 0x81f6, "ASUS", 0), /* nvidia */
        SND_PCI_QUIRK(0x1043, 0x822d, "ASUS", 0), /* Athlon64 X2 + nvidia MCP55 */
+       SND_PCI_QUIRK(0x1179, 0xfb44, "Toshiba Satellite C870", 0), /* AMD Hudson */
        SND_PCI_QUIRK(0x1849, 0x0888, "ASRock", 0), /* Athlon64 X2 + nvidia */
        SND_PCI_QUIRK(0xa0a0, 0x0575, "Aopen MZ915-M", 0), /* ICH6 */
        {}
index cccaf9c..b524f89 100644 (file)
@@ -169,7 +169,7 @@ static void cs_automute(struct hda_codec *codec)
 
        snd_hda_gen_update_outputs(codec);
 
-       if (spec->gpio_eapd_hp) {
+       if (spec->gpio_eapd_hp || spec->gpio_eapd_speaker) {
                spec->gpio_data = spec->gen.hp_jack_present ?
                        spec->gpio_eapd_hp : spec->gpio_eapd_speaker;
                snd_hda_codec_write(codec, 0x01, 0,
@@ -291,10 +291,11 @@ static int cs_init(struct hda_codec *codec)
 {
        struct cs_spec *spec = codec->spec;
 
-       /* init_verb sequence for C0/C1/C2 errata*/
-       snd_hda_sequence_write(codec, cs_errata_init_verbs);
-
-       snd_hda_sequence_write(codec, cs_coef_init_verbs);
+       if (spec->vendor_nid == CS420X_VENDOR_NID) {
+               /* init_verb sequence for C0/C1/C2 errata*/
+               snd_hda_sequence_write(codec, cs_errata_init_verbs);
+               snd_hda_sequence_write(codec, cs_coef_init_verbs);
+       }
 
        snd_hda_gen_init(codec);
 
@@ -307,8 +308,10 @@ static int cs_init(struct hda_codec *codec)
                                    spec->gpio_data);
        }
 
-       init_input_coef(codec);
-       init_digital_coef(codec);
+       if (spec->vendor_nid == CS420X_VENDOR_NID) {
+               init_input_coef(codec);
+               init_digital_coef(codec);
+       }
 
        return 0;
 }
@@ -551,6 +554,76 @@ static int patch_cs420x(struct hda_codec *codec)
        return err;
 }
 
+/*
+ * CS4208 support:
+ * Its layout is no longer compatible with CS4206/CS4207, and the generic
+ * parser seems working fairly well, except for trivial fixups.
+ */
+enum {
+       CS4208_GPIO0,
+};
+
+static const struct hda_model_fixup cs4208_models[] = {
+       { .id = CS4208_GPIO0, .name = "gpio0" },
+       {}
+};
+
+static const struct snd_pci_quirk cs4208_fixup_tbl[] = {
+       /* codec SSID */
+       SND_PCI_QUIRK(0x106b, 0x7100, "MacBookPro 6,1", CS4208_GPIO0),
+       SND_PCI_QUIRK(0x106b, 0x7200, "MacBookPro 6,2", CS4208_GPIO0),
+       {} /* terminator */
+};
+
+static void cs4208_fixup_gpio0(struct hda_codec *codec,
+                              const struct hda_fixup *fix, int action)
+{
+       if (action == HDA_FIXUP_ACT_PRE_PROBE) {
+               struct cs_spec *spec = codec->spec;
+               spec->gpio_eapd_hp = 0;
+               spec->gpio_eapd_speaker = 1;
+               spec->gpio_mask = spec->gpio_dir =
+                       spec->gpio_eapd_hp | spec->gpio_eapd_speaker;
+       }
+}
+
+static const struct hda_fixup cs4208_fixups[] = {
+       [CS4208_GPIO0] = {
+               .type = HDA_FIXUP_FUNC,
+               .v.func = cs4208_fixup_gpio0,
+       },
+};
+
+static int patch_cs4208(struct hda_codec *codec)
+{
+       struct cs_spec *spec;
+       int err;
+
+       spec = cs_alloc_spec(codec, 0); /* no specific w/a */
+       if (!spec)
+               return -ENOMEM;
+
+       spec->gen.automute_hook = cs_automute;
+
+       snd_hda_pick_fixup(codec, cs4208_models, cs4208_fixup_tbl,
+                          cs4208_fixups);
+       snd_hda_apply_fixup(codec, HDA_FIXUP_ACT_PRE_PROBE);
+
+       err = cs_parse_auto_config(codec);
+       if (err < 0)
+               goto error;
+
+       codec->patch_ops = cs_patch_ops;
+
+       snd_hda_apply_fixup(codec, HDA_FIXUP_ACT_PROBE);
+
+       return 0;
+
+ error:
+       cs_free(codec);
+       return err;
+}
+
 /*
  * Cirrus Logic CS4210
  *
@@ -991,6 +1064,7 @@ static int patch_cs4213(struct hda_codec *codec)
 static const struct hda_codec_preset snd_hda_preset_cirrus[] = {
        { .id = 0x10134206, .name = "CS4206", .patch = patch_cs420x },
        { .id = 0x10134207, .name = "CS4207", .patch = patch_cs420x },
+       { .id = 0x10134208, .name = "CS4208", .patch = patch_cs4208 },
        { .id = 0x10134210, .name = "CS4210", .patch = patch_cs4210 },
        { .id = 0x10134213, .name = "CS4213", .patch = patch_cs4213 },
        {} /* terminator */
@@ -998,6 +1072,7 @@ static const struct hda_codec_preset snd_hda_preset_cirrus[] = {
 
 MODULE_ALIAS("snd-hda-codec-id:10134206");
 MODULE_ALIAS("snd-hda-codec-id:10134207");
+MODULE_ALIAS("snd-hda-codec-id:10134208");
 MODULE_ALIAS("snd-hda-codec-id:10134210");
 MODULE_ALIAS("snd-hda-codec-id:10134213");
 
index 9a58893..3d8cd04 100644 (file)
@@ -44,6 +44,8 @@ static bool static_hdmi_pcm;
 module_param(static_hdmi_pcm, bool, 0644);
 MODULE_PARM_DESC(static_hdmi_pcm, "Don't restrict PCM parameters per ELD info");
 
+#define is_haswell(codec)  ((codec)->vendor_id == 0x80862807)
+
 struct hdmi_spec_per_cvt {
        hda_nid_t cvt_nid;
        int assigned;
@@ -894,6 +896,11 @@ static void hdmi_setup_audio_infoframe(struct hda_codec *codec,
        if (!channels)
                return;
 
+       if (is_haswell(codec))
+               snd_hda_codec_write(codec, pin_nid, 0,
+                                           AC_VERB_SET_AMP_GAIN_MUTE,
+                                           AMP_OUT_UNMUTE);
+
        eld = &per_pin->sink_eld;
        if (!eld->monitor_present)
                return;
@@ -1033,10 +1040,10 @@ static void hdmi_unsol_event(struct hda_codec *codec, unsigned int res)
                hdmi_non_intrinsic_event(codec, res);
 }
 
-static void haswell_verify_pin_D0(struct hda_codec *codec,
+static void haswell_verify_D0(struct hda_codec *codec,
                hda_nid_t cvt_nid, hda_nid_t nid)
 {
-       int pwr, lamp, ramp;
+       int pwr;
 
        /* For Haswell, the converter 1/2 may keep in D3 state after bootup,
         * thus pins could only choose converter 0 for use. Make sure the
@@ -1052,25 +1059,6 @@ static void haswell_verify_pin_D0(struct hda_codec *codec,
                pwr = (pwr & AC_PWRST_ACTUAL) >> AC_PWRST_ACTUAL_SHIFT;
                snd_printd("Haswell HDMI audio: Power for pin 0x%x is now D%d\n", nid, pwr);
        }
-
-       lamp = snd_hda_codec_read(codec, nid, 0,
-                                 AC_VERB_GET_AMP_GAIN_MUTE,
-                                 AC_AMP_GET_LEFT | AC_AMP_GET_OUTPUT);
-       ramp = snd_hda_codec_read(codec, nid, 0,
-                                 AC_VERB_GET_AMP_GAIN_MUTE,
-                                 AC_AMP_GET_RIGHT | AC_AMP_GET_OUTPUT);
-       if (lamp != ramp) {
-               snd_hda_codec_write(codec, nid, 0, AC_VERB_SET_AMP_GAIN_MUTE,
-                                   AC_AMP_SET_RIGHT | AC_AMP_SET_OUTPUT | lamp);
-
-               lamp = snd_hda_codec_read(codec, nid, 0,
-                                 AC_VERB_GET_AMP_GAIN_MUTE,
-                                 AC_AMP_GET_LEFT | AC_AMP_GET_OUTPUT);
-               ramp = snd_hda_codec_read(codec, nid, 0,
-                                 AC_VERB_GET_AMP_GAIN_MUTE,
-                                 AC_AMP_GET_RIGHT | AC_AMP_GET_OUTPUT);
-               snd_printd("Haswell HDMI audio: Mute after set on pin 0x%x: [0x%x 0x%x]\n", nid, lamp, ramp);
-       }
 }
 
 /*
@@ -1087,8 +1075,8 @@ static int hdmi_setup_stream(struct hda_codec *codec, hda_nid_t cvt_nid,
        int pinctl;
        int new_pinctl = 0;
 
-       if (codec->vendor_id == 0x80862807)
-               haswell_verify_pin_D0(codec, cvt_nid, pin_nid);
+       if (is_haswell(codec))
+               haswell_verify_D0(codec, cvt_nid, pin_nid);
 
        if (snd_hda_query_pin_caps(codec, pin_nid) & AC_PINCAP_HBR) {
                pinctl = snd_hda_codec_read(codec, pin_nid, 0,
@@ -1227,7 +1215,7 @@ static int hdmi_pcm_open(struct hda_pcm_stream *hinfo,
                            mux_idx);
 
        /* configure unused pins to choose other converters */
-       if (codec->vendor_id == 0x80862807)
+       if (is_haswell(codec))
                haswell_config_cvts(codec, pin_idx, mux_idx);
 
        snd_hda_spdif_ctls_assign(codec, pin_idx, per_cvt->cvt_nid);
@@ -1358,14 +1346,10 @@ static void hdmi_present_sense(struct hdmi_spec_per_pin *per_pin, int repoll)
                /* Haswell-specific workaround: re-setup when the transcoder is
                 * changed during the stream playback
                 */
-               if (codec->vendor_id == 0x80862807 &&
-                   eld->eld_valid && !old_eld_valid && per_pin->setup) {
-                       snd_hda_codec_write(codec, pin_nid, 0,
-                                           AC_VERB_SET_AMP_GAIN_MUTE,
-                                           AMP_OUT_UNMUTE);
+               if (is_haswell(codec) &&
+                   eld->eld_valid && !old_eld_valid && per_pin->setup)
                        hdmi_setup_audio_infoframe(codec, per_pin,
                                                   per_pin->non_pcm);
-               }
        }
        mutex_unlock(&pin_eld->lock);
 
@@ -1405,7 +1389,7 @@ static int hdmi_add_pin(struct hda_codec *codec, hda_nid_t pin_nid)
        if (get_defcfg_connect(config) == AC_JACK_PORT_NONE)
                return 0;
 
-       if (codec->vendor_id == 0x80862807)
+       if (is_haswell(codec))
                intel_haswell_fixup_connect_list(codec, pin_nid);
 
        pin_idx = spec->num_pins;
@@ -2014,7 +1998,7 @@ static int patch_generic_hdmi(struct hda_codec *codec)
        codec->spec = spec;
        hdmi_array_init(spec, 4);
 
-       if (codec->vendor_id == 0x80862807) {
+       if (is_haswell(codec)) {
                intel_haswell_enable_all_pins(codec, true);
                intel_haswell_fixup_enable_dp12(codec);
        }
@@ -2025,7 +2009,7 @@ static int patch_generic_hdmi(struct hda_codec *codec)
                return -EINVAL;
        }
        codec->patch_ops = generic_hdmi_patch_ops;
-       if (codec->vendor_id == 0x80862807) {
+       if (is_haswell(codec)) {
                codec->patch_ops.set_power_state = haswell_set_power_state;
                codec->dp_mst = true;
        }
index 9e9378c..bc07d36 100644 (file)
@@ -3443,6 +3443,56 @@ static void alc283_fixup_chromebook(struct hda_codec *codec,
        }
 }
 
+/* mute tablet speaker pin (0x14) via dock plugging in addition */
+static void asus_tx300_automute(struct hda_codec *codec)
+{
+       struct alc_spec *spec = codec->spec;
+       snd_hda_gen_update_outputs(codec);
+       if (snd_hda_jack_detect(codec, 0x1b))
+               spec->gen.mute_bits |= (1ULL << 0x14);
+}
+
+static void alc282_fixup_asus_tx300(struct hda_codec *codec,
+                                   const struct hda_fixup *fix, int action)
+{
+       struct alc_spec *spec = codec->spec;
+       /* TX300 needs to set up GPIO2 for the speaker amp */
+       static const struct hda_verb gpio2_verbs[] = {
+               { 0x01, AC_VERB_SET_GPIO_MASK, 0x04 },
+               { 0x01, AC_VERB_SET_GPIO_DIRECTION, 0x04 },
+               { 0x01, AC_VERB_SET_GPIO_DATA, 0x04 },
+               {}
+       };
+       static const struct hda_pintbl dock_pins[] = {
+               { 0x1b, 0x21114000 }, /* dock speaker pin */
+               {}
+       };
+       struct snd_kcontrol *kctl;
+
+       switch (action) {
+       case HDA_FIXUP_ACT_PRE_PROBE:
+               snd_hda_add_verbs(codec, gpio2_verbs);
+               snd_hda_apply_pincfgs(codec, dock_pins);
+               spec->gen.auto_mute_via_amp = 1;
+               spec->gen.automute_hook = asus_tx300_automute;
+               snd_hda_jack_detect_enable_callback(codec, 0x1b,
+                                                   HDA_GEN_HP_EVENT,
+                                                   snd_hda_gen_hp_automute);
+               break;
+       case HDA_FIXUP_ACT_BUILD:
+               /* this is a bit tricky; give more sane names for the main
+                * (tablet) speaker and the dock speaker, respectively
+                */
+               kctl = snd_hda_find_mixer_ctl(codec, "Speaker Playback Switch");
+               if (kctl)
+                       strcpy(kctl->id.name, "Dock Speaker Playback Switch");
+               kctl = snd_hda_find_mixer_ctl(codec, "Bass Speaker Playback Switch");
+               if (kctl)
+                       strcpy(kctl->id.name, "Speaker Playback Switch");
+               break;
+       }
+}
+
 enum {
        ALC269_FIXUP_SONY_VAIO,
        ALC275_FIXUP_SONY_VAIO_GPIO2,
@@ -3480,6 +3530,7 @@ enum {
        ALC269_FIXUP_LIMIT_INT_MIC_BOOST,
        ALC269VB_FIXUP_ORDISSIMO_EVE2,
        ALC283_FIXUP_CHROME_BOOK,
+       ALC282_FIXUP_ASUS_TX300,
 };
 
 static const struct hda_fixup alc269_fixups[] = {
@@ -3735,6 +3786,10 @@ static const struct hda_fixup alc269_fixups[] = {
                .type = HDA_FIXUP_FUNC,
                .v.func = alc283_fixup_chromebook,
        },
+       [ALC282_FIXUP_ASUS_TX300] = {
+               .type = HDA_FIXUP_FUNC,
+               .v.func = alc282_fixup_asus_tx300,
+       },
 };
 
 static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -3784,6 +3839,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x103c, 0x1983, "HP Pavilion", ALC269_FIXUP_HP_MUTE_LED_MIC1),
        SND_PCI_QUIRK(0x103c, 0x21ed, "HP Falco Chromebook", ALC283_FIXUP_CHROME_BOOK),
        SND_PCI_QUIRK_VENDOR(0x103c, "HP", ALC269_FIXUP_HP_MUTE_LED),
+       SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300),
        SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
        SND_PCI_QUIRK(0x1043, 0x115d, "Asus 1015E", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
        SND_PCI_QUIRK(0x1043, 0x1427, "Asus Zenbook UX31E", ALC269VB_FIXUP_DMIC),
index 0ecf356..bb53dea 100644 (file)
@@ -649,7 +649,7 @@ static int atmel_ssc_prepare(struct snd_pcm_substream *substream,
        dma_params = ssc_p->dma_params[dir];
 
        ssc_writel(ssc_p->ssc->regs, CR, dma_params->mask->ssc_enable);
-       ssc_writel(ssc_p->ssc->regs, IER, dma_params->mask->ssc_error);
+       ssc_writel(ssc_p->ssc->regs, IDR, dma_params->mask->ssc_error);
 
        pr_debug("%s enabled SSC_SR=0x%08x\n",
                        dir ? "receive" : "transmit",
index 15106c0..b33b45d 100644 (file)
@@ -107,7 +107,7 @@ config SND_SOC_ALL_CODECS
        select SND_SOC_WM8782
        select SND_SOC_WM8804 if SND_SOC_I2C_AND_SPI
        select SND_SOC_WM8900 if I2C
-       select SND_SOC_WM8903 if I2C && GENERIC_HARDIRQS
+       select SND_SOC_WM8903 if I2C
        select SND_SOC_WM8904 if I2C
        select SND_SOC_WM8940 if I2C
        select SND_SOC_WM8955 if I2C
index 4d3c8fd..ea141e1 100644 (file)
@@ -125,6 +125,10 @@ static int mc13783_write(struct snd_soc_codec *codec,
 
        ret = mc13xxx_reg_write(priv->mc13xxx, reg, value);
 
+       /* include errata fix for spi audio problems */
+       if (reg == MC13783_AUDIO_CODEC || reg == MC13783_AUDIO_DAC)
+               ret = mc13xxx_reg_write(priv->mc13xxx, reg, value);
+
        mc13xxx_unlock(priv->mc13xxx);
 
        return ret;
index 704e246..b7ab71f 100644 (file)
@@ -198,6 +198,7 @@ config SND_SOC_IMX_SPDIF
        select SND_SOC_IMX_PCM_DMA
        select SND_SOC_FSL_SPDIF
        select SND_SOC_SPDIF
+       select REGMAP_MMIO
        help
          SoC Audio support for i.MX boards with S/PDIF
          Say Y if you want to add support for SoC audio on an i.MX board with
index ab17381..d3bf71a 100644 (file)
@@ -335,7 +335,8 @@ static int imx_audmux_probe(struct platform_device *pdev)
        if (audmux_type == IMX31_AUDMUX)
                audmux_debugfs_init();
 
-       imx_audmux_parse_dt_defaults(pdev, pdev->dev.of_node);
+       if (of_id)
+               imx_audmux_parse_dt_defaults(pdev, pdev->dev.of_node);
 
        return 0;
 }
index 7fce340..0f3d73d 100644 (file)
@@ -559,7 +559,8 @@ static int kirkwood_i2s_dev_remove(struct platform_device *pdev)
 
 #ifdef CONFIG_OF
 static struct of_device_id mvebu_audio_of_match[] = {
-       { .compatible = "marvell,mvebu-audio" },
+       { .compatible = "marvell,kirkwood-audio" },
+       { .compatible = "marvell,dove-audio" },
        { }
 };
 MODULE_DEVICE_TABLE(of, mvebu_audio_of_match);
index 9855dfc..2eea184 100644 (file)
@@ -63,7 +63,7 @@ config SND_SOC_SAMSUNG_SMDK_WM8580
 config SND_SOC_SAMSUNG_SMDK_WM8994
        tristate "SoC I2S Audio support for WM8994 on SMDK"
        depends on SND_SOC_SAMSUNG
-       depends on I2C=y && GENERIC_HARDIRQS
+       depends on I2C=y
        select MFD_WM8994
        select SND_SOC_WM8994
        select SND_SAMSUNG_I2S
@@ -151,7 +151,7 @@ config SND_SOC_SMARTQ
 config SND_SOC_GONI_AQUILA_WM8994
        tristate "SoC I2S Audio support for AQUILA/GONI - WM8994"
        depends on SND_SOC_SAMSUNG && (MACH_GONI || MACH_AQUILA)
-       depends on I2C=y && GENERIC_HARDIRQS
+       depends on I2C=y
        select SND_SAMSUNG_I2S
        select MFD_WM8994
        select SND_SOC_WM8994
@@ -177,7 +177,7 @@ config SND_SOC_SMDK_WM8580_PCM
 config SND_SOC_SMDK_WM8994_PCM
        tristate "SoC PCM Audio support for WM8994 on SMDK"
        depends on SND_SOC_SAMSUNG
-       depends on I2C=y && GENERIC_HARDIRQS
+       depends on I2C=y
        select MFD_WM8994
        select SND_SOC_WM8994
        select SND_SAMSUNG_PCM
index 184d900..2df2e91 100644 (file)
@@ -157,9 +157,9 @@ static int rsnd_scu_start(struct rsnd_mod *mod,
        int ret;
 
        /*
-        * SCU will be used if it has RSND_SCU_USB_HPBIF flags
+        * SCU will be used if it has RSND_SCU_USE_HPBIF flags
         */
-       if (!(flags & RSND_SCU_USB_HPBIF)) {
+       if (!(flags & RSND_SCU_USE_HPBIF)) {
                /* it use PIO transter */
                dev_dbg(dev, "%s%d is not used\n",
                        rsnd_mod_name(mod), rsnd_mod_id(mod));
index c5dc1ad..3a0ff7f 100644 (file)
@@ -394,6 +394,8 @@ ifeq ($(ARCH),x86)
 LIB_OBJS += $(OUTPUT)tests/perf-time-to-tsc.o
 endif
 LIB_OBJS += $(OUTPUT)tests/code-reading.o
+LIB_OBJS += $(OUTPUT)tests/sample-parsing.o
+LIB_OBJS += $(OUTPUT)tests/parse-no-sample-id-all.o
 
 BUILTIN_OBJS += $(OUTPUT)builtin-annotate.o
 BUILTIN_OBJS += $(OUTPUT)builtin-bench.o
@@ -439,7 +441,6 @@ PERFLIBS = $(LIB_FILE) $(LIBLK) $(LIBTRACEEVENT)
 ifneq ($(OUTPUT),)
   CFLAGS += -I$(OUTPUT)
 endif
-LIB_OBJS += $(OUTPUT)tests/sample-parsing.o
 
 ifdef NO_LIBELF
 EXTLIBS := $(filter-out -lelf,$(EXTLIBS))
index f988d38..5ebd0c3 100644 (file)
@@ -277,6 +277,7 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused)
                .tool = {
                        .sample = process_sample_event,
                        .mmap   = perf_event__process_mmap,
+                       .mmap2  = perf_event__process_mmap2,
                        .comm   = perf_event__process_comm,
                        .exit   = perf_event__process_exit,
                        .fork   = perf_event__process_fork,
index 9b336fd..423875c 100644 (file)
@@ -123,6 +123,19 @@ static int perf_event__repipe_mmap(struct perf_tool *tool,
        return err;
 }
 
+static int perf_event__repipe_mmap2(struct perf_tool *tool,
+                                  union perf_event *event,
+                                  struct perf_sample *sample,
+                                  struct machine *machine)
+{
+       int err;
+
+       err = perf_event__process_mmap2(tool, event, sample, machine);
+       perf_event__repipe(tool, event, sample, machine);
+
+       return err;
+}
+
 static int perf_event__repipe_fork(struct perf_tool *tool,
                                   union perf_event *event,
                                   struct perf_sample *sample,
@@ -339,6 +352,7 @@ static int __cmd_inject(struct perf_inject *inject)
 
        if (inject->build_ids || inject->sched_stat) {
                inject->tool.mmap         = perf_event__repipe_mmap;
+               inject->tool.mmap2        = perf_event__repipe_mmap2;
                inject->tool.fork         = perf_event__repipe_fork;
                inject->tool.tracing_data = perf_event__repipe_tracing_data;
        }
@@ -390,6 +404,7 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused)
                .tool = {
                        .sample         = perf_event__repipe_sample,
                        .mmap           = perf_event__repipe,
+                       .mmap2          = perf_event__repipe,
                        .comm           = perf_event__repipe,
                        .fork           = perf_event__repipe,
                        .exit           = perf_event__repipe,
index 47b3540..935d522 100644 (file)
@@ -1165,16 +1165,16 @@ static int kvm_live_open_events(struct perf_kvm_stat *kvm)
                struct perf_event_attr *attr = &pos->attr;
 
                /* make sure these *are* set */
-               attr->sample_type |= PERF_SAMPLE_TID;
-               attr->sample_type |= PERF_SAMPLE_TIME;
-               attr->sample_type |= PERF_SAMPLE_CPU;
-               attr->sample_type |= PERF_SAMPLE_RAW;
+               perf_evsel__set_sample_bit(pos, TID);
+               perf_evsel__set_sample_bit(pos, TIME);
+               perf_evsel__set_sample_bit(pos, CPU);
+               perf_evsel__set_sample_bit(pos, RAW);
                /* make sure these are *not*; want as small a sample as possible */
-               attr->sample_type &= ~PERF_SAMPLE_PERIOD;
-               attr->sample_type &= ~PERF_SAMPLE_IP;
-               attr->sample_type &= ~PERF_SAMPLE_CALLCHAIN;
-               attr->sample_type &= ~PERF_SAMPLE_ADDR;
-               attr->sample_type &= ~PERF_SAMPLE_READ;
+               perf_evsel__reset_sample_bit(pos, PERIOD);
+               perf_evsel__reset_sample_bit(pos, IP);
+               perf_evsel__reset_sample_bit(pos, CALLCHAIN);
+               perf_evsel__reset_sample_bit(pos, ADDR);
+               perf_evsel__reset_sample_bit(pos, READ);
                attr->mmap = 0;
                attr->comm = 0;
                attr->task = 0;
index 791b432..253133a 100644 (file)
@@ -190,6 +190,7 @@ int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused)
                .tool = {
                        .sample         = process_sample_event,
                        .mmap           = perf_event__process_mmap,
+                       .mmap2          = perf_event__process_mmap2,
                        .comm           = perf_event__process_comm,
                        .lost           = perf_event__process_lost,
                        .fork           = perf_event__process_fork,
index 9725aa3..8e50d8d 100644 (file)
@@ -744,6 +744,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
                .tool = {
                        .sample          = process_sample_event,
                        .mmap            = perf_event__process_mmap,
+                       .mmap2           = perf_event__process_mmap2,
                        .comm            = perf_event__process_comm,
                        .exit            = perf_event__process_exit,
                        .fork            = perf_event__process_fork,
index 93a34ce..7f31a3d 100644 (file)
@@ -542,6 +542,7 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
 static struct perf_tool perf_script = {
        .sample          = process_sample_event,
        .mmap            = perf_event__process_mmap,
+       .mmap2           = perf_event__process_mmap2,
        .comm            = perf_event__process_comm,
        .exit            = perf_event__process_exit,
        .fork            = perf_event__process_fork,
index b6f0725..f5aa637 100644 (file)
@@ -100,7 +100,9 @@ static size_t syscall_arg__scnprintf_mmap_flags(char *bf, size_t size,
 
        P_MMAP_FLAG(SHARED);
        P_MMAP_FLAG(PRIVATE);
+#ifdef MAP_32BIT
        P_MMAP_FLAG(32BIT);
+#endif
        P_MMAP_FLAG(ANONYMOUS);
        P_MMAP_FLAG(DENYWRITE);
        P_MMAP_FLAG(EXECUTABLE);
@@ -994,6 +996,9 @@ again:
 
                        handler = evsel->handler.func;
                        handler(trace, evsel, &sample);
+
+                       if (done)
+                               goto out_unmap_evlist;
                }
        }
 
index 8bbeba3..1e67437 100644 (file)
@@ -111,6 +111,10 @@ static struct test {
                .desc = "Test using a dummy software event to keep tracking",
                .func = test__keep_tracking,
        },
+       {
+               .desc = "Test parsing with no sample_id_all bit set",
+               .func = test__parse_no_sample_id_all,
+       },
        {
                .func = NULL,
        },
diff --git a/tools/perf/tests/parse-no-sample-id-all.c b/tools/perf/tests/parse-no-sample-id-all.c
new file mode 100644 (file)
index 0000000..e117b6c
--- /dev/null
@@ -0,0 +1,108 @@
+#include <sys/types.h>
+#include <stddef.h>
+
+#include "tests.h"
+
+#include "event.h"
+#include "evlist.h"
+#include "header.h"
+#include "util.h"
+
+static int process_event(struct perf_evlist **pevlist, union perf_event *event)
+{
+       struct perf_sample sample;
+
+       if (event->header.type == PERF_RECORD_HEADER_ATTR) {
+               if (perf_event__process_attr(NULL, event, pevlist)) {
+                       pr_debug("perf_event__process_attr failed\n");
+                       return -1;
+               }
+               return 0;
+       }
+
+       if (event->header.type >= PERF_RECORD_USER_TYPE_START)
+               return -1;
+
+       if (!*pevlist)
+               return -1;
+
+       if (perf_evlist__parse_sample(*pevlist, event, &sample)) {
+               pr_debug("perf_evlist__parse_sample failed\n");
+               return -1;
+       }
+
+       return 0;
+}
+
+static int process_events(union perf_event **events, size_t count)
+{
+       struct perf_evlist *evlist = NULL;
+       int err = 0;
+       size_t i;
+
+       for (i = 0; i < count && !err; i++)
+               err = process_event(&evlist, events[i]);
+
+       if (evlist)
+               perf_evlist__delete(evlist);
+
+       return err;
+}
+
+struct test_attr_event {
+       struct attr_event attr;
+       u64 id;
+};
+
+/**
+ * test__parse_no_sample_id_all - test parsing with no sample_id_all bit set.
+ *
+ * This function tests parsing data produced on kernel's that do not support the
+ * sample_id_all bit.  Without the sample_id_all bit, non-sample events (such as
+ * mmap events) do not have an id sample appended, and consequently logic
+ * designed to determine the id will not work.  That case happens when there is
+ * more than one selected event, so this test processes three events: 2
+ * attributes representing the selected events and one mmap event.
+ *
+ * Return: %0 on success, %-1 if the test fails.
+ */
+int test__parse_no_sample_id_all(void)
+{
+       int err;
+
+       struct test_attr_event event1 = {
+               .attr = {
+                       .header = {
+                               .type = PERF_RECORD_HEADER_ATTR,
+                               .size = sizeof(struct test_attr_event),
+                       },
+               },
+               .id = 1,
+       };
+       struct test_attr_event event2 = {
+               .attr = {
+                       .header = {
+                               .type = PERF_RECORD_HEADER_ATTR,
+                               .size = sizeof(struct test_attr_event),
+                       },
+               },
+               .id = 2,
+       };
+       struct mmap_event event3 = {
+               .header = {
+                       .type = PERF_RECORD_MMAP,
+                       .size = sizeof(struct mmap_event),
+               },
+       };
+       union perf_event *events[] = {
+               (union perf_event *)&event1,
+               (union perf_event *)&event2,
+               (union perf_event *)&event3,
+       };
+
+       err = process_events(events, ARRAY_SIZE(events));
+       if (err)
+               return -1;
+
+       return 0;
+}
index 72d8881..b8a7056 100644 (file)
@@ -50,7 +50,7 @@ int test__PERF_RECORD(void)
        struct perf_sample sample;
        const char *cmd = "sleep";
        const char *argv[] = { cmd, "1", NULL, };
-       char *bname;
+       char *bname, *mmap_filename;
        u64 prev_time = 0;
        bool found_cmd_mmap = false,
             found_libc_mmap = false,
@@ -212,6 +212,7 @@ int test__PERF_RECORD(void)
 
                                if ((type == PERF_RECORD_COMM ||
                                     type == PERF_RECORD_MMAP ||
+                                    type == PERF_RECORD_MMAP2 ||
                                     type == PERF_RECORD_FORK ||
                                     type == PERF_RECORD_EXIT) &&
                                     (pid_t)event->comm.pid != evlist->workload.pid) {
@@ -220,7 +221,8 @@ int test__PERF_RECORD(void)
                                }
 
                                if ((type == PERF_RECORD_COMM ||
-                                    type == PERF_RECORD_MMAP) &&
+                                    type == PERF_RECORD_MMAP ||
+                                    type == PERF_RECORD_MMAP2) &&
                                     event->comm.pid != event->comm.tid) {
                                        pr_debug("%s with different pid/tid!\n", name);
                                        ++errs;
@@ -236,7 +238,12 @@ int test__PERF_RECORD(void)
                                case PERF_RECORD_EXIT:
                                        goto found_exit;
                                case PERF_RECORD_MMAP:
-                                       bname = strrchr(event->mmap.filename, '/');
+                                       mmap_filename = event->mmap.filename;
+                                       goto check_bname;
+                               case PERF_RECORD_MMAP2:
+                                       mmap_filename = event->mmap2.filename;
+                               check_bname:
+                                       bname = strrchr(mmap_filename, '/');
                                        if (bname != NULL) {
                                                if (!found_cmd_mmap)
                                                        found_cmd_mmap = !strcmp(bname + 1, cmd);
@@ -245,7 +252,7 @@ int test__PERF_RECORD(void)
                                                if (!found_ld_mmap)
                                                        found_ld_mmap = !strncmp(bname + 1, "ld", 2);
                                        } else if (!found_vdso_mmap)
-                                               found_vdso_mmap = !strcmp(event->mmap.filename, "[vdso]");
+                                               found_vdso_mmap = !strcmp(mmap_filename, "[vdso]");
                                        break;
 
                                case PERF_RECORD_SAMPLE:
index c048b58..e0ac713 100644 (file)
@@ -39,5 +39,6 @@ int test__perf_time_to_tsc(void);
 int test__code_reading(void);
 int test__sample_parsing(void);
 int test__keep_tracking(void);
+int test__parse_no_sample_id_all(void);
 
 #endif /* TESTS_H */
index 5b4fb33..194e2f4 100644 (file)
@@ -350,9 +350,9 @@ static int hist_entry__period_snprintf(struct perf_hpp *hpp,
 }
 
 static int hist_entry__fprintf(struct hist_entry *he, size_t size,
-                              struct hists *hists, FILE *fp)
+                              struct hists *hists,
+                              char *bf, size_t bfsz, FILE *fp)
 {
-       char bf[512];
        int ret;
        struct perf_hpp hpp = {
                .buf            = bf,
@@ -360,8 +360,8 @@ static int hist_entry__fprintf(struct hist_entry *he, size_t size,
        };
        bool color = !symbol_conf.field_sep;
 
-       if (size == 0 || size > sizeof(bf))
-               size = hpp.size = sizeof(bf);
+       if (size == 0 || size > bfsz)
+               size = hpp.size = bfsz;
 
        ret = hist_entry__period_snprintf(&hpp, he, color);
        hist_entry__sort_snprintf(he, bf + ret, size - ret, hists);
@@ -392,6 +392,8 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows,
                .ptr    = hists_to_evsel(hists),
        };
        bool first = true;
+       size_t linesz;
+       char *line = NULL;
 
        init_rem_hits();
 
@@ -479,6 +481,13 @@ size_t hists__fprintf(struct hists *hists, bool show_header, int max_rows,
                goto out;
 
 print_entries:
+       linesz = hists__sort_list_width(hists) + 3 + 1;
+       line = malloc(linesz);
+       if (line == NULL) {
+               ret = -1;
+               goto out;
+       }
+
        for (nd = rb_first(&hists->entries); nd; nd = rb_next(nd)) {
                struct hist_entry *h = rb_entry(nd, struct hist_entry, rb_node);
                float percent = h->stat.period * 100.0 /
@@ -490,10 +499,10 @@ print_entries:
                if (percent < min_pcnt)
                        continue;
 
-               ret += hist_entry__fprintf(h, max_cols, hists, fp);
+               ret += hist_entry__fprintf(h, max_cols, hists, line, linesz, fp);
 
                if (max_rows && ++nr_rows >= max_rows)
-                       goto out;
+                       break;
 
                if (h->ms.map == NULL && verbose > 1) {
                        __map_groups__fprintf_maps(&h->thread->mg,
@@ -501,6 +510,8 @@ print_entries:
                        fprintf(fp, "%.10s end\n", graph_dotted_line);
                }
        }
+
+       free(line);
 out:
        free(rem_sq_bracket);
 
index fb58409..7ded71d 100644 (file)
@@ -67,6 +67,7 @@ static int perf_event__exit_del_thread(struct perf_tool *tool __maybe_unused,
 struct perf_tool build_id__mark_dso_hit_ops = {
        .sample = build_id__mark_dso_hit,
        .mmap   = perf_event__process_mmap,
+       .mmap2  = perf_event__process_mmap2,
        .fork   = perf_event__process_fork,
        .exit   = perf_event__exit_del_thread,
        .attr            = perf_event__process_attr,
index 8d51f21..9b393e7 100644 (file)
@@ -11,6 +11,7 @@
 static const char *perf_event__names[] = {
        [0]                                     = "TOTAL",
        [PERF_RECORD_MMAP]                      = "MMAP",
+       [PERF_RECORD_MMAP2]                     = "MMAP2",
        [PERF_RECORD_LOST]                      = "LOST",
        [PERF_RECORD_COMM]                      = "COMM",
        [PERF_RECORD_EXIT]                      = "EXIT",
@@ -186,7 +187,7 @@ static int perf_event__synthesize_mmap_events(struct perf_tool *tool,
                return -1;
        }
 
-       event->header.type = PERF_RECORD_MMAP;
+       event->header.type = PERF_RECORD_MMAP2;
        /*
         * Just like the kernel, see __perf_event_mmap in kernel/perf_event.c
         */
@@ -197,7 +198,9 @@ static int perf_event__synthesize_mmap_events(struct perf_tool *tool,
                char prot[5];
                char execname[PATH_MAX];
                char anonstr[] = "//anon";
+               unsigned int ino;
                size_t size;
+               ssize_t n;
 
                if (fgets(bf, sizeof(bf), fp) == NULL)
                        break;
@@ -206,9 +209,16 @@ static int perf_event__synthesize_mmap_events(struct perf_tool *tool,
                strcpy(execname, "");
 
                /* 00400000-0040c000 r-xp 00000000 fd:01 41038  /bin/cat */
-               sscanf(bf, "%"PRIx64"-%"PRIx64" %s %"PRIx64" %*x:%*x %*u %s\n",
-                      &event->mmap.start, &event->mmap.len, prot,
-                      &event->mmap.pgoff, execname);
+               n = sscanf(bf, "%"PRIx64"-%"PRIx64" %s %"PRIx64" %x:%x %u %s\n",
+                      &event->mmap2.start, &event->mmap2.len, prot,
+                      &event->mmap2.pgoff, &event->mmap2.maj,
+                      &event->mmap2.min,
+                      &ino, execname);
+
+               if (n != 8)
+                       continue;
+
+               event->mmap2.ino = (u64)ino;
 
                if (prot[2] != 'x')
                        continue;
@@ -217,15 +227,15 @@ static int perf_event__synthesize_mmap_events(struct perf_tool *tool,
                        strcpy(execname, anonstr);
 
                size = strlen(execname) + 1;
-               memcpy(event->mmap.filename, execname, size);
+               memcpy(event->mmap2.filename, execname, size);
                size = PERF_ALIGN(size, sizeof(u64));
-               event->mmap.len -= event->mmap.start;
-               event->mmap.header.size = (sizeof(event->mmap) -
-                                          (sizeof(event->mmap.filename) - size));
-               memset(event->mmap.filename + size, 0, machine->id_hdr_size);
-               event->mmap.header.size += machine->id_hdr_size;
-               event->mmap.pid = tgid;
-               event->mmap.tid = pid;
+               event->mmap2.len -= event->mmap2.start;
+               event->mmap2.header.size = (sizeof(event->mmap2) -
+                                       (sizeof(event->mmap2.filename) - size));
+               memset(event->mmap2.filename + size, 0, machine->id_hdr_size);
+               event->mmap2.header.size += machine->id_hdr_size;
+               event->mmap2.pid = tgid;
+               event->mmap2.tid = pid;
 
                if (process(tool, event, &synth_sample, machine) != 0) {
                        rc = -1;
@@ -527,6 +537,17 @@ size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp)
                       event->mmap.len, event->mmap.pgoff, event->mmap.filename);
 }
 
+size_t perf_event__fprintf_mmap2(union perf_event *event, FILE *fp)
+{
+       return fprintf(fp, " %d/%d: [%#" PRIx64 "(%#" PRIx64 ") @ %#" PRIx64
+                          " %02x:%02x %"PRIu64" %"PRIu64"]: %s\n",
+                      event->mmap2.pid, event->mmap2.tid, event->mmap2.start,
+                      event->mmap2.len, event->mmap2.pgoff, event->mmap2.maj,
+                      event->mmap2.min, event->mmap2.ino,
+                      event->mmap2.ino_generation,
+                      event->mmap2.filename);
+}
+
 int perf_event__process_mmap(struct perf_tool *tool __maybe_unused,
                             union perf_event *event,
                             struct perf_sample *sample __maybe_unused,
@@ -535,6 +556,14 @@ int perf_event__process_mmap(struct perf_tool *tool __maybe_unused,
        return machine__process_mmap_event(machine, event);
 }
 
+int perf_event__process_mmap2(struct perf_tool *tool __maybe_unused,
+                            union perf_event *event,
+                            struct perf_sample *sample __maybe_unused,
+                            struct machine *machine)
+{
+       return machine__process_mmap2_event(machine, event);
+}
+
 size_t perf_event__fprintf_task(union perf_event *event, FILE *fp)
 {
        return fprintf(fp, "(%d:%d):(%d:%d)\n",
@@ -574,6 +603,9 @@ size_t perf_event__fprintf(union perf_event *event, FILE *fp)
        case PERF_RECORD_MMAP:
                ret += perf_event__fprintf_mmap(event, fp);
                break;
+       case PERF_RECORD_MMAP2:
+               ret += perf_event__fprintf_mmap2(event, fp);
+               break;
        default:
                ret += fprintf(fp, "\n");
        }
index 93130d8..c67ecc4 100644 (file)
@@ -17,6 +17,19 @@ struct mmap_event {
        char filename[PATH_MAX];
 };
 
+struct mmap2_event {
+       struct perf_event_header header;
+       u32 pid, tid;
+       u64 start;
+       u64 len;
+       u64 pgoff;
+       u32 maj;
+       u32 min;
+       u64 ino;
+       u64 ino_generation;
+       char filename[PATH_MAX];
+};
+
 struct comm_event {
        struct perf_event_header header;
        u32 pid, tid;
@@ -159,6 +172,7 @@ struct tracing_data_event {
 union perf_event {
        struct perf_event_header        header;
        struct mmap_event               mmap;
+       struct mmap2_event              mmap2;
        struct comm_event               comm;
        struct fork_event               fork;
        struct lost_event               lost;
@@ -208,6 +222,10 @@ int perf_event__process_mmap(struct perf_tool *tool,
                             union perf_event *event,
                             struct perf_sample *sample,
                             struct machine *machine);
+int perf_event__process_mmap2(struct perf_tool *tool,
+                            union perf_event *event,
+                            struct perf_sample *sample,
+                            struct machine *machine);
 int perf_event__process_fork(struct perf_tool *tool,
                             union perf_event *event,
                             struct perf_sample *sample,
@@ -238,6 +256,7 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type,
 
 size_t perf_event__fprintf_comm(union perf_event *event, FILE *fp);
 size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_mmap2(union perf_event *event, FILE *fp);
 size_t perf_event__fprintf_task(union perf_event *event, FILE *fp);
 size_t perf_event__fprintf(union perf_event *event, FILE *fp);
 
index b8727ae..f9f77be 100644 (file)
@@ -64,6 +64,16 @@ void perf_evlist__set_id_pos(struct perf_evlist *evlist)
        evlist->is_pos = first->is_pos;
 }
 
+static void perf_evlist__update_id_pos(struct perf_evlist *evlist)
+{
+       struct perf_evsel *evsel;
+
+       list_for_each_entry(evsel, &evlist->entries, node)
+               perf_evsel__calc_id_pos(evsel);
+
+       perf_evlist__set_id_pos(evlist);
+}
+
 static void perf_evlist__purge(struct perf_evlist *evlist)
 {
        struct perf_evsel *pos, *n;
@@ -446,20 +456,25 @@ static int perf_evlist__event2id(struct perf_evlist *evlist,
 static struct perf_evsel *perf_evlist__event2evsel(struct perf_evlist *evlist,
                                                   union perf_event *event)
 {
+       struct perf_evsel *first = perf_evlist__first(evlist);
        struct hlist_head *head;
        struct perf_sample_id *sid;
        int hash;
        u64 id;
 
        if (evlist->nr_entries == 1)
-               return perf_evlist__first(evlist);
+               return first;
+
+       if (!first->attr.sample_id_all &&
+           event->header.type != PERF_RECORD_SAMPLE)
+               return first;
 
        if (perf_evlist__event2id(evlist, event, &id))
                return NULL;
 
        /* Synthesized events have an id of zero */
        if (!id)
-               return perf_evlist__first(evlist);
+               return first;
 
        hash = hash_64(id, PERF_EVLIST__HLIST_BITS);
        head = &evlist->heads[hash];
@@ -915,6 +930,8 @@ int perf_evlist__open(struct perf_evlist *evlist)
        struct perf_evsel *evsel;
        int err;
 
+       perf_evlist__update_id_pos(evlist);
+
        list_for_each_entry(evsel, &evlist->entries, node) {
                err = perf_evsel__open(evsel, evlist->cpus, evlist->threads);
                if (err < 0)
index 3612183..0ce9feb 100644 (file)
@@ -27,6 +27,7 @@
 static struct {
        bool sample_id_all;
        bool exclude_guest;
+       bool mmap2;
 } perf_missing_features;
 
 #define FD(e, x, y) (*(int *)xyarray__entry(e->fd, x, y))
@@ -676,8 +677,9 @@ void perf_evsel__config(struct perf_evsel *evsel,
        if (opts->sample_weight)
                attr->sample_type       |= PERF_SAMPLE_WEIGHT;
 
-       attr->mmap = track;
-       attr->comm = track;
+       attr->mmap  = track;
+       attr->mmap2 = track && !perf_missing_features.mmap2;
+       attr->comm  = track;
 
        /*
         * XXX see the function comment above
@@ -1016,6 +1018,8 @@ static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
        }
 
 fallback_missing_features:
+       if (perf_missing_features.mmap2)
+               evsel->attr.mmap2 = 0;
        if (perf_missing_features.exclude_guest)
                evsel->attr.exclude_guest = evsel->attr.exclude_host = 0;
 retry_sample_id:
@@ -1080,8 +1084,11 @@ try_fallback:
        if (err != -EINVAL || cpu > 0 || thread > 0)
                goto out_close;
 
-       if (!perf_missing_features.exclude_guest &&
-           (evsel->attr.exclude_guest || evsel->attr.exclude_host)) {
+       if (!perf_missing_features.mmap2 && evsel->attr.mmap2) {
+               perf_missing_features.mmap2 = true;
+               goto fallback_missing_features;
+       } else if (!perf_missing_features.exclude_guest &&
+                  (evsel->attr.exclude_guest || evsel->attr.exclude_host)) {
                perf_missing_features.exclude_guest = true;
                goto fallback_missing_features;
        } else if (!perf_missing_features.sample_id_all) {
@@ -1925,6 +1932,7 @@ int perf_evsel__fprintf(struct perf_evsel *evsel,
                if_print(exclude_hv);
                if_print(exclude_idle);
                if_print(mmap);
+               if_print(mmap2);
                if_print(comm);
                if_print(freq);
                if_print(inherit_stat);
index a33197a..26441d0 100644 (file)
@@ -1351,6 +1351,9 @@ static void print_event_desc(struct perf_header *ph, int fd, FILE *fp)
 
                fprintf(fp, ", precise_ip = %d", evsel->attr.precise_ip);
 
+               fprintf(fp, ", attr_mmap2 = %d", evsel->attr.mmap2);
+               fprintf(fp, ", attr_mmap  = %d", evsel->attr.mmap);
+               fprintf(fp, ", attr_mmap_data = %d", evsel->attr.mmap_data);
                if (evsel->ids) {
                        fprintf(fp, ", id = {");
                        for (j = 0, id = evsel->id; j < evsel->ids; j++, id++) {
index 1dca61f..933d14f 100644 (file)
@@ -997,6 +997,54 @@ out_problem:
        return -1;
 }
 
+int machine__process_mmap2_event(struct machine *machine,
+                                union perf_event *event)
+{
+       u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
+       struct thread *thread;
+       struct map *map;
+       enum map_type type;
+       int ret = 0;
+
+       if (dump_trace)
+               perf_event__fprintf_mmap2(event, stdout);
+
+       if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL ||
+           cpumode == PERF_RECORD_MISC_KERNEL) {
+               ret = machine__process_kernel_mmap_event(machine, event);
+               if (ret < 0)
+                       goto out_problem;
+               return 0;
+       }
+
+       thread = machine__findnew_thread(machine, event->mmap2.pid,
+                                       event->mmap2.pid);
+       if (thread == NULL)
+               goto out_problem;
+
+       if (event->header.misc & PERF_RECORD_MISC_MMAP_DATA)
+               type = MAP__VARIABLE;
+       else
+               type = MAP__FUNCTION;
+
+       map = map__new(&machine->user_dsos, event->mmap2.start,
+                       event->mmap2.len, event->mmap2.pgoff,
+                       event->mmap2.pid, event->mmap2.maj,
+                       event->mmap2.min, event->mmap2.ino,
+                       event->mmap2.ino_generation,
+                       event->mmap2.filename, type);
+
+       if (map == NULL)
+               goto out_problem;
+
+       thread__insert_map(thread, map);
+       return 0;
+
+out_problem:
+       dump_printf("problem processing PERF_RECORD_MMAP2, skipping event.\n");
+       return 0;
+}
+
 int machine__process_mmap_event(struct machine *machine, union perf_event *event)
 {
        u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
@@ -1028,7 +1076,8 @@ int machine__process_mmap_event(struct machine *machine, union perf_event *event
 
        map = map__new(&machine->user_dsos, event->mmap.start,
                        event->mmap.len, event->mmap.pgoff,
-                       event->mmap.pid, event->mmap.filename,
+                       event->mmap.pid, 0, 0, 0, 0,
+                       event->mmap.filename,
                        type);
 
        if (map == NULL)
@@ -1101,6 +1150,8 @@ int machine__process_event(struct machine *machine, union perf_event *event)
                ret = machine__process_comm_event(machine, event); break;
        case PERF_RECORD_MMAP:
                ret = machine__process_mmap_event(machine, event); break;
+       case PERF_RECORD_MMAP2:
+               ret = machine__process_mmap2_event(machine, event); break;
        case PERF_RECORD_FORK:
                ret = machine__process_fork_event(machine, event); break;
        case PERF_RECORD_EXIT:
index 0df925b..58a6be1 100644 (file)
@@ -45,6 +45,7 @@ int machine__process_exit_event(struct machine *machine, union perf_event *event
 int machine__process_fork_event(struct machine *machine, union perf_event *event);
 int machine__process_lost_event(struct machine *machine, union perf_event *event);
 int machine__process_mmap_event(struct machine *machine, union perf_event *event);
+int machine__process_mmap2_event(struct machine *machine, union perf_event *event);
 int machine__process_event(struct machine *machine, union perf_event *event);
 
 typedef void (*machine__process_t)(struct machine *machine, void *data);
index 9e8304c..4f6680d 100644 (file)
@@ -48,7 +48,8 @@ void map__init(struct map *map, enum map_type type,
 }
 
 struct map *map__new(struct list_head *dsos__list, u64 start, u64 len,
-                    u64 pgoff, u32 pid, char *filename,
+                    u64 pgoff, u32 pid, u32 d_maj, u32 d_min, u64 ino,
+                    u64 ino_gen, char *filename,
                     enum map_type type)
 {
        struct map *map = malloc(sizeof(*map));
@@ -62,6 +63,11 @@ struct map *map__new(struct list_head *dsos__list, u64 start, u64 len,
                vdso = is_vdso_map(filename);
                no_dso = is_no_dso_memory(filename);
 
+               map->maj = d_maj;
+               map->min = d_min;
+               map->ino = ino;
+               map->ino_generation = ino_gen;
+
                if (anon) {
                        snprintf(newfilename, sizeof(newfilename), "/tmp/perf-%d.map", pid);
                        filename = newfilename;
index 2cc93cb..4886ca2 100644 (file)
@@ -36,6 +36,9 @@ struct map {
        bool                    erange_warned;
        u32                     priv;
        u64                     pgoff;
+       u32                     maj, min; /* only valid for MMAP2 record */
+       u64                     ino;      /* only valid for MMAP2 record */
+       u64                     ino_generation;/* only valid for MMAP2 record */
 
        /* ip -> dso rip */
        u64                     (*map_ip)(struct map *, u64);
@@ -88,8 +91,9 @@ typedef int (*symbol_filter_t)(struct map *map, struct symbol *sym);
 void map__init(struct map *map, enum map_type type,
               u64 start, u64 end, u64 pgoff, struct dso *dso);
 struct map *map__new(struct list_head *dsos__list, u64 start, u64 len,
-                    u64 pgoff, u32 pid, char *filename,
-                    enum map_type type);
+                    u64 pgoff, u32 pid, u32 d_maj, u32 d_min, u64 ino,
+                    u64 ino_gen,
+                    char *filename, enum map_type type);
 struct map *map__new2(u64 start, struct dso *dso, enum map_type type);
 void map__delete(struct map *map);
 struct map *map__clone(struct map *map);
index 1fc0c62..51f5edf 100644 (file)
@@ -351,6 +351,25 @@ static void perf_event__mmap_swap(union perf_event *event,
        }
 }
 
+static void perf_event__mmap2_swap(union perf_event *event,
+                                 bool sample_id_all)
+{
+       event->mmap2.pid   = bswap_32(event->mmap2.pid);
+       event->mmap2.tid   = bswap_32(event->mmap2.tid);
+       event->mmap2.start = bswap_64(event->mmap2.start);
+       event->mmap2.len   = bswap_64(event->mmap2.len);
+       event->mmap2.pgoff = bswap_64(event->mmap2.pgoff);
+       event->mmap2.maj   = bswap_32(event->mmap2.maj);
+       event->mmap2.min   = bswap_32(event->mmap2.min);
+       event->mmap2.ino   = bswap_64(event->mmap2.ino);
+
+       if (sample_id_all) {
+               void *data = &event->mmap2.filename;
+
+               data += PERF_ALIGN(strlen(data) + 1, sizeof(u64));
+               swap_sample_id_all(event, data);
+       }
+}
 static void perf_event__task_swap(union perf_event *event, bool sample_id_all)
 {
        event->fork.pid  = bswap_32(event->fork.pid);
@@ -455,6 +474,7 @@ typedef void (*perf_event__swap_op)(union perf_event *event,
 
 static perf_event__swap_op perf_event__swap_ops[] = {
        [PERF_RECORD_MMAP]                = perf_event__mmap_swap,
+       [PERF_RECORD_MMAP2]               = perf_event__mmap2_swap,
        [PERF_RECORD_COMM]                = perf_event__comm_swap,
        [PERF_RECORD_FORK]                = perf_event__task_swap,
        [PERF_RECORD_EXIT]                = perf_event__task_swap,
@@ -504,6 +524,7 @@ static int flush_sample_queue(struct perf_session *s,
        u64 limit = os->next_flush;
        u64 last_ts = os->last_sample ? os->last_sample->timestamp : 0ULL;
        unsigned idx = 0, progress_next = os->nr_samples / 16;
+       bool show_progress = limit == ULLONG_MAX;
        int ret;
 
        if (!tool->ordered_samples || !limit)
@@ -526,7 +547,7 @@ static int flush_sample_queue(struct perf_session *s,
                os->last_flush = iter->timestamp;
                list_del(&iter->list);
                list_add(&iter->list, &os->sample_cache);
-               if (++idx >= progress_next) {
+               if (show_progress && (++idx >= progress_next)) {
                        progress_next += os->nr_samples / 16;
                        ui_progress__update(idx, os->nr_samples,
                                            "Processing time ordered events...");
@@ -850,7 +871,8 @@ static struct machine *
             (cpumode == PERF_RECORD_MISC_GUEST_USER))) {
                u32 pid;
 
-               if (event->header.type == PERF_RECORD_MMAP)
+               if (event->header.type == PERF_RECORD_MMAP
+                   || event->header.type == PERF_RECORD_MMAP2)
                        pid = event->mmap.pid;
                else
                        pid = sample->pid;
@@ -977,6 +999,8 @@ static int perf_session_deliver_event(struct perf_session *session,
                                                    sample, evsel, machine);
        case PERF_RECORD_MMAP:
                return tool->mmap(tool, event, sample, machine);
+       case PERF_RECORD_MMAP2:
+               return tool->mmap2(tool, event, sample, machine);
        case PERF_RECORD_COMM:
                return tool->comm(tool, event, sample, machine);
        case PERF_RECORD_FORK:
@@ -1619,52 +1643,26 @@ int __perf_session__set_tracepoints_handlers(struct perf_session *session,
                                             const struct perf_evsel_str_handler *assocs,
                                             size_t nr_assocs)
 {
-       struct perf_evlist *evlist = session->evlist;
-       struct event_format *format;
        struct perf_evsel *evsel;
-       char *tracepoint, *name;
        size_t i;
        int err;
 
        for (i = 0; i < nr_assocs; i++) {
-               err = -ENOMEM;
-               tracepoint = strdup(assocs[i].name);
-               if (tracepoint == NULL)
-                       goto out;
-
-               err = -ENOENT;
-               name = strchr(tracepoint, ':');
-               if (name == NULL)
-                       goto out_free;
-
-               *name++ = '\0';
-               format = pevent_find_event_by_name(session->pevent,
-                                                  tracepoint, name);
-               if (format == NULL) {
-                       /*
-                        * Adding a handler for an event not in the session,
-                        * just ignore it.
-                        */
-                       goto next;
-               }
-
-               evsel = perf_evlist__find_tracepoint_by_id(evlist, format->id);
+               /*
+                * Adding a handler for an event not in the session,
+                * just ignore it.
+                */
+               evsel = perf_evlist__find_tracepoint_by_name(session->evlist, assocs[i].name);
                if (evsel == NULL)
-                       goto next;
+                       continue;
 
                err = -EEXIST;
                if (evsel->handler.func != NULL)
-                       goto out_free;
+                       goto out;
                evsel->handler.func = assocs[i].handler;
-next:
-               free(tracepoint);
        }
 
        err = 0;
 out:
        return err;
-
-out_free:
-       free(tracepoint);
-       goto out;
 }
index 62b16b6..4385816 100644 (file)
@@ -29,6 +29,7 @@ struct perf_tool {
        event_sample    sample,
                        read;
        event_op        mmap,
+                       mmap2,
                        comm,
                        fork,
                        exit,
index ea475cd..8a39dda 100644 (file)
@@ -101,8 +101,11 @@ void kvm_clear_async_pf_completion_queue(struct kvm_vcpu *vcpu)
                                   typeof(*work), queue);
                cancel_work_sync(&work->work);
                list_del(&work->queue);
-               if (!work->done) /* work was canceled */
+               if (!work->done) { /* work was canceled */
+                       mmdrop(work->mm);
+                       kvm_put_kvm(vcpu->kvm); /* == work->vcpu->kvm */
                        kmem_cache_free(async_pf_cache, work);
+               }
        }
 
        spin_lock(&vcpu->async_pf.lock);
index bf040c4..979bff4 100644 (file)
@@ -1058,11 +1058,15 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
 EXPORT_SYMBOL_GPL(gfn_to_hva);
 
 /*
- * The hva returned by this function is only allowed to be read.
- * It should pair with kvm_read_hva() or kvm_read_hva_atomic().
+ * If writable is set to false, the hva returned by this function is only
+ * allowed to be read.
  */
-static unsigned long gfn_to_hva_read(struct kvm *kvm, gfn_t gfn)
+unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable)
 {
+       struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
+       if (writable)
+               *writable = !memslot_is_readonly(slot);
+
        return __gfn_to_hva_many(gfn_to_memslot(kvm, gfn), gfn, NULL, false);
 }
 
@@ -1430,7 +1434,7 @@ int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
        int r;
        unsigned long addr;
 
-       addr = gfn_to_hva_read(kvm, gfn);
+       addr = gfn_to_hva_prot(kvm, gfn, NULL);
        if (kvm_is_error_hva(addr))
                return -EFAULT;
        r = kvm_read_hva(data, (void __user *)addr + offset, len);
@@ -1468,7 +1472,7 @@ int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data,
        gfn_t gfn = gpa >> PAGE_SHIFT;
        int offset = offset_in_page(gpa);
 
-       addr = gfn_to_hva_read(kvm, gfn);
+       addr = gfn_to_hva_prot(kvm, gfn, NULL);
        if (kvm_is_error_hva(addr))
                return -EFAULT;
        pagefault_disable();